From 8111c67266e82bac1edffbaee7ea5a8b1d9d0cf7 Mon Sep 17 00:00:00 2001 From: Luke Shingles Date: Wed, 19 Feb 2025 11:57:38 +0000 Subject: [PATCH] Pass LevelTransition to [col/rad]_excitation_ratecoeff (#168) ~2-3% speedup of CI jobs. No change for normal fast-math mode on Apple M1. --- kpkt.cc | 4 ++-- macroatom.cc | 26 ++++++++++++-------------- macroatom.h | 8 ++++---- nltepop.cc | 13 +++++++------ nonthermal.cc | 20 +++++++++++--------- 5 files changed, 36 insertions(+), 35 deletions(-) diff --git a/kpkt.cc b/kpkt.cc index d8b195cc7..9f0bf7dd9 100644 --- a/kpkt.cc +++ b/kpkt.cc @@ -95,7 +95,7 @@ auto calculate_cooling_rates_ion(const int nonemptymgi, const int element, const const int upper = uptranslist[ii].targetlevelindex; const double epsilon_trans = epsilon(element, ion, upper) - epsilon_current; const double C = nnlevel * - col_excitation_ratecoeff(T_e, nne, element, ion, level, ii, epsilon_trans, statweight) * + col_excitation_ratecoeff(T_e, nne, element, ion, uptranslist[ii], epsilon_trans, statweight) * epsilon_trans; C_ion += C; if constexpr (!update_cooling_contrib_list) { @@ -599,7 +599,7 @@ __host__ __device__ void do_kpkt(Packet &pkt, const double t2, const int nts) { // printout(" excitation to level %d possible\n",upper); const double epsilon_trans = epsilon(element, ion, tmpupper) - epsilon_current; const double C = nnlevel * - col_excitation_ratecoeff(T_e, nne, element, ion, level, ii, epsilon_trans, statweight) * + col_excitation_ratecoeff(T_e, nne, element, ion, uptranslist[ii], epsilon_trans, statweight) * epsilon_trans; contrib += C; if (contrib > rndcool_ion_process) { diff --git a/macroatom.cc b/macroatom.cc index c30766e5c..a11b0b5f6 100644 --- a/macroatom.cc +++ b/macroatom.cc @@ -87,9 +87,9 @@ auto calculate_macroatom_transitionrates(const int nonemptymgi, const int elemen const auto &uptrans = uptranslist[i]; const double epsilon_trans = epsilon(element, ion, uptrans.targetlevelindex) - epsilon_current; - const double R = - rad_excitation_ratecoeff(nonemptymgi, element, ion, level, i, epsilon_trans, nnlevel, uptrans.lineindex, t_mid); - const double C = col_excitation_ratecoeff(T_e, nne, element, ion, level, i, epsilon_trans, statweight); + const double R = rad_excitation_ratecoeff(nonemptymgi, element, ion, level, uptrans, epsilon_trans, nnlevel, + uptrans.lineindex, t_mid); + const double C = col_excitation_ratecoeff(T_e, nne, element, ion, uptrans, epsilon_trans, statweight); const double NT = nonthermal::nt_excitation_ratecoeff(nonemptymgi, element, ion, level, i, uptrans.lineindex); sum_internal_up_same += (R + C + NT) * epsilon_current; @@ -692,15 +692,14 @@ auto rad_deexcitation_ratecoeff(const int nonemptymgi, const int element, const // multiply by lower level population to get a rate per second auto rad_excitation_ratecoeff(const int nonemptymgi, const int element, const int ion, const int lower, - const int uptransindex, const double epsilon_trans, const double nnlevel_lower, + const LevelTransition &uptrans, const double epsilon_trans, const double nnlevel_lower, const int lineindex, const double t_current) -> double { - const auto &uptr = get_uptranslist(element, ion, lower)[uptransindex]; - const int upper = uptr.targetlevelindex; + const int upper = uptrans.targetlevelindex; const double n_u = get_levelpop(nonemptymgi, element, ion, upper); const auto &n_l = nnlevel_lower; const double nu_trans = epsilon_trans / H; - const double A_ul = uptr.einstein_A; + const double A_ul = uptrans.einstein_A; const double B_ul = CLIGHTSQUAREDOVERTWOH / std::pow(nu_trans, 3) * A_ul; const double B_lu = stat_weight(element, ion, upper) / stat_weight(element, ion, lower) * B_ul; @@ -902,18 +901,17 @@ auto col_deexcitation_ratecoeff(const float T_e, const float nne, const double e // multiply by lower level population to get a rate per second -auto col_excitation_ratecoeff(const float T_e, const float nne, const int element, const int ion, const int lower, - const int uptransindex, const double epsilon_trans, const double lowerstatweight) +auto col_excitation_ratecoeff(const float T_e, const float nne, const int element, const int ion, + const LevelTransition &uptrans, const double epsilon_trans, const double lowerstatweight) -> double { - const auto &uptr = get_uptranslist(element, ion, lower)[uptransindex]; - const double coll_strength = uptr.coll_str; + const double coll_strength = uptrans.coll_str; const double eoverkt = epsilon_trans / (KB * T_e); if (coll_strength < 0) { - const bool forbidden = uptr.forbidden; + const bool forbidden = uptrans.forbidden; if (!forbidden) { // alternative condition: (coll_strength > -1.5) i.e. to catch -1 - const double trans_osc_strength = uptr.osc_strength; + const double trans_osc_strength = uptrans.osc_strength; // permitted E1 electric dipole transitions // collisional excitation: formula valid only for atoms!!!!!!!!!!! // Rutten script eq. 3.32. p.50 @@ -937,7 +935,7 @@ auto col_excitation_ratecoeff(const float T_e, const float nne, const int elemen // forbidden transitions: magnetic dipole, electric quadropole... // Axelrod's approximation (thesis 1980) - const int upper = uptr.targetlevelindex; + const int upper = uptrans.targetlevelindex; const double upperstatweight = stat_weight(element, ion, upper); return nne * 8.629e-6 * 0.01 * std::exp(-eoverkt) * upperstatweight / std::sqrt(T_e); } diff --git a/macroatom.h b/macroatom.h index 69e13f6da..06b4d48f1 100644 --- a/macroatom.h +++ b/macroatom.h @@ -13,9 +13,9 @@ void do_macroatom(Packet &pkt, const MacroAtomState &pktmastate); float A_ul, double upperstatweight, double nnlevelupper, double t_current) -> double; -[[nodiscard]] auto rad_excitation_ratecoeff(int nonemptymgi, int element, int ion, int lower, int uptransindex, - double epsilon_trans, double nnlevel_lower, int lineindex, double t_current) - -> double; +[[nodiscard]] auto rad_excitation_ratecoeff(int nonemptymgi, int element, int ion, int lower, + const LevelTransition &uptrans, double epsilon_trans, double nnlevel_lower, + int lineindex, double t_current) -> double; [[nodiscard]] auto rad_recombination_ratecoeff(float T_e, float nne, int element, int upperion, int upperionlevel, int lowerionlevel, int nonemptymgi) -> double; @@ -32,7 +32,7 @@ void do_macroatom(Packet &pkt, const MacroAtomState &pktmastate); [[nodiscard]] auto col_deexcitation_ratecoeff(float T_e, float nne, double epsilon_trans, int element, int ion, int upper, const LevelTransition &downtransition) -> double; -[[nodiscard]] auto col_excitation_ratecoeff(float T_e, float nne, int element, int ion, int lower, int uptransindex, +[[nodiscard]] auto col_excitation_ratecoeff(float T_e, float nne, int element, int ion, const LevelTransition &uptrans, double epsilon_trans, double lowerstatweight) -> double; #endif // MACROATOM_H diff --git a/nltepop.cc b/nltepop.cc index b1b0b6b1c..756a91f76 100644 --- a/nltepop.cc +++ b/nltepop.cc @@ -484,18 +484,19 @@ void nltepop_matrix_add_boundbound(const int nonemptymgi, const int element, con const auto *const leveluptranslist = get_uptranslist(element, ion, level); const auto nuptransindices = std::ranges::iota_view{0, nuptrans}; std::for_each(nuptransindices.begin(), nuptransindices.end(), [&](const auto i) { - const int lineindex = leveluptranslist[i].lineindex; - const int upper = leveluptranslist[i].targetlevelindex; + const auto &uptrans = leveluptranslist[i]; + const int lineindex = uptrans.lineindex; + const int upper = uptrans.targetlevelindex; const double epsilon_trans = epsilon(element, ion, upper) - epsilon_level; - const double R = - rad_excitation_ratecoeff(nonemptymgi, element, ion, level, i, epsilon_trans, nnlevel, lineindex, t_mid) * - s_renorm[level]; + const double R = rad_excitation_ratecoeff(nonemptymgi, element, ion, level, uptrans, epsilon_trans, nnlevel, + lineindex, t_mid) * + s_renorm[level]; assert_always(R >= 0); assert_always(std::isfinite(R)); const double C = - col_excitation_ratecoeff(T_e, nne, element, ion, level, i, epsilon_trans, statweight) * s_renorm[level]; + col_excitation_ratecoeff(T_e, nne, element, ion, uptrans, epsilon_trans, statweight) * s_renorm[level]; assert_always(C >= 0); assert_always(std::isfinite(C)); diff --git a/nonthermal.cc b/nonthermal.cc index 9400bde0a..6f505f298 100644 --- a/nonthermal.cc +++ b/nonthermal.cc @@ -541,14 +541,15 @@ auto get_approx_shell_occupancies(const int nbound, const int ioncharge) { } auto get_shell_occupancies(const int nbound, const int ioncharge) { - assert_always(nbound > 0); - assert_always(ioncharge >= 0); - const int Z = nbound + ioncharge; + assert_testmodeonly(nbound > 0); + assert_testmodeonly(ioncharge >= 0); if constexpr (!NT_WORKFUNCTION_USE_SHELL_OCCUPANCY_FILE) { return get_approx_shell_occupancies(nbound, ioncharge); } + const int Z = nbound + ioncharge; + const auto &element_shells_q_neutral = elements_shells_q.at(Z - 1); const size_t shellcount = std::min(element_shells_q_neutral.size(), elements_electron_binding[Z - 1].size()); auto element_shells_q = std::vector(shellcount); @@ -1504,9 +1505,9 @@ auto select_nt_ionization(const int nonemptymgi) -> std::tuple { auto get_uptransindex(const int element, const int ion, const int lower, const int upper) { const int nuptrans = get_nuptrans(element, ion, lower); - const auto *const leveluptrans = get_uptranslist(element, ion, lower); + const auto *const leveluptranslist = get_uptranslist(element, ion, lower); for (int t = 0; t < nuptrans; t++) { - if (upper == leveluptrans[t].targetlevelindex) { + if (upper == leveluptranslist[t].targetlevelindex) { return t; } } @@ -1725,16 +1726,17 @@ void analyse_sf_solution(const int nonemptymgi, const int timestep, const bool e const double epsilon_trans = epsilon(element, ion, upper) - epsilon(element, ion, lower); const double ntcollexc_ratecoeff = ntexc.ratecoeffperdeposition * deposition_rate_density; + const auto &uptrans = get_uptranslist(element, ion, lower)[uptransindex]; const double t_mid = globals::timesteps[timestep].mid; - const double radexc_ratecoeff = rad_excitation_ratecoeff(nonemptymgi, element, ion, lower, uptransindex, + const double radexc_ratecoeff = rad_excitation_ratecoeff(nonemptymgi, element, ion, lower, uptrans, epsilon_trans, nnlevel_lower, lineindex, t_mid); - const double collexc_ratecoeff = col_excitation_ratecoeff(T_e, nne, element, ion, lower, uptransindex, - epsilon_trans, stat_weight(element, ion, lower)); + const double collexc_ratecoeff = + col_excitation_ratecoeff(T_e, nne, element, ion, uptrans, epsilon_trans, stat_weight(element, ion, lower)); const double exc_ratecoeff = radexc_ratecoeff + collexc_ratecoeff + ntcollexc_ratecoeff; - const auto coll_str = get_uptranslist(element, ion, lower)[uptransindex].coll_str; + const auto coll_str = uptrans.coll_str; printout( " frac_deposition %.3e Z=%2d ionstage %d lower %4d upper %4d rad_exc %.1e coll_exc %.1e nt_exc %.1e "