From 8111c67266e82bac1edffbaee7ea5a8b1d9d0cf7 Mon Sep 17 00:00:00 2001
From: Luke Shingles <luke.shingles@gmail.com>
Date: Wed, 19 Feb 2025 11:57:38 +0000
Subject: [PATCH] Pass LevelTransition to [col/rad]_excitation_ratecoeff (#168)

~2-3% speedup of CI jobs. No change for normal fast-math mode on Apple
M1.
---
 kpkt.cc       |  4 ++--
 macroatom.cc  | 26 ++++++++++++--------------
 macroatom.h   |  8 ++++----
 nltepop.cc    | 13 +++++++------
 nonthermal.cc | 20 +++++++++++---------
 5 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/kpkt.cc b/kpkt.cc
index d8b195cc7..9f0bf7dd9 100644
--- a/kpkt.cc
+++ b/kpkt.cc
@@ -95,7 +95,7 @@ auto calculate_cooling_rates_ion(const int nonemptymgi, const int element, const
         const int upper = uptranslist[ii].targetlevelindex;
         const double epsilon_trans = epsilon(element, ion, upper) - epsilon_current;
         const double C = nnlevel *
-                         col_excitation_ratecoeff(T_e, nne, element, ion, level, ii, epsilon_trans, statweight) *
+                         col_excitation_ratecoeff(T_e, nne, element, ion, uptranslist[ii], epsilon_trans, statweight) *
                          epsilon_trans;
         C_ion += C;
         if constexpr (!update_cooling_contrib_list) {
@@ -599,7 +599,7 @@ __host__ __device__ void do_kpkt(Packet &pkt, const double t2, const int nts) {
       // printout("    excitation to level %d possible\n",upper);
       const double epsilon_trans = epsilon(element, ion, tmpupper) - epsilon_current;
       const double C = nnlevel *
-                       col_excitation_ratecoeff(T_e, nne, element, ion, level, ii, epsilon_trans, statweight) *
+                       col_excitation_ratecoeff(T_e, nne, element, ion, uptranslist[ii], epsilon_trans, statweight) *
                        epsilon_trans;
       contrib += C;
       if (contrib > rndcool_ion_process) {
diff --git a/macroatom.cc b/macroatom.cc
index c30766e5c..a11b0b5f6 100644
--- a/macroatom.cc
+++ b/macroatom.cc
@@ -87,9 +87,9 @@ auto calculate_macroatom_transitionrates(const int nonemptymgi, const int elemen
     const auto &uptrans = uptranslist[i];
     const double epsilon_trans = epsilon(element, ion, uptrans.targetlevelindex) - epsilon_current;
 
-    const double R =
-        rad_excitation_ratecoeff(nonemptymgi, element, ion, level, i, epsilon_trans, nnlevel, uptrans.lineindex, t_mid);
-    const double C = col_excitation_ratecoeff(T_e, nne, element, ion, level, i, epsilon_trans, statweight);
+    const double R = rad_excitation_ratecoeff(nonemptymgi, element, ion, level, uptrans, epsilon_trans, nnlevel,
+                                              uptrans.lineindex, t_mid);
+    const double C = col_excitation_ratecoeff(T_e, nne, element, ion, uptrans, epsilon_trans, statweight);
     const double NT = nonthermal::nt_excitation_ratecoeff(nonemptymgi, element, ion, level, i, uptrans.lineindex);
 
     sum_internal_up_same += (R + C + NT) * epsilon_current;
@@ -692,15 +692,14 @@ auto rad_deexcitation_ratecoeff(const int nonemptymgi, const int element, const
 // multiply by lower level population to get a rate per second
 
 auto rad_excitation_ratecoeff(const int nonemptymgi, const int element, const int ion, const int lower,
-                              const int uptransindex, const double epsilon_trans, const double nnlevel_lower,
+                              const LevelTransition &uptrans, const double epsilon_trans, const double nnlevel_lower,
                               const int lineindex, const double t_current) -> double {
-  const auto &uptr = get_uptranslist(element, ion, lower)[uptransindex];
-  const int upper = uptr.targetlevelindex;
+  const int upper = uptrans.targetlevelindex;
 
   const double n_u = get_levelpop(nonemptymgi, element, ion, upper);
   const auto &n_l = nnlevel_lower;
   const double nu_trans = epsilon_trans / H;
-  const double A_ul = uptr.einstein_A;
+  const double A_ul = uptrans.einstein_A;
   const double B_ul = CLIGHTSQUAREDOVERTWOH / std::pow(nu_trans, 3) * A_ul;
   const double B_lu = stat_weight(element, ion, upper) / stat_weight(element, ion, lower) * B_ul;
 
@@ -902,18 +901,17 @@ auto col_deexcitation_ratecoeff(const float T_e, const float nne, const double e
 
 // multiply by lower level population to get a rate per second
 
-auto col_excitation_ratecoeff(const float T_e, const float nne, const int element, const int ion, const int lower,
-                              const int uptransindex, const double epsilon_trans, const double lowerstatweight)
+auto col_excitation_ratecoeff(const float T_e, const float nne, const int element, const int ion,
+                              const LevelTransition &uptrans, const double epsilon_trans, const double lowerstatweight)
     -> double {
-  const auto &uptr = get_uptranslist(element, ion, lower)[uptransindex];
-  const double coll_strength = uptr.coll_str;
+  const double coll_strength = uptrans.coll_str;
   const double eoverkt = epsilon_trans / (KB * T_e);
 
   if (coll_strength < 0) {
-    const bool forbidden = uptr.forbidden;
+    const bool forbidden = uptrans.forbidden;
     if (!forbidden) {
       // alternative condition: (coll_strength > -1.5) i.e. to catch -1
-      const double trans_osc_strength = uptr.osc_strength;
+      const double trans_osc_strength = uptrans.osc_strength;
       // permitted E1 electric dipole transitions
       // collisional excitation: formula valid only for atoms!!!!!!!!!!!
       // Rutten script eq. 3.32. p.50
@@ -937,7 +935,7 @@ auto col_excitation_ratecoeff(const float T_e, const float nne, const int elemen
 
     // forbidden transitions: magnetic dipole, electric quadropole...
     // Axelrod's approximation (thesis 1980)
-    const int upper = uptr.targetlevelindex;
+    const int upper = uptrans.targetlevelindex;
     const double upperstatweight = stat_weight(element, ion, upper);
     return nne * 8.629e-6 * 0.01 * std::exp(-eoverkt) * upperstatweight / std::sqrt(T_e);
   }
diff --git a/macroatom.h b/macroatom.h
index 69e13f6da..06b4d48f1 100644
--- a/macroatom.h
+++ b/macroatom.h
@@ -13,9 +13,9 @@ void do_macroatom(Packet &pkt, const MacroAtomState &pktmastate);
                                               float A_ul, double upperstatweight, double nnlevelupper, double t_current)
     -> double;
 
-[[nodiscard]] auto rad_excitation_ratecoeff(int nonemptymgi, int element, int ion, int lower, int uptransindex,
-                                            double epsilon_trans, double nnlevel_lower, int lineindex, double t_current)
-    -> double;
+[[nodiscard]] auto rad_excitation_ratecoeff(int nonemptymgi, int element, int ion, int lower,
+                                            const LevelTransition &uptrans, double epsilon_trans, double nnlevel_lower,
+                                            int lineindex, double t_current) -> double;
 
 [[nodiscard]] auto rad_recombination_ratecoeff(float T_e, float nne, int element, int upperion, int upperionlevel,
                                                int lowerionlevel, int nonemptymgi) -> double;
@@ -32,7 +32,7 @@ void do_macroatom(Packet &pkt, const MacroAtomState &pktmastate);
 [[nodiscard]] auto col_deexcitation_ratecoeff(float T_e, float nne, double epsilon_trans, int element, int ion,
                                               int upper, const LevelTransition &downtransition) -> double;
 
-[[nodiscard]] auto col_excitation_ratecoeff(float T_e, float nne, int element, int ion, int lower, int uptransindex,
+[[nodiscard]] auto col_excitation_ratecoeff(float T_e, float nne, int element, int ion, const LevelTransition &uptrans,
                                             double epsilon_trans, double lowerstatweight) -> double;
 
 #endif  // MACROATOM_H
diff --git a/nltepop.cc b/nltepop.cc
index b1b0b6b1c..756a91f76 100644
--- a/nltepop.cc
+++ b/nltepop.cc
@@ -484,18 +484,19 @@ void nltepop_matrix_add_boundbound(const int nonemptymgi, const int element, con
     const auto *const leveluptranslist = get_uptranslist(element, ion, level);
     const auto nuptransindices = std::ranges::iota_view{0, nuptrans};
     std::for_each(nuptransindices.begin(), nuptransindices.end(), [&](const auto i) {
-      const int lineindex = leveluptranslist[i].lineindex;
-      const int upper = leveluptranslist[i].targetlevelindex;
+      const auto &uptrans = leveluptranslist[i];
+      const int lineindex = uptrans.lineindex;
+      const int upper = uptrans.targetlevelindex;
       const double epsilon_trans = epsilon(element, ion, upper) - epsilon_level;
 
-      const double R =
-          rad_excitation_ratecoeff(nonemptymgi, element, ion, level, i, epsilon_trans, nnlevel, lineindex, t_mid) *
-          s_renorm[level];
+      const double R = rad_excitation_ratecoeff(nonemptymgi, element, ion, level, uptrans, epsilon_trans, nnlevel,
+                                                lineindex, t_mid) *
+                       s_renorm[level];
       assert_always(R >= 0);
       assert_always(std::isfinite(R));
 
       const double C =
-          col_excitation_ratecoeff(T_e, nne, element, ion, level, i, epsilon_trans, statweight) * s_renorm[level];
+          col_excitation_ratecoeff(T_e, nne, element, ion, uptrans, epsilon_trans, statweight) * s_renorm[level];
       assert_always(C >= 0);
       assert_always(std::isfinite(C));
 
diff --git a/nonthermal.cc b/nonthermal.cc
index 9400bde0a..6f505f298 100644
--- a/nonthermal.cc
+++ b/nonthermal.cc
@@ -541,14 +541,15 @@ auto get_approx_shell_occupancies(const int nbound, const int ioncharge) {
 }
 
 auto get_shell_occupancies(const int nbound, const int ioncharge) {
-  assert_always(nbound > 0);
-  assert_always(ioncharge >= 0);
-  const int Z = nbound + ioncharge;
+  assert_testmodeonly(nbound > 0);
+  assert_testmodeonly(ioncharge >= 0);
 
   if constexpr (!NT_WORKFUNCTION_USE_SHELL_OCCUPANCY_FILE) {
     return get_approx_shell_occupancies(nbound, ioncharge);
   }
 
+  const int Z = nbound + ioncharge;
+
   const auto &element_shells_q_neutral = elements_shells_q.at(Z - 1);
   const size_t shellcount = std::min(element_shells_q_neutral.size(), elements_electron_binding[Z - 1].size());
   auto element_shells_q = std::vector<int>(shellcount);
@@ -1504,9 +1505,9 @@ auto select_nt_ionization(const int nonemptymgi) -> std::tuple<int, int> {
 
 auto get_uptransindex(const int element, const int ion, const int lower, const int upper) {
   const int nuptrans = get_nuptrans(element, ion, lower);
-  const auto *const leveluptrans = get_uptranslist(element, ion, lower);
+  const auto *const leveluptranslist = get_uptranslist(element, ion, lower);
   for (int t = 0; t < nuptrans; t++) {
-    if (upper == leveluptrans[t].targetlevelindex) {
+    if (upper == leveluptranslist[t].targetlevelindex) {
       return t;
     }
   }
@@ -1725,16 +1726,17 @@ void analyse_sf_solution(const int nonemptymgi, const int timestep, const bool e
         const double epsilon_trans = epsilon(element, ion, upper) - epsilon(element, ion, lower);
 
         const double ntcollexc_ratecoeff = ntexc.ratecoeffperdeposition * deposition_rate_density;
+        const auto &uptrans = get_uptranslist(element, ion, lower)[uptransindex];
 
         const double t_mid = globals::timesteps[timestep].mid;
-        const double radexc_ratecoeff = rad_excitation_ratecoeff(nonemptymgi, element, ion, lower, uptransindex,
+        const double radexc_ratecoeff = rad_excitation_ratecoeff(nonemptymgi, element, ion, lower, uptrans,
                                                                  epsilon_trans, nnlevel_lower, lineindex, t_mid);
 
-        const double collexc_ratecoeff = col_excitation_ratecoeff(T_e, nne, element, ion, lower, uptransindex,
-                                                                  epsilon_trans, stat_weight(element, ion, lower));
+        const double collexc_ratecoeff =
+            col_excitation_ratecoeff(T_e, nne, element, ion, uptrans, epsilon_trans, stat_weight(element, ion, lower));
 
         const double exc_ratecoeff = radexc_ratecoeff + collexc_ratecoeff + ntcollexc_ratecoeff;
-        const auto coll_str = get_uptranslist(element, ion, lower)[uptransindex].coll_str;
+        const auto coll_str = uptrans.coll_str;
 
         printout(
             "    frac_deposition %.3e Z=%2d ionstage %d lower %4d upper %4d rad_exc %.1e coll_exc %.1e nt_exc %.1e "