Skip to content

Commit

Permalink
Improve hybrid NLTE performance (#133)
Browse files Browse the repository at this point in the history
Total ion photoionisation rate was being recalculated at every iteration
of the T_e and nne for ions of elements without NLTE levels, which
caused major slowdowns (4x runtime for simple one-zone). The total ion
photoionisation rate coefficient for ions of elements without NLTE
levels is now calculated only once per T_e change.
  • Loading branch information
lukeshingles authored Oct 28, 2024
1 parent 4a02324 commit bdc6959
Show file tree
Hide file tree
Showing 11 changed files with 119 additions and 127 deletions.
4 changes: 2 additions & 2 deletions atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ inline auto get_nlevels(const int element, const int ion) -> int {
}

// Return the number of levels associated with an ion that have energies below the ionisation threshold.
[[nodiscard]] inline auto get_ionisinglevels(const int element, const int ion) -> int {
[[nodiscard]] inline auto get_nlevels_ionising(const int element, const int ion) -> int {
assert_testmodeonly(element < get_nelements());
assert_testmodeonly(ion < get_nions(element));
return globals::elements[element].ions[ion].ionisinglevels;
Expand All @@ -81,7 +81,7 @@ inline auto get_nphixstargets(const int element, const int ion, const int level)
assert_testmodeonly(ion < get_nions(element));
const auto nphixstargets = globals::elements[element].ions[ion].levels[level].nphixstargets;
assert_testmodeonly(nphixstargets == 0 ||
((ion < (get_nions(element) - 1)) && (level < get_ionisinglevels(element, ion))));
((ion < (get_nions(element) - 1)) && (level < get_nlevels_ionising(element, ion))));
return nphixstargets;
}

Expand Down
3 changes: 1 addition & 2 deletions grid.cc
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ void allocate_nonemptymodelcells() {
const auto ionestimcount = nonempty_npts_model * globals::nbfcontinua_ground;
const auto ionestimsize = ionestimcount * sizeof(double);

if (USE_LUT_PHOTOION && ionestimsize > 0) {
if (ionestimsize > 0) {
#ifdef MPI_ON
const auto [_, noderank_nonemptycellcount] =
get_range_chunk(nonempty_npts_model, globals::node_nprocs, globals::rank_in_node);
Expand Down Expand Up @@ -1385,7 +1385,6 @@ void setup_grid_cartesian_3d() {
for (int axis = 0; axis < 3; axis++) {
assert_always(nxyz[axis] == get_cellcoordpointnum(n, axis));
cell[n].pos_min[axis] = -globals::rmax + (2 * nxyz[axis] * globals::rmax / ncoordgrid[axis]);
// cell[n].xyz[axis] = nxyz[axis];
}

assert_always(n == nxyz[2] * ncoordgrid[1] * ncoordgrid[2] + nxyz[1] * ncoordgrid[0] + nxyz[0]);
Expand Down
65 changes: 31 additions & 34 deletions input.cc
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ void read_phixs_file(const int phixs_file_version, std::vector<float> &tmpallphi
assert_always(lowerlevel >= 0);

// store only photoionization crosssections for ions that are part of the current model atom
if (lowerion >= 0 && upperion < get_nions(element) && lowerlevel < get_ionisinglevels(element, lowerion)) {
if (lowerion >= 0 && upperion < get_nions(element) && lowerlevel < get_nlevels_ionising(element, lowerion)) {
read_phixs_data_table(phixsfile, nphixspoints_inputtable, element, lowerion, lowerlevel, upperion,
upperlevel_in, tmpallphixs, &mem_usage_phixs, phixs_file_version);

Expand Down Expand Up @@ -710,30 +710,28 @@ void setup_phixs_list() {
printout("[info] read_atomicdata: number of bfcontinua %d\n", globals::nbfcontinua);
printout("[info] read_atomicdata: number of ground-level bfcontinua %d\n", globals::nbfcontinua_ground);

if constexpr (USE_LUT_PHOTOION || USE_LUT_BFHEATING) {
globals::groundcont.resize(globals::nbfcontinua_ground);
globals::groundcont.resize(globals::nbfcontinua_ground);

int groundcontindex = 0;
for (int element = 0; element < get_nelements(); element++) {
const int nions = get_nions(element);
for (int ion = 0; ion < nions - 1; ion++) {
const int level = 0;
const int nphixstargets = get_nphixstargets(element, ion, level);
if (nphixstargets == 0) {
continue;
}
const double E_threshold = get_phixs_threshold(element, ion, level, 0);
const double nu_edge = E_threshold / H;
assert_always(groundcontindex < globals::nbfcontinua_ground);
int nextgroundcontindex = 0;
for (int element = 0; element < get_nelements(); element++) {
const int nions = get_nions(element);
for (int ion = 0; ion < nions - 1; ion++) {
const int level = 0;
const int nphixstargets = get_nphixstargets(element, ion, level);
if (nphixstargets == 0) {
continue;
}
const double E_threshold = get_phixs_threshold(element, ion, level, 0);
const double nu_edge = E_threshold / H;
assert_always(nextgroundcontindex < globals::nbfcontinua_ground);

globals::groundcont[groundcontindex] = {.nu_edge = nu_edge, .element = element, .ion = ion};
globals::groundcont[nextgroundcontindex] = {.nu_edge = nu_edge, .element = element, .ion = ion};

groundcontindex++;
}
nextgroundcontindex++;
}
assert_always(groundcontindex == globals::nbfcontinua_ground);
std::ranges::SORT_OR_STABLE_SORT(globals::groundcont, std::ranges::less{}, &GroundPhotoion::nu_edge);
}
assert_always(nextgroundcontindex == globals::nbfcontinua_ground);
std::ranges::SORT_OR_STABLE_SORT(globals::groundcont, std::ranges::less{}, &GroundPhotoion::nu_edge);

auto *nonconstallcont =
static_cast<FullPhotoionTransition *>(malloc(globals::nbfcontinua * sizeof(FullPhotoionTransition)));
Expand All @@ -743,18 +741,16 @@ void setup_phixs_list() {
for (int element = 0; element < get_nelements(); element++) {
const int nions = get_nions(element);
for (int ion = 0; ion < nions - 1; ion++) {
if constexpr (USE_LUT_PHOTOION || USE_LUT_BFHEATING) {
globals::elements[element].ions[ion].groundcontindex =
static_cast<int>(std::ranges::find_if(globals::groundcont,
[=](const auto &groundcont) {
return (groundcont.element == element) && (groundcont.ion == ion);
}) -
globals::groundcont.begin());
if (globals::elements[element].ions[ion].groundcontindex >= globals::nbfcontinua_ground) {
globals::elements[element].ions[ion].groundcontindex = -1;
}
globals::elements[element].ions[ion].groundcontindex =
static_cast<int>(std::ranges::find_if(globals::groundcont,
[=](const auto &groundcont) {
return (groundcont.element == element) && (groundcont.ion == ion);
}) -
globals::groundcont.begin());
if (globals::elements[element].ions[ion].groundcontindex >= globals::nbfcontinua_ground) {
globals::elements[element].ions[ion].groundcontindex = -1;
}
const int nlevels = get_ionisinglevels(element, ion);
const int nlevels = get_nlevels_ionising(element, ion);
for (int level = 0; level < nlevels; level++) {
const int nphixstargets = get_nphixstargets(element, ion, level);

Expand Down Expand Up @@ -1455,7 +1451,7 @@ void write_bflist_file() {
for (int element = 0; element < get_nelements(); element++) {
const int nions = get_nions(element);
for (int ion = 0; ion < nions; ion++) {
const int nlevels = get_ionisinglevels(element, ion);
const int nlevels = get_nlevels_ionising(element, ion);
for (int level = 0; level < nlevels; level++) {
const auto nphixstargets = get_nphixstargets(element, ion, level);
for (int phixstargetindex = 0; phixstargetindex < nphixstargets; phixstargetindex++) {
Expand Down Expand Up @@ -1572,9 +1568,10 @@ void read_atomicdata() {
"[input] ionstage %d: %4d levels (%d in groundterm, %4d ionising) %7d lines %6d bf transitions "
"(epsilon_ground: %7.2f eV)\n",
get_ionstage(element, ion), get_nlevels(element, ion), get_nlevels_groundterm(element, ion),
get_ionisinglevels(element, ion), ion_bbtransitions, ion_photoiontransitions, epsilon(element, ion, 0) / EV);
get_nlevels_ionising(element, ion), ion_bbtransitions, ion_photoiontransitions,
epsilon(element, ion, 0) / EV);

includedionisinglevels += get_ionisinglevels(element, ion);
includedionisinglevels += get_nlevels_ionising(element, ion);
includedphotoiontransitions += ion_photoiontransitions;
includedboundboundtransitions += ion_bbtransitions;
}
Expand Down
6 changes: 3 additions & 3 deletions kpkt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ auto calculate_cooling_rates_ion(const int modelgridindex, const int element, co
double C_ion = 0.;
int i = indexionstart; // NOLINT(misc-const-correctness)

const int nionisinglevels = get_ionisinglevels(element, ion);
const int nionisinglevels = get_nlevels_ionising(element, ion);
const double nncurrention = get_nnion(modelgridindex, element, ion);

// ff creation of rpkt
Expand Down Expand Up @@ -192,7 +192,7 @@ void set_ncoolingterms() {
// All the levels add number of col excitations
const int nlevels = get_nlevels(element, ion);
for (int level = 0; level < nlevels; level++) {
// if (ion < nions - 1) and (level < get_ionisinglevels(element,ion))
// if (ion < nions - 1) and (level < get_nlevels_ionising(element,ion))
if (ion < nions - 1) {
ionterms += 2 * get_nphixstargets(element, ion, level);
}
Expand Down Expand Up @@ -309,7 +309,7 @@ void setup_coolinglist() {
for (int ion = 0; ion < nions; ion++) {
const int nlevels_currention = get_nlevels(element, ion);

const int nionisinglevels = get_ionisinglevels(element, ion);
const int nionisinglevels = get_nlevels_ionising(element, ion);

// ff creation of rpkt
// -------------------
Expand Down
16 changes: 7 additions & 9 deletions ltepop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,12 @@ auto phi_ion_equilib(const int element, const int ion, const int modelgridindex,

const auto T_e = grid::get_Te(modelgridindex);

double Gamma = 0.;
Gamma = globals::gammaestimator[get_ionestimindex_nonemptymgi(nonemptymgi, element, ion)];
// photoionisation plus collisional ionisation rate coefficient per ground level pop
const double Gamma = globals::gammaestimator[get_ionestimindex_nonemptymgi(nonemptymgi, element, ion)];

// Gamma is the photoionization rate per ground level pop
const double Gamma_ion = Gamma * stat_weight(element, ion, 0) / partfunc_ion;

if (Gamma == 0. && (!NT_ON || (globals::dep_estimator_gamma[nonemptymgi] == 0. &&
grid::get_modelinitnucmassfrac(modelgridindex, decay::get_nucindex(24, 48)) == 0. &&
grid::get_modelinitnucmassfrac(modelgridindex, decay::get_nucindex(28, 56)) == 0.))) {
printout("Fatal: Gamma = 0 for element %d, ion %d in phi ... abort\n", element, ion);
std::abort();
}

const double Alpha_sp = interpolate_ions_spontrecombcoeff(uniqueionindex, T_e);

// const double Col_rec = calculate_ionrecombcoeff(modelgridindex, T_e, element, ion + 1, false, true, false, false,
Expand All @@ -94,6 +87,11 @@ auto phi_ion_equilib(const int element, const int ion, const int modelgridindex,

const double gamma_nt = NT_ON ? nonthermal::nt_ionization_ratecoeff(modelgridindex, element, ion) : 0.;

if ((Gamma + gamma_nt) == 0) {
printout("Fatal: Gamma = 0 for element %d, ion %d in phi ... abort\n", element, ion);
std::abort();
}

const double phi = (Alpha_sp + Col_rec) / (Gamma_ion + gamma_nt);

// Y_nt should generally be higher than the Gamma term for nebular epoch
Expand Down
12 changes: 5 additions & 7 deletions macroatom.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,7 @@ auto calculate_macroatom_transitionrates(const int modelgridindex, const int ele
double sum_radrecomb = 0.;
double sum_colrecomb = 0.;
if (ion > 0 && level <= get_maxrecombininglevel(element, ion)) {
// nlevels = get_nlevels(element,ion-1);
const int nlevels = get_ionisinglevels(element, ion - 1);
// nlevels = get_ionisinglevels(element,ion-1);
const int nlevels = get_nlevels_ionising(element, ion - 1);
for (int lower = 0; lower < nlevels; lower++) {
const double epsilon_target = epsilon(element, ion - 1, lower);
const double epsilon_trans = epsilon_current - epsilon_target;
Expand All @@ -129,7 +127,7 @@ auto calculate_macroatom_transitionrates(const int modelgridindex, const int ele
// Transitions to higher ionisation stages
double sum_up_highernt = 0.;
double sum_up_higher = 0.;
const int ionisinglevels = get_ionisinglevels(element, ion);
const int ionisinglevels = get_nlevels_ionising(element, ion);
if (ion < get_nions(element) - 1 && level < ionisinglevels) {
if (NT_ON) {
sum_up_highernt = nonthermal::nt_ionization_ratecoeff(modelgridindex, element, ion) * epsilon_current;
Expand Down Expand Up @@ -231,7 +229,7 @@ void do_macroatom_raddeexcitation(Packet &pkt, const int element, const int ion,
// Randomly select a continuum
const double targetval = rng_uniform() * rad_recomb;
double rate = 0;
const int nlevels = get_ionisinglevels(element, upperion - 1);
const int nlevels = get_nlevels_ionising(element, upperion - 1);
int lowerionlevel = 0;
for (lowerionlevel = 0; lowerionlevel < nlevels; lowerionlevel++) {
const double epsilon_trans = epsilon_current - epsilon(element, upperion - 1, lowerionlevel);
Expand Down Expand Up @@ -496,8 +494,8 @@ __host__ __device__ void do_macroatom(Packet &pkt, const MacroAtomState &pktmast
double rate = 0.;
// nlevels = get_nlevels(element,ion-1);

const int nlevels = get_ionisinglevels(element, ion - 1);
// nlevels = get_ionisinglevels(element,ion-1);
const int nlevels = get_nlevels_ionising(element, ion - 1);
// nlevels = get_nlevels_ionising(element,ion-1);
int lower = 0;
for (lower = 0; lower < nlevels; lower++) {
const double epsilon_target = epsilon(element, ion - 1, lower);
Expand Down
2 changes: 1 addition & 1 deletion nltepop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ void nltepop_matrix_add_ionisation(const int modelgridindex, const int element,
assert_always(ion + 1 < get_nions(element)); // can't ionise the top ion
const auto T_e = grid::get_Te(modelgridindex);
const float nne = grid::get_nne(modelgridindex);
const int nionisinglevels = get_ionisinglevels(element, ion);
const int nionisinglevels = get_nlevels_ionising(element, ion);
const int maxrecombininglevel = get_maxrecombininglevel(element, ion + 1);

for (int level = 0; level < nionisinglevels; level++) {
Expand Down
29 changes: 14 additions & 15 deletions ratecoeff.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ auto read_ratecoeff_dat(FILE *ratecoeff_file) -> bool
assert_always(
fscanf(ratecoeff_file, "%d %d %d %d\n", &in_element, &in_ionstage, &in_levels, &in_ionisinglevels) == 4);
const int nlevels = get_nlevels(element, ion);
const int ionisinglevels = get_ionisinglevels(element, ion);
const int ionisinglevels = get_nlevels_ionising(element, ion);
if (get_atomicnumber(element) != in_element || get_ionstage(element, ion) != in_ionstage ||
nlevels != in_levels || ionisinglevels != in_ionisinglevels) {
printout(
Expand All @@ -169,9 +169,7 @@ auto read_ratecoeff_dat(FILE *ratecoeff_file) -> bool
const int nions = get_nions(element) - 1;
for (int ion = 0; ion < nions; ion++) {
// nlevels = get_nlevels(element,ion);
const int nlevels = get_ionisinglevels(element, ion); // number of ionising levels associated with current ion
// int nbfcont = get_ionisinglevels(element,ion); // number of ionising levels of the current ion which
// are used in the simulation
const int nlevels = get_nlevels_ionising(element, ion); // number of ionising levels associated with current ion
for (int level = 0; level < nlevels; level++) {
// Loop over the phixs target states
const int nphixstargets = get_nphixstargets(element, ion, level);
Expand Down Expand Up @@ -234,15 +232,15 @@ void write_ratecoeff_dat() {
const int nions = get_nions(element);
for (int ion = 0; ion < nions; ion++) {
fprintf(ratecoeff_file, "%d %d %d %d\n", get_atomicnumber(element), get_ionstage(element, ion),
get_nlevels(element, ion), get_ionisinglevels(element, ion));
get_nlevels(element, ion), get_nlevels_ionising(element, ion));
}
}

for (int element = 0; element < get_nelements(); element++) {
const int nions = get_nions(element) - 1;
for (int ion = 0; ion < nions; ion++) {
// nlevels = get_nlevels(element,ion);
const int nlevels = get_ionisinglevels(element, ion);
const int nlevels = get_nlevels_ionising(element, ion);
for (int level = 0; level < nlevels; level++) {
// Loop over the phixs targets
const auto nphixstargets = get_nphixstargets(element, ion, level);
Expand Down Expand Up @@ -353,7 +351,7 @@ void precalculate_rate_coefficient_integrals() {
for (int ion = 0; ion < nions; ion++) {
const int atomic_number = get_atomicnumber(element);
const int ionstage = get_ionstage(element, ion);
const int nlevels = get_ionisinglevels(element, ion);
const int nlevels = get_nlevels_ionising(element, ion);
printout("Performing rate integrals for Z = %d, ionstage %d...\n", atomic_number, ionstage);

gsl_error_handler_t *previous_handler = gsl_set_error_handler(gsl_error_handler_printout);
Expand Down Expand Up @@ -563,7 +561,7 @@ void read_recombrate_file() {
assert_always(T_highestbelow.log_Te > 0);
assert_always(T_lowestabove.log_Te > 0);

const int nlevels = get_ionisinglevels(element, ion - 1);
const int nlevels = get_nlevels_ionising(element, ion - 1);

const double x = (log_Te_estimate - T_highestbelow.log_Te) / (T_lowestabove.log_Te - T_highestbelow.log_Te);
const double input_rrc_low_n = (x * T_highestbelow.rrc_low_n) + ((1 - x) * T_lowestabove.rrc_low_n);
Expand Down Expand Up @@ -654,7 +652,7 @@ void precalculate_ion_alpha_sp() {
const int nions = get_nions(element) - 1;
for (int ion = 0; ion < nions; ion++) {
const auto uniqueionindex = get_uniqueionindex(element, ion);
const int nionisinglevels = get_ionisinglevels(element, ion);
const int nionisinglevels = get_nlevels_ionising(element, ion);
double zeta = 0.;
for (int level = 0; level < nionisinglevels; level++) {
const auto nphixstargets = get_nphixstargets(element, ion, level);
Expand Down Expand Up @@ -824,15 +822,13 @@ auto get_nlevels_important(const int modelgridindex, const int element, const in
}

double nnlevelsum = 0.;
int nlevels_important = get_ionisinglevels(element, ion); // levels needed to get majority of ion pop
int nlevels_important = get_nlevels_ionising(element, ion); // levels needed to get majority of ion pop

// debug: treat all ionising levels as important
// *nnlevelsum_out = nnion_real;
// return nlevels_important;

for (int lower = 0;
(nnlevelsum / nnion_real < IONGAMMA_POPFRAC_LEVELS_INCLUDED) && (lower < get_ionisinglevels(element, ion));
lower++) {
for (int lower = 0; lower < get_nlevels_ionising(element, ion); lower++) {
double nnlowerlevel{NAN};
if (assume_lte) {
const double T_exc = T_e; // remember, other parts of the code in LTE mode use TJ, not T_e
Expand All @@ -847,6 +843,9 @@ auto get_nlevels_important(const int modelgridindex, const int element, const in
}
nnlevelsum += nnlowerlevel;
nlevels_important = lower + 1;
if ((nnlevelsum / nnion_real) >= IONGAMMA_POPFRAC_LEVELS_INCLUDED) {
break;
}
}
assert_always(nlevels_important <= get_nlevels(element, ion));
return {nlevels_important, nnlevelsum};
Expand Down Expand Up @@ -1285,7 +1284,7 @@ auto iongamma_is_zero(const int nonemptymgi, const int element, const int ion) -
}
const int modelgridindex = grid::get_mgi_of_nonemptymgi(nonemptymgi);

if constexpr (USE_LUT_PHOTOION) {
if (USE_LUT_PHOTOION || !elem_has_nlte_levels(element)) {
return (globals::gammaestimator[get_ionestimindex_nonemptymgi(nonemptymgi, element, ion)] == 0);
}

Expand Down Expand Up @@ -1331,7 +1330,7 @@ auto calculate_iongamma_per_gspop(const int modelgridindex, const int element, c

double Col_ion = 0.;
for (int level = 0; level < nlevels_important; level++) {
const double nnlevel = get_levelpop(modelgridindex, element, ion, level);
const double nnlevel = calculate_levelpop(modelgridindex, element, ion, level);
const int nphixstargets = get_nphixstargets(element, ion, level);
for (int phixstargetindex = 0; phixstargetindex < nphixstargets; phixstargetindex++) {
const int upperlevel = get_phixsupperlevel(element, ion, level, phixstargetindex);
Expand Down
1 change: 0 additions & 1 deletion rpkt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,6 @@ void calculate_chi_rpkt_cont(const double nu_cmf, Rpkt_continuum_absorptioncoeff
} else {
// in the other cases chi_grey is an mass absorption coefficient
// therefore use the mass density
// sigma = globals::cell[pkt.where].chi_grey * globals::cell[pkt.where].rho;
// sigma = SIGMA_T * nne;

chi_escat = 0.;
Expand Down
Loading

0 comments on commit bdc6959

Please sign in to comment.