diff --git a/artisatomic/readmonsdata.py b/artisatomic/readmonsdata.py index fa568ad..2e1cdd4 100644 --- a/artisatomic/readmonsdata.py +++ b/artisatomic/readmonsdata.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd +import polars as pl from astropy import constants as const from astropy import units as u @@ -72,6 +73,80 @@ def extend_ion_list(ion_handlers): return ion_handlers +def get_transition_data(atomic_number, ion_stage, energiesabovegsinpercm, g_arr, parquet_filepath, flog): + ziparchive_outggf = zipfile.ZipFile(datafilepath / "outggf_Ln_V--VII.zip", "r") + datafilename_transitions = ( + f"outggf_Ln_V--VII/outggf_sorted_{artisatomic.elsymbols[atomic_number]}_{artisatomic.roman_numerals[ion_stage]}" + ) + + with ziparchive_outggf.open(datafilename_transitions) as datafile_transitions: + transition_wavelength_A, energy_levels_lower_1000percm, oscillator_strength = np.loadtxt( + datafile_transitions, unpack=True, delimiter="," + ) + artisatomic.log_and_print(flog, f"transitions: {len(energy_levels_lower_1000percm)}") + + energy_levels_lower_percm = energy_levels_lower_1000percm * 1000 + + # Get index of lower level of transition + lowerlevels = [ + ( + np.abs(energiesabovegsinpercm - energylevellower) # get closest energy in energy level array to lower level + ).argmin() # then get the index with argmin() + for energylevellower in energy_levels_lower_percm + ] + + # get energy of upper level of transition + energy_levels_lower_ev = energy_levels_lower_percm * hc_in_ev_cm + transitionenergyev = hc_in_ev_angstrom / transition_wavelength_A + + energy_levels_upper_ev = transitionenergyev + energy_levels_lower_ev + energy_levels_upper_percm = energy_levels_upper_ev / hc_in_ev_cm + + # Get index of upper level of transition + upperlevels = [ + ( + np.abs(energiesabovegsinpercm - energylevelupper) # get closest energy in energy level array + ).argmin() # then get the index with argmin() + for energylevelupper in energy_levels_upper_percm + ] + + # Get A value from oscillator strength + OSCSTRENGTHCONVERSION = 1.3473837e21 + c_angps = 2.99792458e18 + A_ul = np.array( + [ + osc / (g_arr[upper] / g_arr[lower] * OSCSTRENGTHCONVERSION / (c_angps / lambda_A) ** 2) + for lambda_A, osc, lower, upper in zip( + transition_wavelength_A, oscillator_strength, lowerlevels, upperlevels, strict=False + ) + ] + ) + + dict_transitions = { + "lowerlevels": lowerlevels, + "upperlevels": upperlevels, + "oscillator_strength": oscillator_strength, + "g_lower": [g_arr[lower] for lower in lowerlevels], + "A_ul": A_ul, + "energy_lower_level_ev": energy_levels_lower_ev, + "transitionenergyev": transitionenergyev, + } + df_transitions = pd.DataFrame.from_dict(dict_transitions) + + n_transitions = len(df_transitions) + # n_transitions = df_transitions.shape[0] + assert n_transitions == len( + energy_levels_lower_1000percm + ) # check number of transitions is the same as the number read in + + # Save DataFrame to a Parquet file + df_transitions.to_parquet(parquet_filepath, engine="pyarrow") # or engine="fastparquet" + print(f"Parquet file created ({parquet_filepath})") + # quit() + + return df_transitions, n_transitions + + def read_levels_and_transitions(atomic_number, ion_stage, flog): # Read first file ziparchive_outglv = zipfile.ZipFile(datafilepath / "outglv_Ln_V--VII.zip", "r") @@ -103,34 +178,22 @@ def read_levels_and_transitions(atomic_number, ion_stage, flog): for g, energyabovegsinpercm in zip(g_arr, energiesabovegsinpercm, strict=True) ] - # Read next file - ziparchive_outggf = zipfile.ZipFile(datafilepath / "outggf_Ln_V--VII.zip", "r") - datafilename_transitions = ( - f"outggf_Ln_V--VII/outggf_sorted_{artisatomic.elsymbols[atomic_number]}_{artisatomic.roman_numerals[ion_stage]}" - ) - - with ziparchive_outggf.open(datafilename_transitions) as datafile_transitions: - transition_wavelength_A, energy_levels_lower_1000percm, oscillator_strength = np.loadtxt( - datafile_transitions, unpack=True, delimiter="," + # Get next file + parquet_filename = f"outggf_{atomic_number}_{ion_stage}.parquet" + parquet_filepath = datafilepath / parquet_filename + if parquet_filepath.is_file(): + df_transitions = pd.read_parquet(parquet_filepath, engine="pyarrow") + n_transitions = len(df_transitions) + artisatomic.log_and_print(flog, f"transitions: {n_transitions} \n Read transitions from {parquet_filename}") + else: + df_transitions, n_transitions = get_transition_data( + atomic_number, ion_stage, energiesabovegsinpercm, g_arr, parquet_filepath, flog ) - artisatomic.log_and_print(flog, f"transitions: {len(energy_levels_lower_1000percm)}") - - energy_levels_lower_percm = energy_levels_lower_1000percm * 1000 - - # Get index of lower level of transition - lowerlevels = [ - ( - np.abs(energiesabovegsinpercm - energylevellower) # get closest energy in energy level array to lower level - ).argmin() # then get the index with argmin() - for energylevellower in energy_levels_lower_percm - ] + # Get ionization energy ionization_energy_in_ev_nist = artisatomic.get_nist_ionization_energies_ev()[(atomic_number, ion_stage)] - # get energy of upper level of transition - energy_levels_lower_ev = energy_levels_lower_percm * hc_in_ev_cm - transitionenergyev = hc_in_ev_angstrom / transition_wavelength_A - ionization_energy_in_ev = max(transitionenergyev) + ionization_energy_in_ev = max(df_transitions["transitionenergyev"]) artisatomic.log_and_print( flog, f"ionization energy: {ionization_energy_in_ev} eV (NIST: {ionization_energy_in_ev_nist} eV)" ) @@ -143,49 +206,62 @@ def read_levels_and_transitions(atomic_number, ion_stage, flog): flog, f"Energies do not match -- using NIST value of {ionization_energy_in_ev_nist} eV" ) - energy_levels_upper_ev = transitionenergyev + energy_levels_lower_ev - energy_levels_upper_percm = energy_levels_upper_ev / hc_in_ev_cm + df_transitions = pl.from_pandas(df_transitions) - # Get index of upper level of transition - upperlevels = [ - ( - np.abs(energiesabovegsinpercm - energylevelupper) # get closest energy in energy level array - ).argmin() # then get the index with argmin() - for energylevelupper in energy_levels_upper_percm - ] + cut_on_log_gf = False + if cut_on_log_gf: + df_transitions = df_transitions.with_columns( + (pl.col("oscillator_strength") * pl.col("g_lower")).log10().alias("log(gf)") + ) + cut_value = -3 + df_transitions = df_transitions.filter(pl.col("log(gf)") >= cut_value) + n_new_transitions = df_transitions.shape[0] + artisatomic.log_and_print( + flog, + f"Cut placed to reduce number of transitions: log(gf) > {cut_value} \n" + f"{n_transitions} transitions reduced to {n_new_transitions} transitions" + f" (removed {n_transitions-n_new_transitions})", + ) - # Get A value from oscillator strength - OSCSTRENGTHCONVERSION = 1.3473837e21 - c_angps = 2.99792458e18 - A_ul = np.array( - [ - osc / (g_arr[upper] / g_arr[lower] * OSCSTRENGTHCONVERSION / (c_angps / lambda_A) ** 2) - for lambda_A, osc, lower, upper in zip( - transition_wavelength_A, oscillator_strength, lowerlevels, upperlevels, strict=False - ) - ] - ) + cut_on_excitation_energy = False + if cut_on_excitation_energy: + cut_temperature = 150000 # K + + KB = 8.617e-5 # /// Boltzmann constant eV/K + thermal_energy = KB * cut_temperature + + n_old_transitions = df_transitions.shape[0] + df_transitions = df_transitions.filter(pl.col("energy_lower_level_ev") < thermal_energy) + n_new_transitions = df_transitions.shape[0] + artisatomic.log_and_print( + flog, + f"Cut placed to reduce number of transitions: lower level energy < kT " + f"(T={cut_temperature} K, kT={thermal_energy} eV) \n" + f"{n_old_transitions} transitions reduced to {n_new_transitions} transitions" + f" (removed {n_old_transitions - n_new_transitions})", + ) transitions = [ TransitionTuple( - lowerlevel=lower + 1, - upperlevel=upper + 1, - A=A, + lowerlevel=int(row["lowerlevels"]) + 1, + upperlevel=int(row["upperlevels"]) + 1, + A=row["A_ul"], coll_str=-1, ) - for A, lower, upper in zip(A_ul, lowerlevels, upperlevels, strict=False) + for row in df_transitions.to_dicts() ] transition_count_of_level_name = defaultdict(int) - for lower, upper in zip(lowerlevels, upperlevels, strict=True): + for row in df_transitions.iter_rows(named=True): + lower = row["lowerlevels"] + upper = row["upperlevels"] transition_count_of_level_name[energy_levels[lower + 1].levelname] += 1 transition_count_of_level_name[energy_levels[upper + 1].levelname] += 1 - assert len(transitions) == len( - energy_levels_lower_1000percm - ) # check number of transitions is the same as the number read in + if cut_on_log_gf: + assert len(transitions) == n_new_transitions + else: + assert len(transitions) == n_transitions + # check number of transitions is what we expect return ionization_energy_in_ev, energy_levels, transitions, transition_count_of_level_name - - -# read_levels_and_transitions(atomic_number=57, ion_stage=5, flog=None) diff --git a/requirements.txt b/requirements.txt index 5706c5a..d7c4b95 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ mypy>=1.6.1 numpy>=1.26.1 pandas>=2.1.2 pidly>=0.2.7 +polars>=1.12.0 pre-commit>=3.5.0 pytest>=7.4.3 pytest-cov>=4.1.0