def recursively_update_dict(base, new):
    """
    Update items and subitems in one nested dict-like object based on another.

    For each key in ``new``, replace ``base[key]`` with ``new[key]``, unless
    ``new[key]`` also has a ``.keys`` attribute and the key already exists in
    ``base``, in which case this function is called recursively to merge the
    sub-dict-like. ``base`` is modified in place; nothing is returned.

    Parameters
    ----------
    base: dict-like
        Input (nested) dictionary to update
    new: dict-like
        (Nested) dictionary entries to update in base
    """
    for key in new:
        value = new[key]
        if key in base and hasattr(value, "keys"):
            # A scalar in base cannot be merged into: swap it for an empty
            # dict so the nested entries from ``new`` have somewhere to go.
            if not hasattr(base[key], "keys"):
                base[key] = {}
            recursively_update_dict(base[key], value)
        else:
            base[key] = value


def _find_comment_start(line):
    """
    Return the index of the ``#`` that starts a comment on a TOML line.

    A ``#`` inside a basic (double-quoted) or literal (single-quoted) string
    is part of the value, not a comment, and is skipped. Backslash escapes
    are honoured only inside double-quoted strings. Returns -1 when the line
    has no comment.
    """
    quote = None  # the quote character of the string we are inside, if any
    escaped = False
    for idx, char in enumerate(line):
        if quote is None:
            if char == "#":
                return idx
            if char in ('"', "'"):
                quote = char
        elif escaped:
            escaped = False
        elif char == "\\" and quote == '"':
            escaped = True
        elif char == quote:
            quote = None
    return -1


def beautify_document(doc, max_inline_length=20, max_line_length=88):
    """
    Move inline comments to the line above their item in a TOML string.

    Table headers (lines starting with ``[``) are additionally framed by a
    ``# ==== #`` ruler, with their comment placed between two rulers. Quoting
    is tracked one line at a time, so a ``#`` inside a string value is never
    mistaken for a comment; multi-line strings spanning several lines are not
    tracked.

    Parameters
    ----------
    doc: str (TOML format)
        string representation of document being updated
    max_inline_length: int, default 20
        Maximum length of comment to leave as inline, but total line
        length must still be less than max_line_length or comment will be moved
    max_line_length: int, default 88
        Maximum line length for wrapping comment text

    Returns
    -------
    str
        The reformatted document
    """
    fancy_line = "# " + "=" * (max_line_length - 4) + " #"
    new_lines = []
    for line in doc.split("\n"):
        is_category = line.startswith("[")
        if is_category:
            # Separate a new section from preceding content, then open the
            # header frame.
            if new_lines and new_lines[-1] != "":
                new_lines.append("")
                new_lines.append("")
            new_lines.append(fancy_line)

        split_loc = _find_comment_start(line)
        if split_loc < 0:
            new_lines.append(line)
            continue

        text = line[:split_loc]
        comment = line[split_loc + 1 :]
        must_move = (
            len(comment) > max_inline_length
            or len(line) > max_line_length
            or is_category
        )
        if not must_move:
            # Short comment on a short line: leave it inline.
            new_lines.append(line)
            continue

        if new_lines and new_lines[-1] != "" and not is_category:
            new_lines.append("")  # blank line before the comment block
        for comment_line in textwrap.wrap(comment, max_line_length - 2):
            # Drop the single space that followed the original '#'.
            if comment_line.startswith(" "):
                comment_line = comment_line[1:]
            new_lines.append("# " + comment_line)
        if is_category:
            new_lines.append(fancy_line)
        new_lines.append(text)
        new_lines.append("")
    return "\n".join(new_lines)
class TomlParmParse:
    """
    Wrapper for tomlkit document class.

    Enables a few new features:
    - Easier access to nested parameters
    - Autodocumentation of inputs
    - Combining inputs from the command line and an input file
    - Saving the config that was actually used
    - Optionally raise errors for unused inputs

    Inspired by the ParmParse class from AMReX, but quite different.

    Parameters
    ----------
    datadict: tomlkit.TOMLDocument
        input TOMLDocument that has been read in
    accessed_data: tomlkit.TOMLDocument, optional
        shows which entries from datadict have already been accessed.
        Default None (an empty tomlkit document is created).
    name: str, optional
        Label identifying this object in error messages. Default "".
    base: TomlParmParse, optional
        Parent TomlParmParse object. If None, this is a base TomlParmParse
        object and output will occur during garbage collecting if requested.
        Default None.
    output: string, optional kwarg
        File in which to save output; its directory is created if needed.
        Default None (no output saved).
    output_type: str, optional kwarg
        Type of output to save: "clean" will save only inputs used with no comments.
        "doc" will save only inputs used with comments generated based on doc info
        provided when accessing variables. "original" keeps all variables, comments,
        and formatting from the provided input file. Default "clean".
    no_overwrite: bool, optional kwarg
        Raise an error if multiple different values are set/accessed for a variable.
        Default False.
    error_unused: bool, optional kwarg
        Raise an error for unused variables in input file. Default False.
    live_update: bool, optional kwarg
        Continuously update output file as code runs. Default False.

    Raises
    ------
    ValueError
        If output_type is not one of "clean", "doc" or "original".
    """

    def __init__(
        self, datadict, accessed_data=None, name="", base=None, **kwargs
    ):
        self.data = datadict

        if accessed_data is None:
            accessed_data = tomlkit.document()
        self.accessed_data = accessed_data

        self.name = name
        self.base = base
        self.output = kwargs.get("output", None)
        # Always define output_dir so the attribute exists with or without output.
        self.output_dir = None
        if self.output is not None:
            self.output_dir = os.path.split(self.output)[0]
            # Only the root object creates the directory; children share it.
            if self.base is None and len(self.output_dir) > 0:
                os.makedirs(self.output_dir, exist_ok=True)
        self.output_type = kwargs.get("output_type", "clean")
        self.no_overwrite = kwargs.get("no_overwrite", False)
        self.error_unused = kwargs.get("error_unused", False)
        self.live_update = kwargs.get("live_update", False)
        output_types = ["clean", "doc", "original"]
        if self.output_type not in output_types:
            raise ValueError(
                f"Output type ({self.output_type}) must be one of {output_types}"
            )
        # Kept so sub-objects created in __getitem__ inherit the same options.
        self.kwargs = kwargs

    @classmethod
    def parse_file(cls, file_name=None, additional_args=None, **kwargs):
        """
        Parse a file and/or string into a TomlParmParse object.

        Parameters
        ----------
        file_name: str (path-like), optional
            File to load as a tomlkit Document
        additional_args: str (toml), optional
            TOML format string of parameters to add to the file; entries
            here override those loaded from file_name
        kwargs: optional
            Passed to TomlParmParse constructor

        Returns
        -------
        tpp: TomlParmParse
            A TOML ParmParser

        Raises
        ------
        FileNotFoundError
            If file_name is given but cannot be found.
        RuntimeError
            If neither file_name nor additional_args is given.
        """
        if file_name is not None:
            try:
                # TOML documents are UTF-8 by specification.
                with open(file_name, encoding="utf-8") as tomlfile:
                    data = tomlkit.load(tomlfile)
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    f"TomlParmParse: Input file <{file_name}> not found!"
                ) from err
            name = f"<{file_name}>"
            if additional_args is not None:
                addl_data = tomlkit.loads(additional_args)
                recursively_update_dict(data, addl_data)
                name = f"<{file_name}+cl_args>"

        elif additional_args is not None:
            data = tomlkit.loads(additional_args)
            name = ""
        else:
            raise RuntimeError(
                "TomlParmParse: must provide input file or arguments on command line"
            )

        return cls(data, name=name, base=None, **kwargs)

    @classmethod
    def parse_args(cls, description=None, infile=None):
        """
        Parse command line arguments specifying file and arguments to create a TPP.

        Parameters
        ----------
        description: str, optional
            Short description of program for which config is being loaded
        infile: str, optional
            Default TOML input file to use. If None, the running script's
            path (sys.argv[0]) with its extension replaced by ".toml".

        Returns
        -------
        tpp: TomlParmParse
            A TOML ParmParser
        """
        if description is None:
            description = "A tool using the TomlParmParse class to parse inputs"
        description += (
            " --- This program uses the TomlParmParse utility to manage "
            "input/config files and command line arguments. Arguments can "
            "be auto-documented by enabling output of the 'doc' type."
        )
        if infile is None:
            # Swap only the final extension; a plain str.replace would also
            # rewrite ".py" occurring elsewhere in the path.
            infile = os.path.splitext(sys.argv[0])[0] + ".toml"
        parser = argparse.ArgumentParser(description=description)
        parser.add_argument(
            "infile", nargs="?", default=infile, help="Input file to parse inputs from"
        )
        parser.add_argument(
            "-o",
            "--output",
            default=None,
            help="File in which to write used inputs/outputs",
        )
        parser.add_argument(
            "-t",
            "--output_type",
            default="doc",
            help="Type of output file: *doc*: include comments documenting used inputs "
            "*clean*: no comments, only used inputs *original*: input file in "
            "original formatting",
        )
        parser.add_argument(
            "-a", "--args", default=None, help="Override arguments, as a toml string"
        )
        parser.add_argument(
            "-w",
            "--no_overwrite",
            action="store_true",
            help="Disallow overwriting entries once they have been used or set",
        )
        parser.add_argument(
            "-e",
            "--error_unused",
            action="store_true",
            help="Raise error if there are unused inputs",
        )
        parser.add_argument(
            "-l",
            "--live_update",
            action="store_true",
            help="Update output file continuously as code runs",
        )
        args = parser.parse_args()
        if args.args is not None:
            # Allow several TOML statements in one shell argument, separated
            # by a literal "\n" or by ";".
            args.args = args.args.replace("\\n", "\n").replace(";", "\n")
        return cls.parse_file(
            args.infile,
            additional_args=args.args,
            output=args.output,
            output_type=args.output_type,
            no_overwrite=args.no_overwrite,
            error_unused=args.error_unused,
            live_update=args.live_update,
        )

    def __repr__(self):
        """Provide string representation of data (as nested dict)."""
        return self.data.__repr__()

    def __getitem__(self, item_name):
        """
        Return a sub-TomlParmParse object, or value if requesting a leaf.

        Periods in item_name separate hierarchy levels. Returns None when the
        item (or any level on the way to it) is not present. Accessing an
        item records it in accessed_data.
        """
        if item_name.count(".") > 0:
            # Resolve the first level, then recurse on the remainder.
            split_loc = item_name.index(".")
            prefix = item_name[:split_loc]
            suffix = item_name[split_loc + 1 :]
            subtpp = self[prefix]
            if subtpp is None:
                return None
            return subtpp[suffix]

        if item_name not in self.data:
            return None

        item = self.data.item(item_name)
        if hasattr(item, "keys") and hasattr(item, "values"):
            # dict-like: return a sub-tomlparmparse object
            if item_name not in self.accessed_data:
                self.accessed_data[item_name] = tomlkit.document()
            return TomlParmParse(
                self.data[item_name],
                self.accessed_data[item_name],
                f"{self.name}.{item_name}",
                base=self,
                **self.kwargs,
            )

        # we're at a leaf with a value to return
        if item_name not in self.accessed_data:
            self.accessed_data[item_name] = item
        if self.output_type == "clean":
            item.trivia.comment = ""
        return item

    def __setitem__(self, item_name, value):
        """
        Set the value of a leaf in the table hierarchy.

        Periods in item_name separate hierarchy levels; missing intermediate
        tables are created. With live_update and an output file configured,
        the output is rewritten after the value is stored.

        Raises
        ------
        ValueError
            If no_overwrite is set and a different value is given for an
            item that has already been accessed.
        """
        if item_name.count(".") > 0:
            # split keyword on the first period
            split_loc = item_name.index(".")
            prefix = item_name[:split_loc]
            suffix = item_name[split_loc + 1 :]
            if prefix not in self.data:
                self.data[prefix] = tomlkit.document()
            self[prefix][suffix] = value
            return

        if (item_name in self.accessed_data) and self.no_overwrite:
            if value != self.accessed_data[item_name]:
                raise ValueError(
                    "cannot set a new value for an already accessed "
                    f"item <{item_name}> in TomlParmParse object {self.name}"
                )
        else:
            self.accessed_data[item_name] = value
            # Store the tomlkit item (not the raw value) in the full data too.
            self.data[item_name] = self.accessed_data.item(item_name)
            if self.live_update and self.output is not None:
                self.dump()

    def get(self, item_name, default=None, doc=None):
        """
        Retrieve a leaf or subtable from the TomlParmParse table.

        Can optionally specify a default, otherwise an error will be raised if
        not found. Can also optionally specify a doc string to add to the
        TOML file for the input being retrieved. A default that gets used is
        written back into the table.

        Parameters
        ----------
        item_name: str
            key to lookup in table. May parse through multiple levels in hierarchy
            by separating different levels with periods, e.g. lev1name.level2name.lev3name
        default: optional
            value to use if item_name is not found in table. None means
            "no default": the item is then required.
        doc: optional
            string to add as a comment in the TOML file (only used when
            output_type is "doc")

        Returns
        -------
        retval: TomlParmParse or any
            If item_name is not a leaf, a subtable of the TomlParmParse object, otherwise
            the leaf (which may be a string, int, list, etc.)

        Raises
        ------
        RuntimeError
            If the item is not found and no default is given.
        """
        retval = self[item_name]
        if retval is None:
            if default is None:
                raise RuntimeError(
                    f"Required item <{item_name}> (doc: {doc}) not found in "
                    f"TomlParmParse object {self.name}."
                )
            retval = default
            # Record the default so it appears in the saved configuration.
            self[item_name] = retval

        if doc is not None and self.output_type == "doc":
            if default is not None:
                doc += f" | optional, default: {default}"
            self[item_name].comment(doc)

        return retval

    def set(self, item_name, value, doc=None):
        """
        Set value for a leaf in the table.

        Parameters
        ----------
        item_name: str
            key for the item being added
        value: int, str, list, etc.
            value for the item being added (must be TOML format compatible type)
        doc: str
            comment to add for item in TOML file (skipped when output_type
            is "clean")
        """
        self[item_name] = value
        if doc is not None and self.output_type != "clean":
            self[item_name].comment(doc)

    def doc(self, doc):
        """
        Add a high level comment to the TOML document.

        The comment is only attached when output_type is "doc".

        Parameters
        ----------
        doc: optional
            string to add as a comment in the TOML file

        Returns
        -------
        self: TomlParmParse
            The updated TomlParmParse object
        """
        if self.output_type == "doc":
            self.data.comment(doc)
            self.accessed_data.comment(doc)
        return self

    def dump(self, outfile=None):
        """
        Output TomlParmParse data to file.

        Sub-objects forward the request to their base object, which writes
        the whole document in the format selected by output_type.

        Parameters
        ----------
        outfile: str, optional
            file name to save to; defaults to the output file given at
            construction

        Raises
        ------
        RuntimeError
            If no outfile is given and none was set at construction.
        """
        if self.base is not None:
            self.base.dump(outfile)
            return

        if outfile is not None:
            write_to = outfile
        elif self.output is not None:
            write_to = self.output
        else:
            raise RuntimeError(
                "Cannot dump with no outfile unless initialized with one"
            )

        # TOML documents are UTF-8 by specification.
        with open(write_to, "w", encoding="utf-8") as tomlfile:
            if self.output_type == "original":
                tomlfile.write(tomlkit.dumps(self.data))
            elif self.output_type == "doc":
                tomlfile.write(beautify_document(tomlkit.dumps(self.accessed_data)))
            else:
                tomlfile.write(tomlkit.dumps(self.accessed_data))

    def check_unused_inputs(self):
        """
        Return any keys in table that have not been used.

        Keys of nested tables are reported as "table.key".
        """
        unused = []
        for key in self.data:
            if key not in self.accessed_data:
                unused.append(key)
                continue
            item = self[key]
            # Accessed sub-tables may still contain unused entries.
            if hasattr(item, "check_unused_inputs"):
                unused_subkeys = item.check_unused_inputs()
                unused += [f"{key}.{subkey}" for subkey in unused_subkeys]
        return unused

    def __del__(self):
        """When destroying, optionally dump output to file, raise error for unused."""
        if self.base is not None:
            return
        if self.output is not None:
            self.dump()
        unused = self.check_unused_inputs()
        if len(unused) > 0 and self.error_unused:
            print(f"Unused TomlParmParse inputs for {self.name}:")
            for key in unused:
                print(f" {key}")
            # NOTE(review): Python does not propagate exceptions raised in
            # __del__; it reports them on stderr and continues, so this does
            # not stop the program. Call check_unused_inputs() explicitly
            # where a hard failure is required.
            raise RuntimeError("Aborting due to unused inputs.")
+19,7 @@ dependencies = [ "pandas >= 2.0.3", "scikit-learn >= 1.7.0", "h5py >= 3.14.0", - "toml >= 0.10.0" + "tomlkit >= 0.13.0" ] [project.optional-dependencies] diff --git a/run_scripts/ctable/create_dummy_table_nd.py b/run_scripts/ctable/create_dummy_table_nd.py index 4dad946..6d4cc35 100644 --- a/run_scripts/ctable/create_dummy_table_nd.py +++ b/run_scripts/ctable/create_dummy_table_nd.py @@ -28,10 +28,10 @@ from cmlm.utils import TomlParmParse # Load inputs - tpp = TomlParmParse("create_dummy_table_nd.toml", allow_cl_override=True) - ndim = tpp.get("table", "ndim") - ngrid = tpp.get("table", "ngrid") - outfi_pref = tpp.get("table", "outfile_prefix") + tpp = TomlParmParse.parse_args("Create dummy chemtables with fake data.") + ndim = tpp["table"].get("ndim", doc="Number of table dimensions to create") + ngrid = tpp["table"].get("ngrid", doc="Grid points for each table dimension") + outfi_pref = tpp["table"].get("outfile_prefix", doc="Filename prefix for output") # Create empty tables dimnames = ["dim" + str(idim) for idim in range(ndim)] diff --git a/run_scripts/ctable/create_dummy_table_nd.toml b/run_scripts/ctable/create_dummy_table_nd.toml index e5d5aa7..2031060 100644 --- a/run_scripts/ctable/create_dummy_table_nd.toml +++ b/run_scripts/ctable/create_dummy_table_nd.toml @@ -1,4 +1,12 @@ +# ==================================================================================== # [table] -ndim = 1 -ngrid = 20 -outfile_prefix = "peletable" + +# Number of table dimensions to create +ndim = 1 + +# Grid points for each table dimension +ngrid = 20 + +# Filename prefix for output +outfile_prefix = "peletable" + diff --git a/run_scripts/ctable/create_spray_table_nd.py b/run_scripts/ctable/create_spray_table_nd.py index 31dd1d4..586b50a 100644 --- a/run_scripts/ctable/create_spray_table_nd.py +++ b/run_scripts/ctable/create_spray_table_nd.py @@ -22,6 +22,10 @@ python create_spray_table_nd.py +To see additional runtime options you can run:: + + python 
create_spray_table_nd.py -h + Input File ---------- @@ -40,17 +44,23 @@ from cmlm.utils import TomlParmParse # Load inputs - tpp = TomlParmParse("create_spray_table_nd.toml", allow_cl_override=True) + pp = TomlParmParse.parse_args( + description="Create chemtable for nonreacting" + " multicomponent spray evaporation problems." + ) # Create mixing streams - mechanism = tpp.get("phys", "mechanism") - T_ox = tpp.get("phys", "T_ox") - X_ox = tpp.get("phys", "X_ox") - pressure = tpp.get("phys", "pressure") - liq_temp_fuel = tpp.get("phys", "liq_temp_fuel") - X_fuel = tpp.get("phys", "X_fuel") + + ppp = pp["phys"].doc("Physical conditions/models/parameters") + mechanism = ppp.get("mechanism", doc="path to mechanism file (yaml)") + T_ox = ppp.get("T_ox", doc="ambient temp, K") + X_ox = ppp.get("X_ox", doc="Cantera composition string") + pressure = ppp.get("pressure", doc="ambient pressure, Pa") + liq_temp_fuel = ppp.get("liq_temp_fuel", doc="liquid temps for each fuel, K") + X_fuel = ppp.get("X_fuel", doc="Cantera composition string") species_list = ["O2"] + [spec.split(":")[0] for spec in X_fuel] - delta_h_vap = tpp.get("phys", "delta_h_vap") + delta_h_vap = ppp.get("delta_h_vap", doc="Latent heats for each fuel, J/kg") + ox = ct.Solution(mechanism) ox.TPX = T_ox, pressure, X_ox oxstream = ct.Quantity(ox, constant="HP") @@ -63,7 +73,7 @@ # note fuel stream does not yet account for enthalpy of vaporization # here we do a test just to see what the temperature will be fu_vap = ct.Solution(mechanism) - T_min = tpp.get("phys", "T_min") + T_min = ppp.get("T_min", doc="min temperature allowed in gas phase, K") try: fu_vap.HPY = fu.enthalpy_mass - delta_h_vap[ii], fu.P, fu.Y print( @@ -82,10 +92,18 @@ streams = [oxstream] + fuelstreams # Create table + ppt = pp["table"].doc("Table setup inputs") grids = [] - for grid in tpp.get("table", "grid")[:Nfuel]: + grid_sizes = ppt.get( + "grid", doc="number of grid points for each table dimension (length Nfuels)" + ) + for grid in 
grid_sizes[:Nfuel]: grids.append(np.linspace(0.0, 1.0, grid)) - use_fmix = tpp.get("table", "use_fmix") + use_fmix = ppt.get( + "use_fmix", + default=False, + doc="Tabulate in terms of fuel premixing fractions rather than mixture fractions", + ) if use_fmix: dimnames = ["ZMIX"] for ii in range(Nfuel - 1): @@ -161,7 +179,9 @@ # Meta data generation species_idx_list = [mixture.species_index(sp) for sp in species_list] - mdatfi = tpp.get("table", "metadata_file") + mdatfi = ppt.get( + "metadata_file", doc="output file path/name for the table metadata" + ) with open(mdatfi, "w") as fi: fi.write("manifold.has_species_mw = true\n") for i in range(len(species_list)): @@ -180,5 +200,5 @@ ctable_tools.print_chemtable(df) print(df) - ofi = tpp.get("table", "filename") + ofi = ppt.get("filename", doc="output file path/name for the table") ctable_tools.write_chemtable_binary(ofi, df, "mixing-only") diff --git a/run_scripts/ctable/create_spray_table_nd.toml b/run_scripts/ctable/create_spray_table_nd.toml index 5574bdb..522b40e 100644 --- a/run_scripts/ctable/create_spray_table_nd.toml +++ b/run_scripts/ctable/create_spray_table_nd.toml @@ -1,15 +1,47 @@ +# ==================================================================================== # +# Physical conditions/models/parameters +# ==================================================================================== # [phys] -mechanism = "/path/to/PelePhysics/Mechanisms/liquid_fuels_nonreacting/mechanism.yaml" -pressure = 101325 # Pa + +# path to mechanism file (yaml) +mechanism = '/path/to/PelePhysics/Mechanisms/liquid_fuels_nonreacting/mechanism.yaml' + +T_ox = 400 # ambient temp, K + +# Cantera composition string X_ox = "O2:0.21, N2:0.79" -T_ox = 400 # K -X_fuel = ["NC7H16:1.0"] # should all be pure components -liq_temp_fuel = [300.0, 300.0, 300.0, 300.0] # 1 per fuel, K -delta_h_vap = [300000, 330000, 340000, 300000] # 1 per fuel, J/kg + +# ambient pressure, Pa +pressure = 101325 + +# liquid temps for each fuel, K 
+liq_temp_fuel = [300.0] + +# Cantera composition string +X_fuel = ["NC7H16:1.0"] + +# Latent heats for each fuel, J/kg +delta_h_vap = [300000] + +# min temperature allowed in gas phase, K T_min = 100.0 + + +# ==================================================================================== # +# Table setup inputs +# ==================================================================================== # [table] + +# number of grid points for each table dimension (length Nfuels) +grid = [20] + +# Tabulate in terms of fuel premixing fractions rather than mixture fractions | +# optional, default: False use_fmix = false -grid = [20, 21, 22] # must have N_fuels dimensions + +# output file path/name for the table metadata +metadata_file = "manifold_metadata_heptane.text" + +# output file path/name for the table filename = "spray_heptane.ctb" -metadata_file = "manifold_metadata_3d.text" \ No newline at end of file