diff --git a/README.rst b/README.rst index bbe43a5..c97cf36 100644 --- a/README.rst +++ b/README.rst @@ -93,6 +93,21 @@ To view the basic usage and available commands, type :: pdfbl.sequential -h +Examples +-------- + +To run a temperature sequential refinement, :: + + from pdfbl.sequential.sequential_cmi_runner import SequentialCMIRunner + runner = SequentialCMIRunner() + runner.load_inputs( + input_data_dir="path/to/inputs", + output_result_dir="path/to/outputs", + structure_path="path/to/structure.cif", + filename_order_pattern=r"(\d+)K\.gr", # regex pattern to extract the temperature from the filename + ) + runner.run(mode="batch") # or mode="stream" for running sequentially as data becomes available + Getting Started --------------- diff --git a/docs/source/api/pdfbl.sequential.rst b/docs/source/api/pdfbl.sequential.rst index 23bd884..70029d0 100644 --- a/docs/source/api/pdfbl.sequential.rst +++ b/docs/source/api/pdfbl.sequential.rst @@ -11,9 +11,6 @@ :show-inheritance: -Submodules ----------- - pdfbl.sequential.pdfadapter module ---------------------------------- diff --git a/news/add-readme-example.rst b/news/add-readme-example.rst new file mode 100644 index 0000000..dd0b829 --- /dev/null +++ b/news/add-readme-example.rst @@ -0,0 +1,23 @@ +**Added:** + +* No news added: Add example in the ``README.rst``. + +**Changed:** + +* + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +* diff --git a/src/pdfbl/sequential/pdfadapter.py b/src/pdfbl/sequential/pdfadapter.py index 38e7b58..f64ead3 100644 --- a/src/pdfbl/sequential/pdfadapter.py +++ b/src/pdfbl/sequential/pdfadapter.py @@ -29,16 +29,16 @@ class PDFAdapter: Methods ------- - init_profile(profile_path, qmin=None, qmax=None, xmin=None, xmax=None, dx=None) + initialize_profile(profile_path, qmin=None, qmax=None, xmin=None, xmax=None, dx=None) Load and initialize the PDF profile from the given file path with some optional parameters. 
- init_structures(structure_paths : list[str], run_parallel=True) + initialize_structures(structure_paths : list[str], run_parallel=True) Load and initialize the structures from the given file paths, and generate corresponding PDFGenerator objects. - init_contribution(equation_string=None) + initialize_contribution(equation_string=None) Initialize the FitContribution object combining the PDF generators and the profile. - init_recipe() + initialize_recipe() Initialize the FitRecipe object for the fitting process. set_initial_variable_values(variable_name_to_value : dict) Update parameter values from the provided dictionary. @@ -54,7 +54,7 @@ def __init__(self): self.intermediate_results = {} self.iter_count = 0 - def moniter_intermediate_results( + def monitor_intermediate_results( self, key: str, step: int = 10, queue: Queue = None ): """Store an intermediate result during the fitting process. @@ -72,7 +72,7 @@ def moniter_intermediate_results( queue = Queue() self.intermediate_results[(key, step)] = queue - def init_profile( + def initialize_profile( self, profile_path: str, qmin=None, @@ -119,7 +119,9 @@ def init_profile( profile.setCalculationRange(xmin=xmin, xmax=xmax, dx=dx) self.profile = profile - def init_structures(self, structure_paths: list[str], run_parallel=True): + def initialize_structures( + self, structure_paths: list[str], run_parallel=True + ): """Load and initialize the structures from the given file paths, and generate corresponding PDFGenerator objects. @@ -127,7 +129,7 @@ def init_structures(self, structure_paths: list[str], run_parallel=True): PDFGenerator objects, and a FitContribution object combining them. This method creates the PDFGenerator objects from the structure files. - Must be called after init_profile. + Must be called after initialize_profile. 
Parameters ---------- @@ -182,7 +184,7 @@ def init_structures(self, structure_paths: list[str], run_parallel=True): self.spacegroups = spacegroups self.pdfgenerators = pdfgenerators - def init_contribution(self, equation_string=None): + def initialize_contribution(self, equation_string=None): """Initialize the FitContribution object combining the PDF generators and the profile. @@ -191,7 +193,7 @@ def init_contribution(self, equation_string=None): method creates the FitContribution object combining the profile and PDF generators. - Must be called after init_profile and init_structures. + Must be called after initialize_profile and initialize_structures. Parameters ---------- @@ -230,7 +232,7 @@ def init_contribution(self, equation_string=None): self.contribution = contribution return self.contribution - def init_recipe( + def initialize_recipe( self, ): """Initialize the FitRecipe object for the fitting process. @@ -240,7 +242,7 @@ def init_recipe( method creates the FitRecipe object combining the profile, PDF generators, and contribution. - Must be called after init_contribution. + Must be called after initialize_contribution. Notes ----- @@ -311,17 +313,18 @@ def residual(self, p=[]): The residual array. """ residual = self.recipe.residual(p) - fitresults = FitResults(self.recipe) - for (key, step), values in self.intermediate_results.items(): - if (self.iter_count % step) == 0: - value = getattr(fitresults, key) - values.put(value) + if self.intermediate_results is not None: + fitresults = FitResults(self.recipe) + for (key, step), values in self.intermediate_results.items(): + if (self.iter_count % step) == 0: + value = getattr(fitresults, key) + values.put(value) self.iter_count += 1 return residual def refine_variables(self, variable_names: list[str]): """Refine the parameters specified in the list and in that - order. Must be called after init_recipe. + order. Must be called after initialize_recipe. 
Parameters ---------- @@ -357,7 +360,7 @@ def save_results( self, mode: Literal["str", "dict"] = "str", filename=None ): """Save the fitting results. Must be called after - refine_parameters. + refine_variables. Parameters ---------- diff --git a/src/pdfbl/sequential/sequential_cmi_runner.py b/src/pdfbl/sequential/sequential_cmi_runner.py index 338a96e..ad2742e 100644 --- a/src/pdfbl/sequential/sequential_cmi_runner.py +++ b/src/pdfbl/sequential/sequential_cmi_runner.py @@ -27,7 +27,7 @@ def __init__(self): self.adapter = PDFAdapter() self.visualization_data = {} - def validate_inputs(self): + def _validate_inputs(self): for path_name in [ "input_data_dir", "output_result_dir", @@ -62,10 +62,10 @@ def validate_inputs(self): "or the input files." ) tmp_adatper = PDFAdapter() - tmp_adatper.init_profile(str(tmp_file_path)) - tmp_adatper.init_structures([self.inputs["structure_path"]]) - tmp_adatper.init_contribution() - tmp_adatper.init_recipe() + tmp_adatper.initialize_profile(str(tmp_file_path)) + tmp_adatper.initialize_structures([self.inputs["structure_path"]]) + tmp_adatper.initialize_contribution() + tmp_adatper.initialize_recipe() allowed_variable_names = list( tmp_adatper.recipe._parameters.keys() ) @@ -133,6 +133,93 @@ def load_inputs( qmax=None, show_plot=True, ): + """Load and validate input configuration for sequential PDF + refinement. + + This method initializes the sequential CMI runner with input data, + structure information, refinement parameters, and the plotting + configuration. + + Parameters + ---------- + input_data_dir : str + The path to the directory containing input PDF profile files. + structure_path : str + The path to the structure file (e.g., CIF format) used for + refinement. + output_result_dir : str + The path to the directory for storing refinement results. + Default is "results". + filename_order_pattern : str + The regular expression pattern to extract ordering information + from filenames. 
+ Default is r"(\d+)K\.gr" to extract temperature values from + filenames. + refinable_variable_names : list of str + The list of variable names to refine. + Must exist in the recipe. + Default variable names are all possible variables that can + be created from the input structure and profile. + initial_variable_values : dict + The dictionary mapping variable names to their initial values. + Default is None. + xmin : float + The minimum x-value for the PDF profile. + Default is the value parsed from the input file. + xmax : float + The maximum x-value for the PDF profile. + Default is the value parsed from the input file. + dx : float + The step size for the PDF profile. + Default is the value parsed from the input file. + qmin : float + The minimum q-value for the PDF profile. + Default is the value parsed from the input file. + qmax : float + The maximum q-value for the PDF profile. + Default is the value parsed from the input file. + show_plot : bool + Whether to display plots during refinement. Default is True. + whether_plot_y : bool + Whether to plot the experimental PDF data (y). Default is False. + whether_plot_ycalc : bool + Whether to plot the calculated PDF data (ycalc). Default is False. + plot_variable_names : list of str + The list of variable names to plot during refinement. + Default is None. + plot_result_names : list of str + The list of fit result entries to plot. + Allowed values: "residual", "contributions", "restraints", "chi2", + "reduced_chi2". Default is None. + plot_intermediate_result_names : list of str + The list of intermediate result entries to plot during refinement. + Allowed values: "residual", "contributions", "restraints", "chi2", + "reduced_chi2". Default is None. + + Raises + ------ + FileNotFoundError + If the input data directory, output result directory, or structure + file does not exist. + NotADirectoryError + If input_data_dir or output_result_dir is not a directory. 
+ ValueError + If a refinable variable name is not found in the recipe, or if a + plot result name is not valid. + + Examples + -------- + >>> runner = SequentialCMIRunner() + >>> runner.load_inputs( + ... input_data_dir="./data", + ... structure_path="./structure.cif", + ... output_result_dir="./results", + ... refinable_variable_names=["a", "all"], + ... plot_variable_names=["a"], + ... plot_result_names=["chi2"], + ... plot_intermediate_result_names=["residual"], + ... ) + """ # noqa: W605 self.inputs = { "input_data_dir": input_data_dir, "structure_path": structure_path, @@ -150,13 +237,13 @@ def load_inputs( "plot_variable_names": plot_variable_names or [], "plot_result_names": plot_result_names or [], "plot_intermediate_result_names": plot_intermediate_result_names - or {}, + or [], } self.show_plot = show_plot - self.validate_inputs() - self.init_plots() + self._validate_inputs() + self._initialize_plots() - def init_plots(self): + def _initialize_plots(self): whether_plot_y = self.inputs["whether_plot_y"] whether_plot_ycalc = self.inputs["whether_plot_ycalc"] plot_variable_names = self.inputs["plot_variable_names"] @@ -212,7 +299,7 @@ def init_plots(self): fig.suptitle(f"{names[i].capitalize()}: {var_name}") if plot_intermediate_result_names is not None: for var_name in plot_intermediate_result_names: - self.adapter.moniter_intermediate_results( + self.adapter.monitor_intermediate_results( var_name, step=10, queue=self.visualization_data["intermediate_results"][ @@ -220,7 +307,7 @@ def init_plots(self): ]["ydata"], ) - def update_plot(self): + def _update_plot(self): for key, plot_pack in self.visualization_data.items(): if key in ["ycalc", "y"]: if not plot_pack["xdata"].empty(): @@ -249,7 +336,7 @@ def update_plot(self): line.axes.relim() line.axes.autoscale_view() - def check_for_new_data(self): + def _check_for_new_data(self): input_data_dir = self.inputs["input_data_dir"] filename_order_pattern = self.inputs["filename_order_pattern"] files = [file for 
file in Path(input_data_dir).glob("*")] @@ -281,7 +368,20 @@ def check_for_new_data(self): def set_start_input_file( self, input_filename, input_filename_to_result_filename ): - self.check_for_new_data() + """Set the starting input file for sequential refinement and + continue the interrupted sequential refinement from that point. + + Parameters + ---------- + input_filename : str + The name of the input file to start from. This file must be in the + input data directory. + input_filename_to_result_filename : function + The function that takes an input filename and returns the + corresponding result filename. This is used to locate the last + result file for loading variable values. + """ + self._check_for_new_data() input_file_path = Path(self.inputs["input_data_dir"]) / input_filename if input_file_path not in self.input_files_known: raise ValueError( @@ -313,8 +413,8 @@ def set_start_input_file( self.last_result_variables_values = last_result_variables_values print(f"Starting from input file: {self.input_files_running[0].name}") - def run_one_cycle(self, stop_event=SimpleNamespace(is_set=lambda: False)): - self.check_for_new_data() + def _run_one_cycle(self, stop_event=SimpleNamespace(is_set=lambda: False)): + self._check_for_new_data() xmin = self.inputs["xmin"] xmax = self.inputs["xmax"] dx = self.inputs["dx"] @@ -330,7 +430,7 @@ def run_one_cycle(self, stop_event=SimpleNamespace(is_set=lambda: False)): if stop_event.is_set(): break print(f"Processing {input_file.name}...") - self.adapter.init_profile( + self.adapter.initialize_profile( str(input_file), xmin=xmin, xmax=xmax, @@ -338,9 +438,9 @@ def run_one_cycle(self, stop_event=SimpleNamespace(is_set=lambda: False)): qmin=qmin, qmax=qmax, ) - self.adapter.init_structures([structure_path]) - self.adapter.init_contribution() - self.adapter.init_recipe() + self.adapter.initialize_structures([structure_path]) + self.adapter.initialize_contribution() + self.adapter.initialize_recipe() if not hasattr(self, 
"last_result_variables_values"): self.last_result_variables_values = initial_variable_values self.adapter.set_initial_variable_values( @@ -385,9 +485,22 @@ def run_one_cycle(self, stop_event=SimpleNamespace(is_set=lambda: False)): self.input_files_running = [] def run(self, mode: Literal["batch", "stream"]): + """Run the sequential refinement process in either batch or + streaming mode. + + Parameters + ---------- + mode : str + The mode to run the sequential refinement. Must be either "batch" + or "stream". In "batch" mode, the runner will run through all + available input files once and then stop. In "stream" mode, the + runner will continuously monitor the input data directory for new + files and process them as they appear, until the user decides + to stop the process. + """ if mode == "batch": - self.run_one_cycle() - self.update_plot() + self._run_one_cycle() + self._update_plot() elif mode == "stream": stop_event = threading.Event() session = PromptSession() @@ -397,7 +510,7 @@ def run(self, mode: Literal["batch", "stream"]): def stream_loop(): while not stop_event.is_set(): - self.run_one_cycle(stop_event) + self._run_one_cycle(stop_event) stop_event.wait(1) # Check for new data every 1s def input_loop(): @@ -435,7 +548,7 @@ def input_loop(): fit_thread = threading.Thread(target=stream_loop) fit_thread.start() while not stop_event.is_set(): - self.update_plot() + self._update_plot() plt.pause(0.01) time.sleep(1) fit_thread.join() diff --git a/tests/test_pdfadapter.py b/tests/test_pdfadapter.py index 2f2a6e2..9d7a8ed 100644 --- a/tests/test_pdfadapter.py +++ b/tests/test_pdfadapter.py @@ -32,12 +32,12 @@ def test_pdfadapter(): diffpy_pv_dict[pname] = parameter.value # pdfadapter fitting adapter = PDFAdapter() - adapter.init_profile( + adapter.initialize_profile( str(profile_path), xmin=1.5, xmax=50, dx=0.01, qmax=25, qmin=0.1 ) - adapter.init_structures([str(structure_path)]) - adapter.init_contribution() - adapter.init_recipe() + 
adapter.initialize_structures([str(structure_path)]) + adapter.initialize_contribution() + adapter.initialize_recipe() initial_pdfadapter_pv_dict = { "s0": 0.4, "qdamp": 0.04,