Pyomo · sscini · Apr 23, 2025 · Apr 23, 2025 · Apr 23, 2025 · Apr 30, 2025
diff --git a/pyomo/contrib/parmest/parmest.py b/pyomo/contrib/parmest/parmest.py
@@ -235,6 +235,9 @@ def SSE(model):
     return expr
 
 
+'''Draft (pseudocode-level) additions to the Estimator class below that
+incorporate multistart optimization.
+'''
 class Estimator(object):
     """
     Parameter estimation class
@@ -273,8 +276,18 @@ def __init__(
         tee=False,
         diagnostic_mode=False,
         solver_options=None,
+        # Extra arguments needed for running the multistart implementation.
+        # _validate_multistart_args:
+        # if n_restarts > 1 and multistart_sampling_method is not None:
+        n_restarts=20,
+        multistart_sampling_method="random",
     ):
 
+        '''The first theta is provided by the user when the Estimator class is
+        initialized, through the unknown parameter variables. Additional thetas
+        need to be generated using the sampling method provided by the user.
+        '''
+
         # check that we have a (non-empty) list of experiments
         assert isinstance(experiment_list, list)
         self.exp_list = experiment_list
@@ -300,6 +313,10 @@ def __init__(
         self.diagnostic_mode = diagnostic_mode
         self.solver_options = solver_options
 
+        # add the extra multistart arguments to the Estimator class
+        self.n_restarts = n_restarts
+        self.multistart_sampling_method = multistart_sampling_method
+
         # TODO: delete this when the deprecated interface is removed
         self.pest_deprecated = None
 
@@ -447,6 +464,88 @@ def TotalCost_rule(model):
         parmest_model = utils.convert_params_to_vars(model, theta_names, fix_vars=False)
 
         return parmest_model
+
+    # Private method _generate_initial_theta:
+    # generates the starting theta values for multistart optimization. It
+    # samples each unknown parameter between its lower and upper bounds on the
+    # given parmest model, using the sampling method chosen for the Estimator.
+    def _generate_initial_theta(self, parmest_model, seed=None):
+        """
+        Generate the starting theta values for multistart parameter estimation.
+
+        Row 0 holds the current value of every unknown parameter found on
+        ``parmest_model``; the remaining ``n_restarts - 1`` rows are sampled
+        between the parameter bounds with ``self.multistart_sampling_method``.
+
+        Parameters
+        ----------
+        parmest_model: model providing ``find_component(name)`` for each theta
+            Each component is called for its value and read for ``lb``/``ub``.
+        seed: int, optional
+            Seed passed to the sampler so the starting points are reproducible.
+
+        Returns
+        -------
+        theta_vals_multistart: np.ndarray
+            Starting values, shape (n_restarts, number of thetas)
+        df_multistart: pd.DataFrame
+            The same values, one column per theta, followed by the columns
+            "initial objective", "final objective", "solver termination" and
+            "solve_time", all set to NaN for the caller to fill in
+
+        Raises
+        ------
+        ValueError
+            If n_restarts < 1, the sampling method is unknown, or any theta
+            lacks a finite lower or upper bound
+        """
+        if self.n_restarts < 1:
+            raise ValueError("n_restarts must be at least 1.")
+        method = self.multistart_sampling_method
+        if method not in ("random", "latin_hypercube", "sobol"):
+            raise ValueError(
+                "Invalid sampling method. Choose 'random', 'latin_hypercube', 'sobol'."
+            )
+
+        theta_names = self._return_theta_names()
+        theta_vars = [parmest_model.find_component(name) for name in theta_names]
+        initial_theta = np.array([var() for var in theta_vars], dtype=float)
+
+        # A missing bound may be reported as None, which np.isnan cannot handle;
+        # map it to NaN so one finiteness check covers None, NaN and infinity.
+        bounds = np.array(
+            [[np.nan if b is None else b for b in (v.lb, v.ub)] for v in theta_vars],
+            dtype=float,
+        ).reshape(-1, 2)
+        if not np.all(np.isfinite(bounds)):
+            raise ValueError(
+                "The lower and upper bounds for the theta values must be defined."
+            )
+        lower_bound, upper_bound = bounds[:, 0], bounds[:, 1]
+
+        # Draw unit-cube samples for every start except the user-supplied one
+        n_theta = len(theta_names)
+        n_sampled = self.n_restarts - 1
+        if n_sampled == 0:
+            unit_samples = np.empty((0, n_theta))
+        elif method == "random":
+            unit_samples = np.random.default_rng(seed).random((n_sampled, n_theta))
+        elif method == "latin_hypercube":
+            sampler = scipy.stats.qmc.LatinHypercube(d=n_theta, seed=seed)
+            unit_samples = sampler.random(n=n_sampled)
+        else:
+            sampler = scipy.stats.qmc.Sobol(d=n_theta, seed=seed)
+            unit_samples = sampler.random(n=n_sampled)
+
+        # Scale to the bounds and put the user's initial theta in the first row
+        sampled_theta = lower_bound + (upper_bound - lower_bound) * unit_samples
+        theta_vals_multistart = np.vstack([initial_theta, sampled_theta])
+
+        df_multistart = pd.DataFrame(theta_vals_multistart, columns=theta_names)
+        info = ("initial objective", "final objective", "solver termination", "solve_time")
+        for column in info:
+            df_multistart[column] = np.nan
+        return theta_vals_multistart, df_multistart
 
     def _instance_creation_callback(self, experiment_number=None, cb_data=None):
         model = self._create_parmest_model(experiment_number)
@@ -921,6 +1020,116 @@ def theta_est(
             cov_n=cov_n,
         )
 
+    def theta_est_multistart(
+        self,
+        buffer=10,
+        save_results=False,
+        theta_vals=None,
+        solver="ef_ipopt",
+        return_values=None,
+    ):
+        """
+        Parameter estimation using multistart optimization
+
+        The number of restarts and the sampling method are read from
+        ``self.n_restarts`` and ``self.multistart_sampling_method``.
+
+        Parameters
+        ----------
+        buffer: int, optional
+            Number of restarts between intermediate saves of the results table.
+            Only used when save_results is True. Default is 10.
+        save_results: bool, optional
+            If True, write the results table to "multistart_results.csv".
+            Default is False.
+        theta_vals: optional
+            Currently ignored; the starting points are always generated by
+            ``_generate_initial_theta``.
+        solver: string, optional
+            Currently only "ef_ipopt" is supported. Default is "ef_ipopt".
+        return_values: list, optional
+            List of Variable names, used to return values from the model for data reconciliation
+
+        Returns
+        -------
+        results_df: pd.DataFrame
+            One row per restart: starting theta values, final objective, solver
+            termination and solve time ("initial objective" is left as NaN)
+        best_theta:
+            Theta estimate of the restart with the lowest objective among the
+            restarts that terminated optimally (None if there is none)
+        best_objectiveval: float
+            The lowest objective function value found (inf if there is none)
+
+        Raises
+        ------
+        RuntimeError
+            If the deprecated parmest interface is in use
+        ValueError
+            If n_restarts is smaller than 1
+        """
+        # check if we are using deprecated parmest
+        if self.pest_deprecated is not None:
+            raise RuntimeError(
+                "Multistart is not supported in the deprecated parmest interface"
+            )
+
+        # Avoid a shared mutable default argument
+        return_values = [] if return_values is None else return_values
+
+        assert isinstance(self.n_restarts, int)
+        assert isinstance(self.multistart_sampling_method, str)
+        assert isinstance(solver, str)
+        assert isinstance(return_values, list)
+        if self.n_restarts < 1:
+            raise ValueError("n_restarts must be at least 1.")
+
+        # NOTE(review): the starting values and bounds are read from the model of
+        # experiment 0 -- confirm this is the intended reference model.
+        theta_vals, results_df = self._generate_initial_theta(
+            self._create_parmest_model(0)
+        )
+        # The last four columns hold solve information; the rest are thetas
+        theta_names = list(results_df.columns[:-4])
+        # The termination condition is not numeric, so store it as an object
+        results_df["solver termination"] = results_df["solver termination"].astype(object)
+        best_objectiveval, best_theta = np.inf, None
+
+        for i in range(self.n_restarts):
+            # NOTE(review): assumes _Q_opt accepts ThetaVals as a name -> value
+            # mapping and that its third return value carries solver information
+            # (``.solver``) -- confirm against _Q_opt.
+            objectiveval, thetavals, variable_values = self._Q_opt(
+                ThetaVals=dict(zip(theta_names, theta_vals[i])),
+                solver=solver,
+                return_values=return_values,
+            )
+            termination = variable_values.solver.termination_condition
+            converged = termination == pyo.TerminationCondition.optimal
+
+            # Record this restart; a failed solve keeps a NaN objective
+            results_df.at[i, "final objective"] = objectiveval if converged else np.nan
+            results_df.at[i, "solver termination"] = termination
+            results_df.at[i, "solve_time"] = variable_values.solver.time
+
+            if converged and objectiveval < best_objectiveval:
+                best_objectiveval = objectiveval
+                best_theta = thetavals
+
+            # Rewrite the whole file so a checkpoint never duplicates rows
+            if save_results and (i + 1) % buffer == 0:
+                results_df.to_csv("multistart_results.csv", index=False)
+                print(f"Intermediate results saved after {i + 1} iterations.")
+
+        # Final save after all iterations
+        if save_results:
+            results_df.to_csv("multistart_results.csv", index=False)
+            print("Final results saved.")
+
+        return results_df, best_theta, best_objectiveval
+
+
+
     def theta_est_bootstrap(
         self,
         bootstrap_samples,