-
Notifications
You must be signed in to change notification settings - Fork 544
Implementing multistart version of theta_est using multiple sampling methods #3575
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
cdd7d52
eca0ba8
2160aec
266beea
b877ada
9f1ffe5
43f1ab3
ea067c8
3b839ef
3a7aa1d
c688f2d
50c36bc
8e5f078
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -235,6 +235,9 @@ def SSE(model): | |
return expr | ||
|
||
|
||
'''Adding pseudocode for draft implementation of the estimator class, | ||
incorporating multistart. | ||
''' | ||
class Estimator(object): | ||
""" | ||
Parameter estimation class | ||
|
@@ -273,8 +276,18 @@ def __init__( | |
tee=False, | ||
diagnostic_mode=False, | ||
solver_options=None, | ||
# Add the extra arguments needed for running the multistart implement | ||
# _validate_multistart_args: | ||
# if n_restarts > 1 and theta_samplig_method is not None: | ||
n_restarts=20, | ||
multistart_sampling_method="random", | ||
): | ||
|
||
'''first theta would be provided by the user in the initialization of | ||
the Estimator class through the unknown parameter variables. Additional | ||
would need to be generated using the sampling method provided by the user. | ||
''' | ||
|
||
# check that we have a (non-empty) list of experiments | ||
assert isinstance(experiment_list, list) | ||
self.exp_list = experiment_list | ||
|
@@ -300,6 +313,10 @@ def __init__( | |
self.diagnostic_mode = diagnostic_mode | ||
self.solver_options = solver_options | ||
|
||
# add the extra multistart arguments to the Estimator class | ||
self.n_restarts = n_restarts | ||
self.multistart_sampling_method = multistart_sampling_method | ||
|
||
# TODO: delete this when the deprecated interface is removed | ||
self.pest_deprecated = None | ||
|
||
|
@@ -447,6 +464,88 @@ def TotalCost_rule(model): | |
parmest_model = utils.convert_params_to_vars(model, theta_names, fix_vars=False) | ||
|
||
return parmest_model | ||
|
||
# Make new private method, _generate_initial_theta: | ||
# This method will be used to generate the initial theta values for multistart | ||
# optimization. It will take the theta names and the initial theta values | ||
# and return a dictionary of theta names and their corresponding values. | ||
def _generate_initial_theta(self, parmest_model, seed=None): | ||
if self.n_restarts == 1: | ||
# If only one restart, return an empty list | ||
return print("No multistart optimization needed. Please use normal theta_est()") | ||
|
||
# Get the theta names and initial theta values | ||
theta_names = self._return_theta_names() | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
initial_theta = [parmest_model.find_component(name)() for name in theta_names] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it better to use the suffix for this? The suffix value shouldn't change, but the theta value may if the model has been solved for some reason. I don't know if this is a potential issue but I think that grabbing these values from the suffixes would be more dummy-proof. |
||
|
||
# Get the lower and upper bounds for the theta values | ||
lower_bound = np.array([parmest_model.find_component(name).lb for name in theta_names]) | ||
upper_bound = np.array([parmest_model.find_component(name).ub for name in theta_names]) | ||
# Check if the lower and upper bounds are defined | ||
if np.any(np.isnan(lower_bound)) or np.any(np.isnan(upper_bound)): | ||
raise ValueError( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You probably already know this, but you will need to check all the errors are raised when expected. |
||
"The lower and upper bounds for the theta values must be defined." | ||
) | ||
|
||
# Check the length of theta_names and initial_theta, and make sure bounds are defined | ||
if len(theta_names) != len(initial_theta): | ||
raise ValueError( | ||
"The length of theta_names and initial_theta must be the same." | ||
) | ||
|
||
if self.method == "random": | ||
np.random.seed(seed) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you want to skip setting the random seed if seed=None (default)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The default is none for all the functions I use that set seed, so if it receives seed = None, it would work as expected. Would skipping it still be best practice? |
||
# Generate random theta values | ||
theta_vals_multistart = np.random.uniform(lower_bound, upper_bound, size=len(theta_names)) | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# Generate theta values using Latin hypercube sampling or Sobol sampling | ||
return theta_vals_multistart | ||
|
||
elif self.method == "latin_hypercube": | ||
# Generate theta values using Latin hypercube sampling | ||
sampler = scipy.stats.qmc.LatinHypercube(d=len(theta_names), seed=seed) | ||
samples = sampler.random(n=self.n_restarts+1)[1:] # Skip the first sample | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are you skipping the first sample? Please explain in the comments. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will add a comment in code to explain as well. The first sample generated using qmc.sobol is always the origin (zero vector). I thought logic applied to all qmc methods, but no only sobol. So to get nonzero points, you need to skip first sample |
||
theta_vals_multistart = np.array([lower_bound + (upper_bound - lower_bound) * theta for theta in samples]) | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
elif self.method == "sobol": | ||
sampler = scipy.stats.qmc.Sobol(d=len(theta_names), seed=seed) | ||
samples = sampler.random(n=self.n_restarts+1)[1:] | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
theta_vals_multistart = np.array([lower_bound + (upper_bound - lower_bound) * theta for theta in samples]) | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# elif self.method == "prior": | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# # Still working on this | ||
# theta_vals_multistart = np.array([lower_bound + (upper_bound - lower_bound) * theta for theta in initial_theta]) | ||
|
||
else: | ||
raise ValueError( | ||
"Invalid sampling method. Choose 'random', 'latin_hypercube', 'sobol'." # or 'prior'." | ||
) | ||
|
||
# Make an output dataframe with the theta names and their corresponding values for each restart, | ||
# and nan for the output info values | ||
df_multistart = pd.DataFrame( | ||
theta_vals_multistart, columns=theta_names | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
df_multistart["initial objective"] = np.nan | ||
df_multistart["final objective"] = np.nan | ||
df_multistart["solver termination"] = np.nan | ||
df_multistart["solve_time"] = np.nan | ||
|
||
# Add the initial theta values to the first row of the dataframe | ||
for i in self.n_restarts: | ||
df_multistart.iloc[i, :] = theta_vals_multistart[i, :] | ||
df_multistart.iloc[0, :] = initial_theta | ||
# # Add the initial objective value to the first row of the dataframe | ||
# df_multistart.iloc[0, -1] = self._Q_at_theta(initial_theta, initialize_parmest_model=True)[0] | ||
# # Add the final objective value to the first row of the dataframe | ||
# df_multistart.iloc[0, -2] = self._Q_at_theta(initial_theta, initialize_parmest_model=True)[0] | ||
# # Add the solver termination value to the first row of the dataframe | ||
# df_multistart.iloc[0, -3] = self._Q_at_theta(initial_theta, initialize_parmest_model=True)[2] | ||
# # Add the solve time to the first row of the dataframe | ||
# df_multistart.iloc[0, -4] = self._Q_at_theta(initial_theta, initialize_parmest_model=True)[3] | ||
|
||
return theta_vals_multistart, df_multistart | ||
|
||
def _instance_creation_callback(self, experiment_number=None, cb_data=None): | ||
model = self._create_parmest_model(experiment_number) | ||
|
@@ -921,6 +1020,116 @@ def theta_est( | |
cov_n=cov_n, | ||
) | ||
|
||
def theta_est_multistart( | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self, | ||
buffer=10, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to explain the buffer in the doc string. |
||
save_results=False, | ||
theta_vals=None, | ||
solver="ef_ipopt", | ||
return_values=[], | ||
): | ||
""" | ||
Parameter estimation using multistart optimization | ||
|
||
Parameters | ||
---------- | ||
n_restarts: int, optional | ||
Number of restarts for multistart. Default is 1. | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
theta_sampling_method: string, optional | ||
Method used to sample theta values. Options are "random", "latin_hypercube", or "sobol". | ||
Default is "random". | ||
solver: string, optional | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Currently only "ef_ipopt" is supported. Default is "ef_ipopt". | ||
return_values: list, optional | ||
List of Variable names, used to return values from the model for data reconciliation | ||
|
||
|
||
Returns | ||
------- | ||
objectiveval: float | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
The objective function value | ||
thetavals: pd.Series | ||
Estimated values for theta | ||
variable values: pd.DataFrame | ||
Variable values for each variable name in return_values (only for solver='ef_ipopt') | ||
|
||
""" | ||
|
||
# check if we are using deprecated parmest | ||
if self.pest_deprecated is not None: | ||
return print( | ||
"Multistart is not supported in the deprecated parmest interface" | ||
) | ||
|
||
assert isinstance(self.n_restarts, int) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Replace all of these with more descriptive error messages. Remember that we need tests for each error message. |
||
assert isinstance(self.multistart_sampling_method, str) | ||
assert isinstance(solver, str) | ||
assert isinstance(return_values, list) | ||
|
||
if self.n_restarts > 1 and self.multistart_sampling_method is not None: | ||
# Generate theta values using the sampling method | ||
theta_vals, results_df = self._generate_initial_theta( | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.estimator_theta_names, self.initial_theta, self.n_restarts, self.multistart_sampling_method | ||
) | ||
|
||
# make empty list to store results | ||
for i in range(self.n_restarts): | ||
# for number of restarts, call the self._Q_opt method | ||
# with the theta values generated using the _generalize_initial_theta method | ||
|
||
# Call the _Q_opt method with the generated theta values | ||
objectiveval, thetavals[i], variable_values = self._Q_opt( | ||
ThetaVals=theta_vals, | ||
solver=solver, | ||
return_values=return_values, | ||
) | ||
|
||
# Check if the solver terminated successfully | ||
if variable_values.solver.termination_condition != pyo.TerminationCondition.optimal: | ||
# If not, set the objective value to NaN | ||
solver_termination = variable_values.solver.termination_condition | ||
solve_time = variable_values.solver.time | ||
thetavals = np.nan | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
else: | ||
|
||
# If the solver terminated successfully, set the objective value | ||
init_objectiveval = objectiveval | ||
final_objectiveval = variable_values.solver.objective() | ||
solver_termination = variable_values.solver.termination_condition | ||
solve_time = variable_values.solver.time | ||
|
||
# Check if the objective value is better than the best objective value | ||
if final_objectiveval < best_objectiveval: | ||
best_objectiveval = objectiveval | ||
best_theta = thetavals | ||
|
||
# Store the results in a list or DataFrame | ||
# depending on the number of restarts | ||
results_df.iloc[i, :-4] = theta_vals | ||
results_df.iloc[i, -4] = init_objectiveval | ||
results_df.iloc[i, -3] = objectiveval | ||
results_df.iloc[i, -2] = variable_values.solver.termination_condition | ||
results_df.iloc[i, -1] = variable_values.solver.time | ||
|
||
# Add buffer to save the dataframe dynamically, if save_results is True | ||
if save_results and (i + 1) % buffer == 0: | ||
mode = 'w' if i + 1 == buffer else 'a' | ||
header = i + 1 == buffer | ||
results_df.to_csv( | ||
f"multistart_results.csv", mode=mode, header=header, index=False | ||
sscini marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
print(f"Intermediate results saved after {i + 1} iterations.") | ||
|
||
# Final save after all iterations | ||
if save_results: | ||
results_df.to_csv("multistart_results.csv", mode='a', header=False, index=False) | ||
print("Final results saved.") | ||
|
||
return results_df, best_theta, best_objectiveval | ||
|
||
|
||
|
||
def theta_est_bootstrap( | ||
self, | ||
bootstrap_samples, | ||
|
Uh oh!
There was an error while loading. Please reload this page.