From 81032624b04400eebd8936b2285747b104e11b96 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 7 Jan 2025 17:36:01 -0700 Subject: [PATCH 01/51] Add source modifications to the trainer to be hable to handle stochastic reporters. --- framework/src/bcs/FunctionDirichletBC.C | 1 + framework/src/bcs/NeumannBC.C | 1 + .../controls/LibtorchNeuralNetControl.C | 7 +- .../FunctionValuePostprocessor.C | 3 + framework/src/postprocessors/PointValue.C | 1 + .../surrogates/LibtorchDRLControlTrainer.h | 31 ++--- .../libtorch/controls/LibtorchDRLControl.C | 11 +- .../trainers/LibtorchDRLControlTrainer.C | 109 +++++++++++------- 8 files changed, 105 insertions(+), 59 deletions(-) diff --git a/framework/src/bcs/FunctionDirichletBC.C b/framework/src/bcs/FunctionDirichletBC.C index 7ea9617fd960..f6f6fec8c1dc 100644 --- a/framework/src/bcs/FunctionDirichletBC.C +++ b/framework/src/bcs/FunctionDirichletBC.C @@ -31,5 +31,6 @@ FunctionDirichletBC::FunctionDirichletBC(const InputParameters & parameters) Real FunctionDirichletBC::computeQpValue() { + std::cout << "Environment temperature: " << _t << " " << _func.value(_t, *_current_node) << std::endl; return _func.value(_t, *_current_node); } diff --git a/framework/src/bcs/NeumannBC.C b/framework/src/bcs/NeumannBC.C index c76597c0de20..c2313a1fe142 100644 --- a/framework/src/bcs/NeumannBC.C +++ b/framework/src/bcs/NeumannBC.C @@ -38,6 +38,7 @@ template GenericReal NeumannBCTempl::computeQpResidual() { + std::cout << "The control value that I see: " << _value << std::endl; return -_test[_i][_qp] * _value; } diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 5502ce168647..709bc957f5bf 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -172,8 +172,11 @@ LibtorchNeuralNetControl::execute() // We add the curent solution to the old solutions and move everything in 
there one step // backward - std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); - _old_responses[0] = _current_response; + if (_old_responses.size()) + { + std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); + _old_responses[0] = _current_response; + } } } diff --git a/framework/src/postprocessors/FunctionValuePostprocessor.C b/framework/src/postprocessors/FunctionValuePostprocessor.C index 71159ce07ca2..0fc7999d3ae1 100644 --- a/framework/src/postprocessors/FunctionValuePostprocessor.C +++ b/framework/src/postprocessors/FunctionValuePostprocessor.C @@ -83,5 +83,8 @@ FunctionValuePostprocessor::getValue() const p(j) = *_point[j]; if (_time_pp) return _scale_factor * _function.value(*_time_pp, p); + + std::cout << name() << " " << _t << " " << _scale_factor * _function.value(_t, p) << std::endl; + return _scale_factor * _function.value(_t, p); } diff --git a/framework/src/postprocessors/PointValue.C b/framework/src/postprocessors/PointValue.C index 8abe943b9f4c..48175d7eb16a 100644 --- a/framework/src/postprocessors/PointValue.C +++ b/framework/src/postprocessors/PointValue.C @@ -72,5 +72,6 @@ PointValue::execute() Real PointValue::getValue() const { + std::cout << "Point value " << _value << std::endl; return _value; } diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 93bb35e8c1db..b0e7d39532c9 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -77,7 +77,8 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase torch::Tensor evaluateAction(torch::Tensor & input, torch::Tensor & output); /// Compute the return value by discounting the rewards and summing them - void computeRewardToGo(); + void 
computeRewardToGo(std::vector & data, + const std::vector> * const reporter_link); /// Reset data after updating the neural network void resetData(); @@ -86,7 +87,8 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase const std::vector _response_names; /// Pointers to the current values of the responses - std::vector *> _response_value_pointers; + /// We can have multiple responses, multiple samples, multiple timesteps + std::vector> *> _response_value_pointers; /// Shifting constants for the responses const std::vector _response_shift_factors; @@ -98,19 +100,22 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase const std::vector _control_names; /// Pointers to the current values of the control signals - std::vector *> _control_value_pointers; + /// We can have multiple control signals, multiple samples, multiple timesteps + std::vector> *> _control_value_pointers; /// Log probability reporter names const std::vector _log_probability_names; /// Pointers to the current values of the control log probabilities - std::vector *> _log_probability_value_pointers; + /// We can have multiple control signals, multiple samples, multiple timesteps + std::vector> *> _log_probability_value_pointers; /// Reward reporter name const ReporterName _reward_name; /// Pointer to the current values of the reward - const std::vector * _reward_value_pointer; + /// We can have multiple samples, multiple timesteps + const std::vector> * _reward_value_pointer; /// Number of timesteps to fetch from the reporters to be the input of then eural nets const unsigned int _input_timesteps; @@ -205,17 +210,17 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase * @param reporter_names The names of the reporters which need to be extracted * @param num_timesteps The number of timesteps we want to use for training */ - void getInputDataFromReporter(std::vector> & data, - const std::vector *> & reporter_links, - const unsigned int num_timesteps); + void 
getResponseDataFromReporter(std::vector> & data, + const std::vector> *> & reporter_links, + const unsigned int num_timesteps); /** - * Extract the output (actions, logarithmic probabilities) values from the postprocessors + * Extract the signal (actions, logarithmic probabilities) values from the postprocessors * of the controlled system. This assumes that they are stored in an AccumulateReporter * @param data The data where we would like to store the output values * @param reporter_names The names of the reporters which need to be extracted */ - void getOutputDataFromReporter(std::vector> & data, - const std::vector *> & reporter_links); + void getSignalDataFromReporter(std::vector> & data, + const std::vector> *> & reporter_links); /** * Extract the reward values from the postprocessors of the controlled system @@ -224,11 +229,11 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase * @param reporter_names The name of the reporter which need to be extracted */ void getRewardDataFromReporter(std::vector & data, - const std::vector * const reporter_link); + const std::vector> * const reporter_link); /// Getting reporter pointers with given names void getReporterPointers(const std::vector & reporter_names, - std::vector *> & pointer_storage); + std::vector> *> & pointer_storage); /// Counter for number of transient simulations that have been run before updating the controller unsigned int _update_counter; diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 6d9c313b6a12..240f455218f8 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -86,8 +86,12 @@ LibtorchDRLControl::execute() log_probability.data_ptr() + log_probability.size(1)}; + std::cout << "Setting control signal to: " << Moose::stringify(_current_control_signals) << std::endl; + std::cout 
<< "Setting log probability to: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; + for (unsigned int control_i = 0; control_i < n_controls; ++control_i) { + // We scale the controllable value for physically meaningful control action setControllableValueByName(_control_names[control_i], _current_control_signals[control_i] * @@ -96,8 +100,11 @@ LibtorchDRLControl::execute() // We add the curent solution to the old solutions and move everything in there one step // backward - std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); - _old_responses[0] = _current_response; + if (_old_responses.size()) + { + std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); + _old_responses[0] = _current_response; + } } } diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index c7461db9baa9..cc48a9140917 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -137,7 +137,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _control_names(getParam>("control")), _log_probability_names(getParam>("log_probability")), _reward_name(getParam("reward")), - _reward_value_pointer(&getReporterValueByName>(_reward_name)), + _reward_value_pointer(&getReporterValueByName>>(_reward_name)), _input_timesteps(getParam("input_timesteps")), _num_inputs(_input_timesteps * _response_names.size()), _num_outputs(_control_names.size()), @@ -242,13 +242,13 @@ void LibtorchDRLControlTrainer::execute() { // Extract data from the reporters - getInputDataFromReporter(_input_data, _response_value_pointers, _input_timesteps); - getOutputDataFromReporter(_output_data, _control_value_pointers); - getOutputDataFromReporter(_log_probability_data, 
_log_probability_value_pointers); + getResponseDataFromReporter(_input_data, _response_value_pointers, _input_timesteps); + getSignalDataFromReporter(_output_data, _control_value_pointers); + getSignalDataFromReporter(_log_probability_data, _log_probability_value_pointers); getRewardDataFromReporter(_reward_data, _reward_value_pointer); // Calculate return from the reward (discounting the reward) - computeRewardToGo(); + computeRewardToGo(_return_data, _reward_value_pointer); _update_counter--; @@ -285,27 +285,38 @@ LibtorchDRLControlTrainer::computeAverageEpisodeReward() } void -LibtorchDRLControlTrainer::computeRewardToGo() +LibtorchDRLControlTrainer::computeRewardToGo(std::vector & data, + const std::vector> * const reporter_link) { // Get reward data from one simulation std::vector reward_data_per_sim; std::vector return_data_per_sim; - getRewardDataFromReporter(reward_data_per_sim, _reward_value_pointer); + getRewardDataFromReporter(reward_data_per_sim, reporter_link); // Discount the reward to get the return value, we need this to be able to anticipate - // rewards based on the current behavior. - Real discounted_reward(0.0); - for (int i = reward_data_per_sim.size() - 1; i >= 0; --i) + // rewards based on the current behavior. We go backwards in samples and backwards in + // accumulation. 
+ unsigned int reward_i = reward_data_per_sim.size(); + for (unsigned int sample_i = reporter_link->size() - 1; sample_i >= 0; --sample_i) { - discounted_reward = reward_data_per_sim[i] + discounted_reward * _decay_factor; + Real discounted_reward(0.0); + const auto history_size = (*reporter_link)[sample_i].size() - _shift_outputs; - // We are inserting to the front of the vector and push the rest back, this will - // ensure that the first element of the vector is the discounter reward for the whole transient - return_data_per_sim.insert(return_data_per_sim.begin(), discounted_reward); + for (int i = 0; i < history_size; ++i) + { + discounted_reward = reward_data_per_sim[reward_i - 1 - i] + discounted_reward * _decay_factor; + + // We are inserting to the front of the vector and push the rest back, this will + // ensure that the first element of the vector is the discounter reward for the whole transient + return_data_per_sim.insert(return_data_per_sim.begin(), discounted_reward); + } + + // Update the global index + reward_i -= history_size; } // Save and accumulate the return values - _return_data.insert(_return_data.end(), return_data_per_sim.begin(), return_data_per_sim.end()); + data.insert(_return_data.end(), return_data_per_sim.begin(), return_data_per_sim.end()); } void @@ -423,66 +434,80 @@ LibtorchDRLControlTrainer::resetData() } void -LibtorchDRLControlTrainer::getInputDataFromReporter( +LibtorchDRLControlTrainer::getResponseDataFromReporter( std::vector> & data, - const std::vector *> & reporter_links, + const std::vector> *> & reporter_links, const unsigned int num_timesteps) { + // We have multiple reporters, each has a time series for each sample for (const auto & rep_i : index_range(reporter_links)) { - std::vector reporter_data = *reporter_links[rep_i]; + // Fetch the vector of time series for a given reporter + const std::vector> & reporter_data = *reporter_links[rep_i]; - // We shift and scale the inputs to get better training efficiency - 
std::transform( - reporter_data.begin(), - reporter_data.end(), - reporter_data.begin(), - [this, &rep_i](Real value) -> Real - { return (value - _response_shift_factors[rep_i]) * _response_scaling_factors[rep_i]; }); - - // Fill the corresponding containers + // We might consider using older time steps too which requires adding new + // rows and populating them with staggered data for (const auto & start_step : make_range(num_timesteps)) { unsigned int row = reporter_links.size() * start_step + rep_i; - for (unsigned int fill_i = 1; fill_i < num_timesteps - start_step; ++fill_i) - data[row].push_back(reporter_data[0]); - data[row].insert(data[row].end(), - reporter_data.begin(), - reporter_data.begin() + start_step + reporter_data.size() - + // Made it to the inner loop which is just the different samples + for (const auto sample_i : index_range(reporter_data)) + { + for (unsigned int fill_i = 1; fill_i < num_timesteps - start_step; ++fill_i) + data[row].push_back(reporter_data[sample_i][0]); + + data[row].insert(data[row].end(), + reporter_data[sample_i].begin(), + reporter_data[sample_i].begin() + start_step + reporter_data[sample_i].size() - (num_timesteps - 1) - _shift_outputs); + } + } + + // We shift and scale the inputs to get better training efficiency + for (const auto & start_step : make_range(num_timesteps)) + { + unsigned int row = reporter_links.size() * start_step + rep_i; + std::transform( + data[row].begin(), + data[row].end(), + data[row].begin(), + [this, &rep_i](Real value) -> Real + { return (value - _response_shift_factors[rep_i]) * _response_scaling_factors[rep_i]; }); } } } void -LibtorchDRLControlTrainer::getOutputDataFromReporter( +LibtorchDRLControlTrainer::getSignalDataFromReporter( std::vector> & data, - const std::vector *> & reporter_links) + const std::vector> *> & reporter_links) { for (const auto & rep_i : index_range(reporter_links)) - // Fill the corresponding containers - data[rep_i].insert(data[rep_i].end(), - 
reporter_links[rep_i]->begin() + _shift_outputs, - reporter_links[rep_i]->end()); + for (const auto sample_i : index_range(*reporter_links[rep_i])) + // Fill the corresponding containers + data[rep_i].insert(data[rep_i].end(), + (*reporter_links[rep_i])[sample_i].begin() + _shift_outputs, + (*reporter_links[rep_i])[sample_i].end()); } void LibtorchDRLControlTrainer::getRewardDataFromReporter(std::vector & data, - const std::vector * const reporter_link) + const std::vector> * const reporter_link) { // Fill the corresponding container - data.insert(data.end(), reporter_link->begin() + _shift_outputs, reporter_link->end()); + for (const auto sample_i : index_range(*reporter_link)) + data.insert(data.end(), (*reporter_link)[sample_i].begin() + _shift_outputs, (*reporter_link)[sample_i].end()); } void LibtorchDRLControlTrainer::getReporterPointers( const std::vector & reporter_names, - std::vector *> & pointer_storage) + std::vector> *> & pointer_storage) { pointer_storage.clear(); for (const auto & name : reporter_names) - pointer_storage.push_back(&getReporterValueByName>(name)); + pointer_storage.push_back(&getReporterValueByName>>(name)); } #endif From 5b720676ff2caf18d94a18bb73c6ffdeb498e937 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 7 Jan 2025 17:36:25 -0700 Subject: [PATCH 02/51] Move examples to stochastic reporters. 
--- .../libtorch_drl_control_sub.i | 36 +++++------ .../libtorch_drl_control_trainer.i | 61 +++++++++---------- 2 files changed, 48 insertions(+), 49 deletions(-) diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i index 2dad8e5f504f..412feab5fd0f 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i @@ -10,8 +10,8 @@ air_effective_k = 0.5 # W/(m K) xmax = 7.0 ymin = 0.0 ymax = 5.0 - nx = 35 - ny = 25 + nx = 10 + ny = 10 [] [] @@ -97,7 +97,7 @@ air_effective_k = 0.5 # W/(m K) type = FunctionValuePostprocessor function = reward_function execute_on = 'INITIAL TIMESTEP_END' - indirect_dependencies = 'center_temp_tend env_temp' + indirect_dependencies = 'center_temp_tend' [] [top_flux] type = LibtorchControlValuePostprocessor @@ -112,7 +112,7 @@ air_effective_k = 0.5 # W/(m K) [Reporters] [T_reporter] type = AccumulateReporter - reporters = 'center_temp_tend/value env_temp/value reward/value top_flux/value log_prob_top_flux/value' + reporters = 'center_temp_tend/value reward/value top_flux/value log_prob_top_flux/value' [] [] @@ -121,14 +121,14 @@ air_effective_k = 0.5 # W/(m K) [src_control] type = LibtorchDRLControl parameters = "BCs/top_flux/value" - responses = 'center_temp_tend env_temp' + responses = 'center_temp_tend' # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '290 290' + input_timesteps = 1 + response_scaling_factors = '0.03' + response_shift_factors = '290' action_standard_deviations = '0.02' - action_scaling_factors = 200 + action_scaling_factors = 20 execute_on = 'TIMESTEP_BEGIN' [] @@ -140,14 +140,14 @@ air_effective_k = 0.5 # W/(m K) activation_function = 'relu' parameters = "BCs/top_flux/value" - responses = 
'center_temp_tend env_temp' + responses = 'center_temp_tend' # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '290 290' - action_standard_deviations = '0.02' - action_scaling_factors = 200 + input_timesteps = 1 + response_scaling_factors = '0.03' + response_shift_factors = '290' + action_standard_deviations = '0.01' + action_scaling_factors = 20 execute_on = 'TIMESTEP_BEGIN' [] @@ -165,13 +165,13 @@ air_effective_k = 0.5 # W/(m K) start_time = 0.0 end_time = 86400 - dt = 900.0 + dt = ${fparse 86400/4} [] [Outputs] - console = false + # console = false [c] - type = CSV + type = JSON execute_on = FINAL [] [] diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index 341a251214d3..52b2262b5bde 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -4,7 +4,7 @@ [Samplers] [dummy] type = CartesianProduct - linear_space_items = '0 0.01 1' + linear_space_items = '0 0.01 2' [] [] @@ -24,38 +24,39 @@ control_name = src_control [] [r_transfer] - type = MultiAppReporterTransfer + type = SamplerReporterTransfer from_multi_app = runner - to_reporters = 'results/center_temp results/env_temp results/reward results/top_flux results/log_prob_top_flux' - from_reporters = 'T_reporter/center_temp_tend:value T_reporter/env_temp:value T_reporter/reward:value T_reporter/top_flux:value T_reporter/log_prob_top_flux:value' + sampler = dummy + stochastic_reporter = storage + from_reporter = 'T_reporter/center_temp_tend:value T_reporter/reward:value T_reporter/top_flux:value T_reporter/log_prob_top_flux:value' [] [] [Trainers] [nn_trainer] type = LibtorchDRLControlTrainer - response = 'results/center_temp results/env_temp' - control = 
'results/top_flux' - log_probability = 'results/log_prob_top_flux' - reward = 'results/reward' + response = 'storage/r_transfer:T_reporter:center_temp_tend:value' + control = 'storage/r_transfer:T_reporter:top_flux:value' + log_probability = 'storage/r_transfer:T_reporter:log_prob_top_flux:value' + reward = 'storage/r_transfer:T_reporter:reward:value' - num_epochs = 1000 - update_frequency = 10 - decay_factor = 0.0 + num_epochs = 400 + update_frequency = 1 + decay_factor = 0.8 - loss_print_frequency = 10 + loss_print_frequency = 40 - critic_learning_rate = 0.0001 - num_critic_neurons_per_layer = '64 27' + critic_learning_rate = 0.0005 + num_critic_neurons_per_layer = '32 16' control_learning_rate = 0.0005 num_control_neurons_per_layer = '16 6' # keep consistent with LibtorchNeuralNetControl - input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '290 290' - action_standard_deviations = '0.02' + input_timesteps = 1 + response_scaling_factors = '0.03' + response_shift_factors = '290' + action_standard_deviations = '0.01' standardize_advantage = true @@ -64,26 +65,24 @@ [] [Reporters] - [results] - type = ConstantReporter - real_vector_names = 'center_temp env_temp reward top_flux log_prob_top_flux' - real_vector_values = '0; 0; 0; 0; 0' - outputs = csv - execute_on = timestep_begin - [] - [reward] - type = DRLRewardReporter - drl_trainer_name = nn_trainer + [storage] + type = StochasticReporter + parallel_type = ROOT [] + # [reward] + # type = DRLRewardReporter + # drl_trainer_name = nn_trainer + # [] [] [Executioner] type = Transient - num_steps = 440 + num_steps = 1 [] [Outputs] file_base = output/train_out - csv = true - time_step_interval = 10 + json = true + time_step_interval = 1 + execute_on = TIMESTEP_END [] From 67231ed66cd510ccd6c24529a4e998194538dad9 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 14 Jan 2025 10:47:46 -0700 Subject: [PATCH 03/51] Fix indexing issue in reward to go. 
--- .../libtorch/trainers/LibtorchDRLControlTrainer.C | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index cc48a9140917..e214506e4c4c 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -112,7 +112,7 @@ LibtorchDRLControlTrainer::validParams() "read_from_file", false, "Switch to read the neural network parameters from a file."); params.addParam( "shift_outputs", - true, + false, "If we would like to shift the outputs the realign the input-output pairs."); params.addParam( "standardize_advantage", @@ -297,14 +297,15 @@ LibtorchDRLControlTrainer::computeRewardToGo(std::vector & data, // rewards based on the current behavior. We go backwards in samples and backwards in // accumulation. unsigned int reward_i = reward_data_per_sim.size(); - for (unsigned int sample_i = reporter_link->size() - 1; sample_i >= 0; --sample_i) + for (const auto sample_i : index_range(*reporter_link)) { + const auto backward_sample_i = reporter_link->size() - sample_i - 1; Real discounted_reward(0.0); - const auto history_size = (*reporter_link)[sample_i].size() - _shift_outputs; + const auto history_size = (*reporter_link)[backward_sample_i].size() - _shift_outputs; - for (int i = 0; i < history_size; ++i) - { - discounted_reward = reward_data_per_sim[reward_i - 1 - i] + discounted_reward * _decay_factor; + for (const auto i : make_range(history_size)) + { + discounted_reward = reward_data_per_sim[reward_i - i - 1] + discounted_reward * _decay_factor; // We are inserting to the front of the vector and push the rest back, this will // ensure that the first element of the vector is the discounter reward for the whole transient @@ -312,7 +313,7 @@ 
LibtorchDRLControlTrainer::computeRewardToGo(std::vector & data, } // Update the global index - reward_i -= history_size; + reward_i -= (history_size + _shift_outputs); } // Save and accumulate the return values From bfa7fa61bf98ecb0402fbedb2052a7bd8af94a69 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 14 Jan 2025 13:25:56 -0700 Subject: [PATCH 04/51] Add sampler transfer. --- .../libtorch_drl_control_trainer.i | 3 +- .../SamplerNeuralNetControlTransfer.h | 50 ++++++++ .../trainers/LibtorchDRLControlTrainer.C | 7 + .../SamplerNeuralNetControlTransfer.C | 121 ++++++++++++++++++ 4 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h create mode 100644 modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index 52b2262b5bde..a06f8cb67677 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -18,10 +18,11 @@ [Transfers] [nn_transfer] - type = LibtorchNeuralNetControlTransfer + type = SamplerNeuralNetControlTransfer to_multi_app = runner trainer_name = nn_trainer control_name = src_control + sampler = dummy [] [r_transfer] type = SamplerReporterTransfer diff --git a/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h b/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h new file mode 100644 index 000000000000..1f9f750ba8b8 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h @@ -0,0 +1,50 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights 
reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef LIBTORCH_ENABLED + +#pragma once + +// torch-based includes +#include "LibtorchDRLControlTrainer.h" + +#include "StochasticToolsTransfer.h" +#include "SurrogateModelInterface.h" + +class SamplerNeuralNetControlTransfer : public StochasticToolsTransfer, public SurrogateModelInterface +{ +public: + static InputParameters validParams(); + + SamplerNeuralNetControlTransfer(const InputParameters & parameters); + + virtual void execute() override; + + ///@{ + /** + * Methods used when running in batch mode (see SamplerFullSolveMultiApp) + */ + virtual void initializeFromMultiapp() override; + virtual void executeFromMultiapp() override; + virtual void finalizeFromMultiapp() override; + + virtual void initializeToMultiapp() override; + virtual void executeToMultiapp() override; + virtual void finalizeToMultiapp() override; + ///@} + +protected: + /// The name of the control object on the other app where we want to copy our neural net + const std::string _control_name; + + /// The trainer object which will contains the control neural net + const LibtorchDRLControlTrainer & _trainer; +}; + +#endif diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index e214506e4c4c..83f7b7fabd4d 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -263,9 +263,16 @@ LibtorchDRLControlTrainer::execute() convertDataToTensor(_output_data, _output_tensor); convertDataToTensor(_log_probability_data, _log_probability_tensor); + std::cout << "Input tensor" << std::endl << _input_tensor << std::endl; + std::cout << "Signal tensor" << std::endl 
<< _output_tensor << std::endl; + std::cout << "Logprob tensor" << std::endl << _log_probability_tensor << std::endl; + std::cout << "reward" << std::endl << Moose::stringify(_reward_data) << std::endl; + // Discard (detach) the gradient info for return data LibtorchUtils::vectorToTensor(_return_data, _return_tensor, true); + std::cout << "Return tensor" << std::endl << _return_tensor << std::endl; + // We train the controller using the emulator to get a good control strategy trainController(); diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C new file mode 100644 index 000000000000..5af81737a3a8 --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C @@ -0,0 +1,121 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef LIBTORCH_ENABLED + +#include "SamplerNeuralNetControlTransfer.h" +#include "LibtorchNeuralNetControl.h" + +registerMooseObject("StochasticToolsApp", SamplerNeuralNetControlTransfer); + +InputParameters +SamplerNeuralNetControlTransfer::validParams() +{ + InputParameters params = StochasticToolsTransfer::validParams(); + params += SurrogateModelInterface::validParams(); + + params.addClassDescription("Copies a neural network from a trainer object on the main app to a " + "LibtorchNeuralNetControl object on the subapp."); + + params.suppressParameter("from_multi_app"); + // params.suppressParameter("multi_app"); + // params.suppressParameter("direction"); + + params.addRequiredParam("trainer_name", + "Trainer object that contains the neural networks." 
+ " for different samples."); + params.addRequiredParam("control_name", "Controller object name."); + return params; +} + +SamplerNeuralNetControlTransfer::SamplerNeuralNetControlTransfer( + const InputParameters & parameters) + : StochasticToolsTransfer(parameters), + SurrogateModelInterface(this), + _control_name(getParam("control_name")), + _trainer(getSurrogateTrainerByName( + getParam("trainer_name"))) +{ +} + +void +SamplerNeuralNetControlTransfer::execute() +{ + for (dof_id_type i = _sampler_ptr->getLocalRowBegin(); i < _sampler_ptr->getLocalRowEnd(); ++i) + if (getToMultiApp()->hasLocalApp(i)) + { + // Get the control neural net from the trainer + const Moose::LibtorchArtificialNeuralNet & trainer_nn = _trainer.controlNeuralNet(); + + // Get the control object from the other app + FEProblemBase & app_problem = _multi_app->appProblemBase(i); + auto & control_warehouse = app_problem.getControlWarehouse(); + std::shared_ptr control_ptr = control_warehouse.getActiveObject(_control_name); + LibtorchNeuralNetControl * control_object = + dynamic_cast(control_ptr.get()); + + if (!control_object) + paramError("control_name", "The given control is not a LibtorchNeuralNetrControl!"); + + // Copy and the neural net and execute it to get the initial values + control_object->loadControlNeuralNet(trainer_nn); + control_object->execute(); + } +} + +void +SamplerNeuralNetControlTransfer::initializeFromMultiapp() +{ +} + +void +SamplerNeuralNetControlTransfer::executeFromMultiapp() +{ +} + +void +SamplerNeuralNetControlTransfer::finalizeFromMultiapp() +{ +} + +void +SamplerNeuralNetControlTransfer::initializeToMultiapp() +{ +} + +void +SamplerNeuralNetControlTransfer::executeToMultiapp() +{ + if (getToMultiApp()->hasLocalApp(_global_index)) + { + // Get the control neural net from the trainer + const Moose::LibtorchArtificialNeuralNet & trainer_nn = _trainer.controlNeuralNet(); + + // Get the control object from the other app + FEProblemBase & app_problem = 
_multi_app->appProblemBase(_global_index); + auto & control_warehouse = app_problem.getControlWarehouse(); + std::shared_ptr control_ptr = control_warehouse.getActiveObject(_control_name); + LibtorchNeuralNetControl * control_object = + dynamic_cast(control_ptr.get()); + + if (!control_object) + paramError("control_name", "The given control is not a LibtorchNeuralNetrControl!"); + + // Copy and the neural net and execute it to get the initial values + control_object->loadControlNeuralNet(trainer_nn); + control_object->execute(); + } +} + +void +SamplerNeuralNetControlTransfer::finalizeToMultiapp() +{ +} + +#endif From 74f5e12ae72d7ca367aab68959ee39913cc2fb20 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 14 Jan 2025 13:56:14 -0700 Subject: [PATCH 05/51] Transition toward sampler and change seeds per application in this case. --- .../libtorch_drl_control_trainer.i | 1 + .../transfers/SamplerNeuralNetControlTransfer.h | 1 + .../transfers/SamplerNeuralNetControlTransfer.C | 10 ++++++++++ 3 files changed, 12 insertions(+) diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index a06f8cb67677..803d38cfb8fc 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -13,6 +13,7 @@ type = SamplerFullSolveMultiApp sampler = dummy input_files = 'libtorch_drl_control_sub.i' + mode = batch-reset [] [] diff --git a/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h b/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h index 1f9f750ba8b8..0fe3ab97052b 100644 --- a/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h +++ b/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h 
@@ -30,6 +30,7 @@ class SamplerNeuralNetControlTransfer : public StochasticToolsTransfer, public S /** * Methods used when running in batch mode (see SamplerFullSolveMultiApp) */ + virtual void initialSetup() override; virtual void initializeFromMultiapp() override; virtual void executeFromMultiapp() override; virtual void finalizeFromMultiapp() override; diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C index 5af81737a3a8..ea580ae2a2cf 100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C @@ -44,6 +44,16 @@ SamplerNeuralNetControlTransfer::SamplerNeuralNetControlTransfer( { } +void +SamplerNeuralNetControlTransfer::initialSetup() +{ + const auto multi_app = getToMultiApp(); + const dof_id_type n = multi_app->numGlobalApps(); + for (MooseIndex(n) i = 0; i < n; i++) + if (multi_app->hasLocalApp(i)) + torch::manual_seed(i); +} + void SamplerNeuralNetControlTransfer::execute() { From 4442115867f290cd7729a7dd19a548b7d094a3bb Mon Sep 17 00:00:00 2001 From: Peter German Date: Wed, 15 Jan 2025 14:24:18 -0700 Subject: [PATCH 06/51] Fix normalization problem. 
--- framework/src/bcs/FunctionDirichletBC.C | 1 - framework/src/bcs/NeumannBC.C | 1 - .../FunctionValuePostprocessor.C | 2 +- framework/src/postprocessors/PointValue.C | 2 +- .../libtorch_drl_control_sub.i | 8 ++--- .../libtorch_drl_control_trainer.i | 27 +++++++++-------- .../surrogates/LibtorchDRLControlTrainer.h | 2 ++ .../libtorch/controls/LibtorchDRLControl.C | 4 +-- .../trainers/LibtorchDRLControlTrainer.C | 30 +++++++++++++------ 9 files changed, 45 insertions(+), 32 deletions(-) diff --git a/framework/src/bcs/FunctionDirichletBC.C b/framework/src/bcs/FunctionDirichletBC.C index f6f6fec8c1dc..7ea9617fd960 100644 --- a/framework/src/bcs/FunctionDirichletBC.C +++ b/framework/src/bcs/FunctionDirichletBC.C @@ -31,6 +31,5 @@ FunctionDirichletBC::FunctionDirichletBC(const InputParameters & parameters) Real FunctionDirichletBC::computeQpValue() { - std::cout << "Environment temperature: " << _t << " " << _func.value(_t, *_current_node) << std::endl; return _func.value(_t, *_current_node); } diff --git a/framework/src/bcs/NeumannBC.C b/framework/src/bcs/NeumannBC.C index c2313a1fe142..c76597c0de20 100644 --- a/framework/src/bcs/NeumannBC.C +++ b/framework/src/bcs/NeumannBC.C @@ -38,7 +38,6 @@ template GenericReal NeumannBCTempl::computeQpResidual() { - std::cout << "The control value that I see: " << _value << std::endl; return -_test[_i][_qp] * _value; } diff --git a/framework/src/postprocessors/FunctionValuePostprocessor.C b/framework/src/postprocessors/FunctionValuePostprocessor.C index 0fc7999d3ae1..d3159730058c 100644 --- a/framework/src/postprocessors/FunctionValuePostprocessor.C +++ b/framework/src/postprocessors/FunctionValuePostprocessor.C @@ -84,7 +84,7 @@ FunctionValuePostprocessor::getValue() const if (_time_pp) return _scale_factor * _function.value(*_time_pp, p); - std::cout << name() << " " << _t << " " << _scale_factor * _function.value(_t, p) << std::endl; + // std::cout << name() << " " << _t << " " << _scale_factor * _function.value(_t, p) << 
std::endl; return _scale_factor * _function.value(_t, p); } diff --git a/framework/src/postprocessors/PointValue.C b/framework/src/postprocessors/PointValue.C index 48175d7eb16a..c9a8ce4cdfba 100644 --- a/framework/src/postprocessors/PointValue.C +++ b/framework/src/postprocessors/PointValue.C @@ -72,6 +72,6 @@ PointValue::execute() Real PointValue::getValue() const { - std::cout << "Point value " << _value << std::endl; + // std::cout << "Point value " << _value << std::endl; return _value; } diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i index 412feab5fd0f..850dea29154f 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i @@ -127,7 +127,7 @@ air_effective_k = 0.5 # W/(m K) input_timesteps = 1 response_scaling_factors = '0.03' response_shift_factors = '290' - action_standard_deviations = '0.02' + action_standard_deviations = '5e-5' action_scaling_factors = 20 execute_on = 'TIMESTEP_BEGIN' @@ -146,7 +146,7 @@ air_effective_k = 0.5 # W/(m K) input_timesteps = 1 response_scaling_factors = '0.03' response_shift_factors = '290' - action_standard_deviations = '0.01' + action_standard_deviations = '5e-5' action_scaling_factors = 20 execute_on = 'TIMESTEP_BEGIN' @@ -165,11 +165,11 @@ air_effective_k = 0.5 # W/(m K) start_time = 0.0 end_time = 86400 - dt = ${fparse 86400/4} + dt = ${fparse 86400/40} [] [Outputs] - # console = false + console = false [c] type = JSON execute_on = FINAL diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index 803d38cfb8fc..37b03fe304e6 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ 
b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -4,7 +4,7 @@ [Samplers] [dummy] type = CartesianProduct - linear_space_items = '0 0.01 2' + linear_space_items = '0 0.01 5' [] [] @@ -13,7 +13,7 @@ type = SamplerFullSolveMultiApp sampler = dummy input_files = 'libtorch_drl_control_sub.i' - mode = batch-reset + # mode = batch-reset [] [] @@ -43,22 +43,22 @@ reward = 'storage/r_transfer:T_reporter:reward:value' num_epochs = 400 - update_frequency = 1 - decay_factor = 0.8 + update_frequency = 10 + decay_factor = 0.9 loss_print_frequency = 40 critic_learning_rate = 0.0005 - num_critic_neurons_per_layer = '32 16' + num_critic_neurons_per_layer = '64 32' control_learning_rate = 0.0005 - num_control_neurons_per_layer = '16 6' + num_control_neurons_per_layer = '32 16' # keep consistent with LibtorchNeuralNetControl input_timesteps = 1 response_scaling_factors = '0.03' response_shift_factors = '290' - action_standard_deviations = '0.01' + action_standard_deviations = '5e-5' standardize_advantage = true @@ -71,20 +71,21 @@ type = StochasticReporter parallel_type = ROOT [] - # [reward] - # type = DRLRewardReporter - # drl_trainer_name = nn_trainer - # [] + [reward] + type = DRLRewardReporter + drl_trainer_name = nn_trainer + [] [] [Executioner] type = Transient - num_steps = 1 + num_steps = 4000 [] [Outputs] file_base = output/train_out - json = true + # json = true + csv = true time_step_interval = 1 execute_on = TIMESTEP_END [] diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index b0e7d39532c9..f1f478b74606 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -222,6 +222,8 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase void 
getSignalDataFromReporter(std::vector> & data, const std::vector> *> & reporter_links); + void normalizeResponseData(std::vector> & data, const unsigned int num_reporters, const unsigned int num_timesteps); + /** * Extract the reward values from the postprocessors of the controlled system * This assumes that they are stored in an AccumulateReporter. diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 240f455218f8..51a47a085139 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -86,8 +86,8 @@ LibtorchDRLControl::execute() log_probability.data_ptr() + log_probability.size(1)}; - std::cout << "Setting control signal to: " << Moose::stringify(_current_control_signals) << std::endl; - std::cout << "Setting log probability to: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; + // std::cout << "Setting control signal to: " << Moose::stringify(_current_control_signals) << std::endl; + // std::cout << "Setting log probability to: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; for (unsigned int control_i = 0; control_i < n_controls; ++control_i) { diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 83f7b7fabd4d..2b60986ac7e9 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -112,7 +112,7 @@ LibtorchDRLControlTrainer::validParams() "read_from_file", false, "Switch to read the neural network parameters from a file."); params.addParam( "shift_outputs", - false, + true, "If we would like to shift the outputs the realign the input-output pairs."); params.addParam( 
"standardize_advantage", @@ -258,25 +258,27 @@ LibtorchDRLControlTrainer::execute() // We compute the average reward first computeAverageEpisodeReward(); + normalizeResponseData(_input_data, _response_value_pointers.size(), _input_timesteps); + // Transform input/output/return data to torch::Tensor convertDataToTensor(_input_data, _input_tensor); convertDataToTensor(_output_data, _output_tensor); convertDataToTensor(_log_probability_data, _log_probability_tensor); - std::cout << "Input tensor" << std::endl << _input_tensor << std::endl; - std::cout << "Signal tensor" << std::endl << _output_tensor << std::endl; - std::cout << "Logprob tensor" << std::endl << _log_probability_tensor << std::endl; - std::cout << "reward" << std::endl << Moose::stringify(_reward_data) << std::endl; + // std::cout << "Input tensor" << std::endl << _input_tensor << std::endl; + // std::cout << "Signal tensor" << std::endl << _output_tensor << std::endl; + // std::cout << "Logprob tensor" << std::endl << _log_probability_tensor << std::endl; + // std::cout << "reward" << std::endl << Moose::stringify(_reward_data) << std::endl; // Discard (detach) the gradient info for return data LibtorchUtils::vectorToTensor(_return_data, _return_tensor, true); - std::cout << "Return tensor" << std::endl << _return_tensor << std::endl; + // std::cout << "Return tensor" << std::endl << _return_tensor << std::endl; // We train the controller using the emulator to get a good control strategy trainController(); - // We clean the training data after contoller update and reset the counter + // We clean the training data after controller update and reset the counter resetData(); } } @@ -320,7 +322,7 @@ LibtorchDRLControlTrainer::computeRewardToGo(std::vector & data, } // Update the global index - reward_i -= (history_size + _shift_outputs); + reward_i -= history_size; } // Save and accumulate the return values @@ -453,6 +455,8 @@ LibtorchDRLControlTrainer::getResponseDataFromReporter( // Fetch the vector of 
time series for a given reporter const std::vector> & reporter_data = *reporter_links[rep_i]; + // std::cout << "Adding response: " << Moose::stringify(reporter_data) << std::endl; + // We might consider using older time steps too which requires adding new // rows and populating them with staggered data for (const auto & start_step : make_range(num_timesteps)) @@ -471,11 +475,19 @@ LibtorchDRLControlTrainer::getResponseDataFromReporter( (num_timesteps - 1) - _shift_outputs); } } + } +} +void LibtorchDRLControlTrainer::normalizeResponseData(std::vector> & data, const unsigned int num_reporters, const unsigned int num_timesteps) +{ + // std::cout << " Normalizing " << Moose::stringify(data) << std::endl; + // We have multiple reporters, each has a time series for each sample + for (const auto & rep_i : make_range(num_reporters)) + { // We shift and scale the inputs to get better training efficiency for (const auto & start_step : make_range(num_timesteps)) { - unsigned int row = reporter_links.size() * start_step + rep_i; + unsigned int row = num_reporters * start_step + rep_i; std::transform( data[row].begin(), data[row].end(), From 8ad68505b111493db6339a0126cb215d7aa48c0a Mon Sep 17 00:00:00 2001 From: Peter German Date: Wed, 22 Jan 2025 08:37:16 -0700 Subject: [PATCH 07/51] Check sensitivity to inputs, understand multi worker behavior. 
--- .../libtorch_drl_control_sub.i | 40 ++--- .../libtorch_drl_control_trainer.i | 20 +-- .../libtorch/controls/LibtorchDRLControl.C | 5 + .../trainers/LibtorchDRLControlTrainer.C | 142 +++++++++++------- .../SamplerNeuralNetControlTransfer.C | 17 ++- 5 files changed, 139 insertions(+), 85 deletions(-) diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i index 850dea29154f..97cf7ee691b6 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i @@ -117,7 +117,7 @@ air_effective_k = 0.5 # W/(m K) [] [Controls] - inactive = 'src_control_final' + # inactive = 'src_control_final' [src_control] type = LibtorchDRLControl parameters = "BCs/top_flux/value" @@ -127,41 +127,41 @@ air_effective_k = 0.5 # W/(m K) input_timesteps = 1 response_scaling_factors = '0.03' response_shift_factors = '290' - action_standard_deviations = '5e-5' + action_standard_deviations = '3e-2' action_scaling_factors = 20 execute_on = 'TIMESTEP_BEGIN' [] - [src_control_final] - type = LibtorchNeuralNetControl + # [src_control_final] + # type = LibtorchNeuralNetControl - filename = 'mynet_control.net' - num_neurons_per_layer = '16 6' - activation_function = 'relu' + # filename = 'mynet_control.net' + # num_neurons_per_layer = '16 6' + # activation_function = 'relu' - parameters = "BCs/top_flux/value" - responses = 'center_temp_tend' + # parameters = "BCs/top_flux/value" + # responses = 'center_temp_tend' - # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 1 - response_scaling_factors = '0.03' - response_shift_factors = '290' - action_standard_deviations = '5e-5' - action_scaling_factors = 20 + # # keep consistent with LibtorchDRLControlTrainer + # input_timesteps = 1 + # response_scaling_factors = '0.03' + # response_shift_factors = 
'290' + # action_standard_deviations = '5e-5' + # action_scaling_factors = 20 - execute_on = 'TIMESTEP_BEGIN' - [] + # execute_on = 'TIMESTEP_BEGIN' + # [] [] [Executioner] type = Transient solve_type = 'NEWTON' - petsc_options_iname = '-pc_type -pc_factor_shift_type' - petsc_options_value = 'lu NONZERO' + petsc_options_iname = '-pc_type -pc_hypre_type' + petsc_options_value = 'hypre boomeramg' line_search = 'none' - nl_rel_tol = 1e-7 + nl_rel_tol = 1e-6 start_time = 0.0 end_time = 86400 diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index 37b03fe304e6..4eaadebd228a 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -4,7 +4,7 @@ [Samplers] [dummy] type = CartesianProduct - linear_space_items = '0 0.01 5' + linear_space_items = '0 0.01 1' [] [] @@ -42,23 +42,23 @@ log_probability = 'storage/r_transfer:T_reporter:log_prob_top_flux:value' reward = 'storage/r_transfer:T_reporter:reward:value' - num_epochs = 400 - update_frequency = 10 - decay_factor = 0.9 + num_epochs = 120 + update_frequency = 2 + decay_factor = 0.8 - loss_print_frequency = 40 + loss_print_frequency = 10 - critic_learning_rate = 0.0005 - num_critic_neurons_per_layer = '64 32' + critic_learning_rate = 0.005 + num_critic_neurons_per_layer = '32 16' - control_learning_rate = 0.0005 + control_learning_rate = 0.005 num_control_neurons_per_layer = '32 16' # keep consistent with LibtorchNeuralNetControl input_timesteps = 1 response_scaling_factors = '0.03' response_shift_factors = '290' - action_standard_deviations = '5e-5' + action_standard_deviations = '3e-2' standardize_advantage = true @@ -79,7 +79,7 @@ [Executioner] type = Transient - num_steps = 4000 + num_steps = 4 [] [Outputs] diff --git 
a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 51a47a085139..040b585bcbf5 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -73,9 +73,14 @@ LibtorchDRLControl::execute() // Evaluate the neural network to get the expected control value torch::Tensor output_tensor = _nn->forward(input_tensor); + // std::cout << "Input " << input_tensor << std::endl; + // std::cout << "Output " << output_tensor << std::endl; + // Sample control value (action) from Gaussian distribution torch::Tensor action = at::normal(output_tensor, _std); + // std::cout << "Action " << action << std::endl; + // Compute log probability torch::Tensor log_probability = computeLogProbability(action, output_tensor); diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 2b60986ac7e9..d243915258fc 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -158,6 +158,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _filename_base(isParamValid("filename_base") ? 
getParam("filename_base") : ""), _read_from_file(getParam("read_from_file")), _shift_outputs(getParam("shift_outputs")), + _average_episode_reward(0.0), _standardize_advantage(getParam("standardize_advantage")), _loss_print_frequency(getParam("loss_print_frequency")), _update_counter(_update_frequency) @@ -265,16 +266,9 @@ LibtorchDRLControlTrainer::execute() convertDataToTensor(_output_data, _output_tensor); convertDataToTensor(_log_probability_data, _log_probability_tensor); - // std::cout << "Input tensor" << std::endl << _input_tensor << std::endl; - // std::cout << "Signal tensor" << std::endl << _output_tensor << std::endl; - // std::cout << "Logprob tensor" << std::endl << _log_probability_tensor << std::endl; - // std::cout << "reward" << std::endl << Moose::stringify(_reward_data) << std::endl; - // Discard (detach) the gradient info for return data LibtorchUtils::vectorToTensor(_return_data, _return_tensor, true); - // std::cout << "Return tensor" << std::endl << _return_tensor << std::endl; - // We train the controller using the emulator to get a good control strategy trainController(); @@ -332,56 +326,100 @@ LibtorchDRLControlTrainer::computeRewardToGo(std::vector & data, void LibtorchDRLControlTrainer::trainController() { - // Define the optimizers for the training - torch::optim::Adam actor_optimizer(_control_nn->parameters(), - torch::optim::AdamOptions(_control_learning_rate)); + // We only train on the rank 0 partition. Libtorch should still be able to + // fetch the local threads which are available. 
+ if (processor_id() == 0) + { + // std::cout << "Training" << std::endl; + // std::cout << "Input tensor" << std::endl << _input_tensor << std::endl; + // std::cout << "Signal tensor" << std::endl << _output_tensor << std::endl; + // std::cout << "Logprob tensor" << std::endl << _log_probability_tensor << std::endl; + // std::cout << "reward" << std::endl << Moose::stringify(_reward_data) << std::endl; + // std::cout << "Return tensor" << std::endl << _return_tensor << std::endl; + + // Define the optimizers for the training + torch::optim::Adam actor_optimizer(_control_nn->parameters(), + torch::optim::AdamOptions(_control_learning_rate)); + + torch::optim::Adam critic_optimizer(_critic_nn->parameters(), + torch::optim::AdamOptions(_critic_learning_rate)); + + // Compute the approximate value (return) from the critic neural net and use it to compute an + // advantage + auto value = evaluateValue(_input_tensor).detach(); + auto advantage = _return_tensor - value; - torch::optim::Adam critic_optimizer(_critic_nn->parameters(), - torch::optim::AdamOptions(_critic_learning_rate)); + // If requested, standardize the advantage + if (_standardize_advantage) + advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10); - // Compute the approximate value (return) from the critic neural net and use it to compute an - // advantage - auto value = evaluateValue(_input_tensor).detach(); - auto advantage = _return_tensor - value; + for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch) + { + // Get the approximate return from the neural net again (this one does have an associated + // gradient) + value = evaluateValue(_input_tensor); + // Get the approximate logarithmic action probability using the control neural net + auto curr_log_probability = evaluateAction(_input_tensor, _output_tensor); + + // Prepare the ratio by using the e^(logx-logy)=x/y expression + auto ratio = (curr_log_probability - _log_probability_tensor).exp(); + + // Use clamping for limiting + 
auto surr1 = ratio * advantage; + auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage; + + // Compute loss values for the critic and the control neural net + auto actor_loss = -torch::min(surr1, surr2).mean(); + auto critic_loss = torch::mse_loss(value, _return_tensor); + + // Update the weights in the neural nets + actor_optimizer.zero_grad(); + actor_loss.backward(); + actor_optimizer.step(); + + critic_optimizer.zero_grad(); + critic_loss.backward(); + critic_optimizer.step(); + + // const auto & named_params = _control_nn->named_parameters(); + // for (const auto & param_i : make_range(named_params.size())) + // { + // // We cast the parameters into a 1D vector + // std::cout << Moose::stringify(std::vector( + // named_params[param_i].value().data_ptr(), + // named_params[param_i].value().data_ptr() + named_params[param_i].value().numel())) << std::endl; + // } + + // print loss per epoch + if (_loss_print_frequency) + if (epoch % _loss_print_frequency == 0) + { + + _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN + << actor_loss.item() << COLOR_DEFAULT << " | Critic Loss: " << COLOR_GREEN + << critic_loss.item() << COLOR_DEFAULT << std::endl; + } + } + } - // If requested, standardize the advantage - if (_standardize_advantage) - advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10); + // It is time to send the trained data to every other processor so that the neural networks + // are the same on all ranks. TODO: Make sure this can be done on a GPU as well. 
+ for (auto & param : _control_nn->named_parameters()) + { + MPI_Bcast(param.value().data_ptr(), + param.value().numel(), + MPI_DOUBLE, + 0, + _communicator.get()); + } - for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch) + for (auto & param : _critic_nn->named_parameters()) { - // Get the approximate return from the neural net again (this one does have an associated - // gradient) - value = evaluateValue(_input_tensor); - // Get the approximate logarithmic action probability using the control neural net - auto curr_log_probability = evaluateAction(_input_tensor, _output_tensor); - - // Prepare the ratio by using the e^(logx-logy)=x/y expression - auto ratio = (curr_log_probability - _log_probability_tensor).exp(); - - // Use clamping for limiting - auto surr1 = ratio * advantage; - auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage; - - // Compute loss values for the critic and the control neural net - auto actor_loss = -torch::min(surr1, surr2).mean(); - auto critic_loss = torch::mse_loss(value, _return_tensor); - - // Update the weights in the neural nets - actor_optimizer.zero_grad(); - actor_loss.backward(); - actor_optimizer.step(); - - critic_optimizer.zero_grad(); - critic_loss.backward(); - critic_optimizer.step(); - - // print loss per epoch - if (_loss_print_frequency) - if (epoch % _loss_print_frequency == 0) - _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN - << actor_loss.item() << COLOR_DEFAULT << " | Critic Loss: " << COLOR_GREEN - << critic_loss.item() << COLOR_DEFAULT << std::endl; + MPI_Bcast(param.value().data_ptr(), + param.value().numel(), + MPI_DOUBLE, + 0, + _communicator.get()); } // Save the controller neural net so our controller can read it, we also save the critic if we diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C index ea580ae2a2cf..5dc33a99724f 
100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C @@ -24,8 +24,6 @@ SamplerNeuralNetControlTransfer::validParams() "LibtorchNeuralNetControl object on the subapp."); params.suppressParameter("from_multi_app"); - // params.suppressParameter("multi_app"); - // params.suppressParameter("direction"); params.addRequiredParam("trainer_name", "Trainer object that contains the neural networks." @@ -57,7 +55,10 @@ SamplerNeuralNetControlTransfer::initialSetup() void SamplerNeuralNetControlTransfer::execute() { - for (dof_id_type i = _sampler_ptr->getLocalRowBegin(); i < _sampler_ptr->getLocalRowEnd(); ++i) + const auto n = getToMultiApp()->numGlobalApps(); + for (MooseIndex(n) i = 0; i < n; i++) + { + // std::cout << "Do I have this app? " << i << " " << getToMultiApp()->hasLocalApp(i) << std::endl; if (getToMultiApp()->hasLocalApp(i)) { // Get the control neural net from the trainer @@ -76,7 +77,17 @@ SamplerNeuralNetControlTransfer::execute() // Copy and the neural net and execute it to get the initial values control_object->loadControlNeuralNet(trainer_nn); control_object->execute(); + + // const auto & named_params = trainer_nn.named_parameters(); + // for (const auto & param_i : make_range(named_params.size())) + // { + // // We cast the parameters into a 1D vector + // std::cout << "Transferring " << Moose::stringify(std::vector( + // named_params[param_i].value().data_ptr(), + // named_params[param_i].value().data_ptr() + named_params[param_i].value().numel())) << std::endl; + // } } + } } void From ace272b5e7d1f4498e18cfd104f470fceddc536c Mon Sep 17 00:00:00 2001 From: Peter German Date: Sat, 1 Feb 2025 18:36:55 -0700 Subject: [PATCH 08/51] Add reward pp --- .../LiftDragRewardPostprocessor.h | 46 ++++++++++++ .../LiftDragRewardPostprocessor.C | 71 +++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 
framework/include/postprocessors/LiftDragRewardPostprocessor.h create mode 100644 framework/src/postprocessors/LiftDragRewardPostprocessor.C diff --git a/framework/include/postprocessors/LiftDragRewardPostprocessor.h b/framework/include/postprocessors/LiftDragRewardPostprocessor.h new file mode 100644 index 000000000000..a122d66d31fb --- /dev/null +++ b/framework/include/postprocessors/LiftDragRewardPostprocessor.h @@ -0,0 +1,46 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#pragma once + +// MOOSE includes +#include "GeneralPostprocessor.h" + +class LiftDragRewardPostprocessor : public GeneralPostprocessor +{ +public: + static InputParameters validParams(); + LiftDragRewardPostprocessor(const InputParameters & parameters); + + virtual void execute() override; + virtual void initialize() override {} + using Postprocessor::getValue; + virtual Real getValue() const override; + +protected: + + const PostprocessorValue & _lift; + const PostprocessorValue & _drag; + + const unsigned int _averaging_window; + + const Real _coeff_1; + const Real _coeff_2; + + Real _avg_lift; + Real _avg_drag; + + std::vector _lift_history; + std::vector _drag_history; + + unsigned int _replace_counter; + + + +}; diff --git a/framework/src/postprocessors/LiftDragRewardPostprocessor.C b/framework/src/postprocessors/LiftDragRewardPostprocessor.C new file mode 100644 index 000000000000..7ad86dd830f0 --- /dev/null +++ b/framework/src/postprocessors/LiftDragRewardPostprocessor.C @@ -0,0 +1,71 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* 
Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#include "LiftDragRewardPostprocessor.h" +#include "FEProblemBase.h" +#include "NonlinearSystemBase.h" +#include "MathUtils.h" +#include "TransientBase.h" +#include "Restartable.h" +#include "libmesh/enum_norm_type.h" + +registerMooseObject("MooseApp", LiftDragRewardPostprocessor); + +InputParameters +LiftDragRewardPostprocessor::validParams() +{ + InputParameters params = GeneralPostprocessor::validParams(); + + params.addRequiredParam("lift","Lift coeff"); + params.addRequiredParam("drag","Drag coeff"); + + params.addParam("averaging_window", 1, "The window"); + params.addParam("coeff_1", 0.2, "Coeff 1"); + params.addParam("coeff_2", 1.59, "Coeff 2"); + + params.addClassDescription("Blabla."); + + return params; +} + +LiftDragRewardPostprocessor::LiftDragRewardPostprocessor(const InputParameters & parameters) + : GeneralPostprocessor(parameters), + _lift(getPostprocessorValue("lift")), + _drag(getPostprocessorValue("drag")), + _averaging_window(getParam("averaging_window")), + _coeff_1(getParam("coeff_1")), + _coeff_2(getParam("coeff_2")), + _avg_lift(0.0), + _avg_drag(0.0), + _lift_history(std::vector(_averaging_window,0.0)), + _drag_history(std::vector(_averaging_window,0.0)), + _replace_counter(0) +{ +} + +Real +LiftDragRewardPostprocessor::getValue() const +{ + return _coeff_1 + _avg_drag - _coeff_2*std::abs(_avg_lift); +} + +void +LiftDragRewardPostprocessor::execute() +{ + auto rolling_index = _replace_counter % _averaging_window; + auto normalization = std::min(_replace_counter + 1, _averaging_window); + + _lift_history[rolling_index] = _lift; + _drag_history[rolling_index] = _drag; + + _avg_lift = std::reduce(_lift_history.begin(), _lift_history.end())/normalization; + _avg_drag = std::reduce(_drag_history.begin(), _drag_history.end())/normalization; + + _replace_counter++; +} From abe70b98d8c21fcd8906c4dbdfc0141a8b07b481 Mon Sep 17 00:00:00 2001 
From: Peter German Date: Sat, 1 Feb 2025 18:38:37 -0700 Subject: [PATCH 09/51] Add files for vortex shedding example. --- .../flow_over_circle_linearfv.i | 275 ++++++++++++++++++ .../stochastic/vortex_control/header.i | 40 +++ .../vortex_control/libtorch_drl_control_sub.i | 177 +++++++++++ .../libtorch_drl_control_trainer.i | 88 ++++++ .../examples/stochastic/vortex_control/mesh.i | 240 +++++++++++++++ 5 files changed, 820 insertions(+) create mode 100644 modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i create mode 100644 modules/combined/examples/stochastic/vortex_control/header.i create mode 100644 modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_sub.i create mode 100644 modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_trainer.i create mode 100644 modules/combined/examples/stochastic/vortex_control/mesh.i diff --git a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i new file mode 100644 index 000000000000..bb7a0847aad9 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i @@ -0,0 +1,275 @@ +# [Mesh] +# [fmg] +# type = FileMeshGenerator +# file = flow_over_circle_linearfv_out_orig.e +# use_for_exodus_restart = true +# [] +# [] + +[Problem] + linear_sys_names = 'u_system v_system pressure_system' + previous_nl_solution_required = true +[] + +[Functions] + [inlet_function] + type = ParsedFunction + expression = '4*U*(y-ymin)*(ymax-y)/(ymax-ymin)/(ymax-ymin)' + symbol_names = 'U ymax ymin' + symbol_values = '${inlet_velocity} ${y_max} ${y_min}' + [] + [gap_x] + type = ParsedFunction + expression = 'Q*x/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' + symbol_names = 'Q' + symbol_values = '${Q}' + [] + [gap_y] + type = ParsedFunction + expression = 'if(y>0,Q,-Q)*y/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' + symbol_names = 'Q' + 
symbol_values = '${Q}' + [] +[] + +[UserObjects] + [rc] + type = RhieChowMassFlux + u = vel_x + v = vel_y + pressure = pressure + rho = ${rho} + p_diffusion_kernel = p_diffusion + [] +[] + +[Variables] + [vel_x] + type = MooseLinearVariableFVReal + solver_sys = u_system + # initial_from_file_var = vel_x + # initial_from_file_timestep = LATEST + [] + [vel_y] + type = MooseLinearVariableFVReal + solver_sys = v_system + # initial_from_file_var = vel_y + # initial_from_file_timestep = LATEST + [] + [pressure] + type = MooseLinearVariableFVReal + initial_condition = 0 + solver_sys = pressure_system + # initial_from_file_var = pressure + # initial_from_file_timestep = LATEST + [] +[] + +[LinearFVKernels] + [u_time] + type = LinearFVTimeDerivative + variable = vel_x + factor = ${rho} + [] + [u_advection_stress] + type = LinearWCNSFVMomentumFlux + variable = vel_x + advected_interp_method = ${advected_interp_method} + mu = ${mu} + u = vel_x + v = vel_y + momentum_component = 'x' + rhie_chow_user_object = 'rc' + use_nonorthogonal_correction = true + [] + [u_pressure] + type = LinearFVMomentumPressure + variable = vel_x + pressure = pressure + momentum_component = 'x' + [] + + [v_time] + type = LinearFVTimeDerivative + variable = vel_y + factor = ${rho} + [] + [v_advection_stress] + type = LinearWCNSFVMomentumFlux + variable = vel_y + advected_interp_method = ${advected_interp_method} + mu = ${mu} + u = vel_x + v = vel_y + momentum_component = 'y' + rhie_chow_user_object = 'rc' + use_nonorthogonal_correction = true + [] + [v_pressure] + type = LinearFVMomentumPressure + variable = vel_y + pressure = pressure + momentum_component = 'y' + [] + + [p_diffusion] + type = LinearFVAnisotropicDiffusion + variable = pressure + diffusion_tensor = Ainv + use_nonorthogonal_correction = true + [] + [HbyA_divergence] + type = LinearFVDivergence + variable = pressure + face_flux = HbyA + force_boundary_execution = true + [] +[] + +[LinearFVBCs] + [inlet_x] + type = 
LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'left_boundary' + functor = 'inlet_function' + [] + [inlet_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'left_boundary' + functor = 0 + [] + [circle_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'circle' + functor = 0 + [] + [circle_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'circle' + functor = 0 + [] + [gap_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'top_gap bottom_gap' + functor = 'gap_x' + [] + [gap_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'top_gap bottom_gap' + functor = 'gap_y' + [] + [walls_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'top_boundary bottom_boundary' + functor = 0 + [] + [walls_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'top_boundary bottom_boundary' + functor = 0 + [] + [outlet_p] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + boundary = 'right_boundary' + variable = pressure + functor = 0 + [] + [outlet_u] + type = LinearFVAdvectionDiffusionOutflowBC + variable = vel_x + use_two_term_expansion = false + boundary = 'right_boundary' + [] + [outlet_v] + type = LinearFVAdvectionDiffusionOutflowBC + variable = vel_y + use_two_term_expansion = false + boundary = 'right_boundary' + [] +[] + +[Postprocessors] + [drag_force] + type = IntegralDirectedSurfaceForce + vel_x = vel_x + vel_y = vel_y + mu = ${mu} + pressure = pressure + principal_direction = '1 0 0' + boundary = 'circle' + outputs = none + execute_on = 'INITIAL TIMESTEP_END' + [] + [drag_coeff] + type = ParsedPostprocessor + expression = '2*drag_force/rho/(avgvel*avgvel)/D' + constant_names = 'rho avgvel D' + constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' 
+ pp_names = 'drag_force' + execute_on = 'INITIAL TIMESTEP_END' + [] + [lift_force] + type = IntegralDirectedSurfaceForce + vel_x = vel_x + vel_y = vel_y + mu = ${mu} + pressure = pressure + principal_direction = '0 1 0' + boundary = 'circle' + outputs = none + execute_on = 'INITIAL TIMESTEP_END' + [] + [lift_coeff] + type = ParsedPostprocessor + expression = '2*lift_force/rho/(avgvel*avgvel)/D' + constant_names = 'rho avgvel D' + constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' + pp_names = 'lift_force' + execute_on = 'INITIAL TIMESTEP_END' + [] + [reward] + type = LiftDragRewardPostprocessor + lift = lift_coeff + drag = drag_coeff + averaging_window = 1 + coeff_1 = 0.2 + coeff_2 = 1.59 + execute_on = 'INITIAL TIMESTEP_END' + [] +[] + +[Executioner] + type = PIMPLE + momentum_l_abs_tol = 1e-7 + pressure_l_abs_tol = 1e-7 + momentum_l_tol = 1e-7 + pressure_l_tol = 1e-7 + rhie_chow_user_object = 'rc' + momentum_systems = 'u_system v_system' + pressure_system = 'pressure_system' + momentum_equation_relaxation = 0.9 + pressure_variable_relaxation = 0.6 + num_iterations = 100 + pressure_absolute_tolerance = 5e-6 + momentum_absolute_tolerance = 5e-6 + momentum_petsc_options_iname = '-pc_type -pc_hypre_type' + momentum_petsc_options_value = 'hypre boomeramg' + pressure_petsc_options_iname = '-pc_type -pc_hypre_type' + pressure_petsc_options_value = 'hypre boomeramg' + print_fields = false + continue_on_max_its = true + dt = 0.002 + num_steps = 50 +[] + +[Outputs] + exodus = true + csv = true +[] diff --git a/modules/combined/examples/stochastic/vortex_control/header.i b/modules/combined/examples/stochastic/vortex_control/header.i new file mode 100644 index 000000000000..27113960bfc5 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/header.i @@ -0,0 +1,40 @@ +# ----------------------------------------------------------------------------- +# Flow around a cylinder (2D) benchmark validation case +# This example 
showcases a flow around a cylinder which results in vortex +# shedding. The problem specification has been taken from the following paper: +# +# @incollection{schafer1996benchmark, +# title={Benchmark computations of laminar flow around a cylinder}, +# author={Sch{\"a}fer, Michael and Turek, Stefan and Durst, Franz and Krause, Egon and Rannacher, Rolf}, +# booktitle={Flow simulation with high-performance computers II}, +# pages={547--566}, +# year={1996}, +# publisher={Springer} +# } +# The Reynolds number is Re=100. +# The expected Strouhal number (St) is in the [0.2950, 0.3050] range, with +# refinement=8, we expect to get St=0.2941 with the model below. +# Run it using the following command: +# ./navier_stokes-opt -i header.i mesh.i flow_over_circle.i executioner_postprocessor.i +# ----------------------------------------------------------------------------- + +# Geometry parameters +circle_radius = 0.05 +pitch = 0.2 +x_min = -0.2 +x_max = 1.5 +y_min = -0.2 +y_max = 0.21 +rundoff = 1e-4 +refinement = 8 + +# Material properties +mu = 1e-3 +rho = 1 + +# Boundary conditions +inlet_velocity = 1.5 +Q = 0.0 + +# Numerical schemes +advected_interp_method = 'average' diff --git a/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_sub.i b/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_sub.i new file mode 100644 index 000000000000..eb7b672d202a --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_sub.i @@ -0,0 +1,177 @@ +air_density = 1.184 # kg/m3 +air_cp = 1000 # J/(kg K) +air_effective_k = 0.5 # W/(m K) + +[Mesh] + [mesh] + type = GeneratedMeshGenerator + dim = 2 + xmin = 0.0 + xmax = 7.0 + ymin = 0.0 + ymax = 5.0 + nx = 10 + ny = 10 + [] +[] + +[Variables] + [T] + initial_condition = 297 + [] +[] + +[Kernels] + [time_derivative] + type = CoefTimeDerivative + variable = T + Coefficient = '${fparse air_density*air_cp}' + [] + [heat_conduction] + type = MatDiffusion + variable = T + 
diffusivity = 'k' + [] +[] + +[BCs] + [top_flux] + type = NeumannBC + value = 0.0 + boundary = 'top' + variable = T + [] + [dirichlet] + type = FunctionDirichletBC + function = temp_env + variable = T + boundary = 'left right' + [] +[] + +[Functions] + [temp_env] + type = ParsedFunction + value = '15.0*sin(t/86400.0*pi) + 273.0' + [] + [design_function] + type = ParsedFunction + value = '297' + [] + [reward_function] + type = ScaledAbsDifferenceDRLRewardFunction + design_function = design_function + observed_value = center_temp_tend + c1 = 1 + c2 = 10 + [] +[] + +[Materials] + [constant] + type = GenericConstantMaterial + prop_names = 'k' + prop_values = ${air_effective_k} + [] +[] + +[Postprocessors] + [center_temp] + type = PointValue + variable = T + point = '3.5 2.5 0.0' + execute_on = 'INITIAL TIMESTEP_BEGIN' + [] + [center_temp_tend] + type = PointValue + variable = T + point = '3.5 2.5 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [env_temp] + type = FunctionValuePostprocessor + function = temp_env + execute_on = 'INITIAL TIMESTEP_BEGIN' + [] + [reward] + type = FunctionValuePostprocessor + function = reward_function + execute_on = 'INITIAL TIMESTEP_END' + indirect_dependencies = 'center_temp_tend' + [] + [top_flux] + type = LibtorchControlValuePostprocessor + control_name = src_control + [] + [log_prob_top_flux] + type = LibtorchDRLLogProbabilityPostprocessor + control_name = src_control + [] +[] + +[Reporters] + [T_reporter] + type = AccumulateReporter + reporters = 'center_temp_tend/value reward/value top_flux/value log_prob_top_flux/value' + [] +[] + +[Controls] + inactive = 'src_control_final' + [src_control] + type = LibtorchDRLControl + parameters = "BCs/top_flux/value" + responses = 'center_temp_tend' + + # keep consistent with LibtorchDRLControlTrainer + input_timesteps = 1 + response_scaling_factors = '0.03' + response_shift_factors = '290' + action_standard_deviations = '0.02' + action_scaling_factors = 20 + + execute_on = 'TIMESTEP_BEGIN' + 
[] + [src_control_final] + type = LibtorchNeuralNetControl + + filename = 'mynet_control.net' + num_neurons_per_layer = '16 6' + activation_function = 'relu' + + parameters = "BCs/top_flux/value" + responses = 'center_temp_tend' + + # keep consistent with LibtorchDRLControlTrainer + input_timesteps = 1 + response_scaling_factors = '0.03' + response_shift_factors = '290' + action_standard_deviations = '0.01' + action_scaling_factors = 20 + + execute_on = 'TIMESTEP_BEGIN' + [] +[] + +[Executioner] + type = Transient + solve_type = 'NEWTON' + + petsc_options_iname = '-pc_type -pc_factor_shift_type' + petsc_options_value = 'lu NONZERO' + line_search = 'none' + + nl_rel_tol = 1e-7 + + start_time = 0.0 + end_time = 86400 + dt = ${fparse 86400/4} +[] + +[Outputs] + # console = false + [c] + type = JSON + execute_on = FINAL + [] +[] diff --git a/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_trainer.i b/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_trainer.i new file mode 100644 index 000000000000..52b2262b5bde --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_trainer.i @@ -0,0 +1,88 @@ +[StochasticTools] +[] + +[Samplers] + [dummy] + type = CartesianProduct + linear_space_items = '0 0.01 2' + [] +[] + +[MultiApps] + [runner] + type = SamplerFullSolveMultiApp + sampler = dummy + input_files = 'libtorch_drl_control_sub.i' + [] +[] + +[Transfers] + [nn_transfer] + type = LibtorchNeuralNetControlTransfer + to_multi_app = runner + trainer_name = nn_trainer + control_name = src_control + [] + [r_transfer] + type = SamplerReporterTransfer + from_multi_app = runner + sampler = dummy + stochastic_reporter = storage + from_reporter = 'T_reporter/center_temp_tend:value T_reporter/reward:value T_reporter/top_flux:value T_reporter/log_prob_top_flux:value' + [] +[] + +[Trainers] + [nn_trainer] + type = LibtorchDRLControlTrainer + response = 
'storage/r_transfer:T_reporter:center_temp_tend:value' + control = 'storage/r_transfer:T_reporter:top_flux:value' + log_probability = 'storage/r_transfer:T_reporter:log_prob_top_flux:value' + reward = 'storage/r_transfer:T_reporter:reward:value' + + num_epochs = 400 + update_frequency = 1 + decay_factor = 0.8 + + loss_print_frequency = 40 + + critic_learning_rate = 0.0005 + num_critic_neurons_per_layer = '32 16' + + control_learning_rate = 0.0005 + num_control_neurons_per_layer = '16 6' + + # keep consistent with LibtorchNeuralNetControl + input_timesteps = 1 + response_scaling_factors = '0.03' + response_shift_factors = '290' + action_standard_deviations = '0.01' + + standardize_advantage = true + + read_from_file = false + [] +[] + +[Reporters] + [storage] + type = StochasticReporter + parallel_type = ROOT + [] + # [reward] + # type = DRLRewardReporter + # drl_trainer_name = nn_trainer + # [] +[] + +[Executioner] + type = Transient + num_steps = 1 +[] + +[Outputs] + file_base = output/train_out + json = true + time_step_interval = 1 + execute_on = TIMESTEP_END +[] diff --git a/modules/combined/examples/stochastic/vortex_control/mesh.i b/modules/combined/examples/stochastic/vortex_control/mesh.i new file mode 100644 index 000000000000..2d962b51c9d2 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/mesh.i @@ -0,0 +1,240 @@ +[Mesh] + # ------------------------------------------ + # Middle layer + # ------------------------------------------ + [ccmg] + type = ConcentricCircleMeshGenerator + num_sectors = '${fparse refinement*2}' + radii = '${circle_radius} ${fparse 1.2*circle_radius}' + rings = '4 ${refinement} ${refinement}' + has_outer_square = on + pitch = ${pitch} + preserve_volumes = off + smoothing_max_it = 2 + [] + [in_between] + type = SideSetsBetweenSubdomainsGenerator + input = ccmg + primary_block = 2 + paired_block = 1 + new_boundary = 'circle' + [] + [delete] + type = BlockDeletionGenerator + input = in_between + block = '1' + [] + 
[final_ccmg] + type = RenameBlockGenerator + input = delete + old_block = '2 3' + new_block = '0 0' + [] + [left] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${x_min}' + xmax = '${fparse -pitch/2}' + ymin = '${fparse -pitch/2}' + ymax = '${fparse pitch/2}' + nx = '${fparse refinement*2}' + ny = '${fparse refinement*4+2}' + [] + [right] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse pitch/2}' + xmax = '${x_max}' + ymin = '${fparse -pitch/2}' + ymax = '${fparse pitch/2}' + nx = '${fparse refinement*30}' + ny = '${fparse refinement*4+2}' + [] + [combined_middle] + type = StitchedMeshGenerator + inputs = 'final_ccmg left right' + stitch_boundaries_pairs = 'left right; right left' + clear_stitched_boundary_ids = false + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + + [middle_top_sideset] + input = combined_middle + type = ParsedGenerateSideset + combinatorial_geometry = 'y > ${fparse pitch/2-rundoff}' + normal = '0 1 0' + new_sideset_name = 'middle_top' + [] + [middle_bottom_sideset] + input = middle_top_sideset + type = ParsedGenerateSideset + combinatorial_geometry = 'y < ${fparse -pitch/2+rundoff}' + normal = '0 -1 0' + new_sideset_name = 'middle_bottom' + [] + # ------------------------------------------ + # Top layer + # ------------------------------------------ + [top_left_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${x_min}' + xmax = '${fparse -pitch/2}' + ymin = '${fparse pitch/2}' + ymax = '${y_max}' + nx = '${fparse refinement*2}' + ny = '${fparse refinement*2+1}' + [] + [top_middle_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse -pitch/2}' + xmax = '${fparse pitch/2}' + ymin = '${fparse pitch/2}' + ymax = '${y_max}' + nx = '${fparse refinement*4+2}' + ny = '${fparse refinement*2+1}' + [] + [top_right_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse pitch/2}' + xmax = '${x_max}' + ymin = '${fparse pitch/2}' + ymax = '${y_max}' + nx = '${fparse 
refinement*30}' + ny = '${fparse refinement*2+1}' + [] + [combined_top] + type = StitchedMeshGenerator + inputs = 'top_middle_block top_left_block top_right_block' + stitch_boundaries_pairs = 'left right; right left' + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [top_bottom_sideset] + input = combined_top + type = ParsedGenerateSideset + combinatorial_geometry = 'y < ${fparse pitch/2+rundoff}' + normal = '0 -1 0' + new_sideset_name = 'top_bottom' + [] + [combined_middle_top] + type = StitchedMeshGenerator + inputs = 'top_bottom_sideset middle_bottom_sideset' + stitch_boundaries_pairs = 'top_bottom middle_top' + clear_stitched_boundary_ids = false + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [create_fused_top_sideset] + input = combined_middle_top + type = ParsedGenerateSideset + combinatorial_geometry = 'y > ${fparse y_max-rundoff}' + normal = '0 1 0' + new_sideset_name = 'top_boundary' + [] + # ------------------------------------------ + # Bottom layer + # ------------------------------------------ + [bottom_left_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${x_min}' + xmax = '${fparse -pitch/2}' + ymin = '${y_min}' + ymax = '${fparse -pitch/2}' + nx = '${fparse refinement*2}' + ny = '${fparse refinement*2}' + [] + [bottom_middle_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse -pitch/2}' + xmax = '${fparse pitch/2}' + ymin = '${y_min}' + ymax = '${fparse -pitch/2}' + nx = '${fparse refinement*4+2}' + ny = '${fparse refinement*2}' + [] + [bottom_right_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse pitch/2}' + xmax = '${x_max}' + ymin = '${y_min}' + ymax = '${fparse -pitch/2}' + nx = '${fparse refinement*30}' + ny = '${fparse refinement*2}' + [] + [combined_bottom] + type = StitchedMeshGenerator + inputs = 'bottom_middle_block bottom_left_block bottom_right_block' + stitch_boundaries_pairs = 'left right; right left' + 
prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [bottom_top_sideset] + input = combined_bottom + type = ParsedGenerateSideset + combinatorial_geometry = 'y > ${fparse -pitch/2-rundoff}' + normal = '0 1 0' + new_sideset_name = 'bottom_top' + [] + [combined_final] + type = StitchedMeshGenerator + inputs = 'create_fused_top_sideset bottom_top_sideset' + stitch_boundaries_pairs = 'middle_bottom bottom_top' + clear_stitched_boundary_ids = false + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [create_fused_bottom_sideset] + input = combined_final + type = ParsedGenerateSideset + combinatorial_geometry = 'y < ${fparse y_min+rundoff}' + normal = '0 -1 0' + new_sideset_name = 'bottom_boundary' + [] + # ------------------------------------------ + # Left and right boundaries + # ------------------------------------------ + [create_fused_left_sideset] + input = create_fused_bottom_sideset + type = ParsedGenerateSideset + combinatorial_geometry = 'x < ${fparse x_min+rundoff}' + normal = '-1 0 0' + new_sideset_name = 'left_boundary' + [] + [create_fused_right_sideset] + input = create_fused_left_sideset + type = ParsedGenerateSideset + combinatorial_geometry = 'x > ${fparse x_max-rundoff}' + normal = '1 0 0' + new_sideset_name = 'right_boundary' + [] + [sideset_removal] + input = create_fused_right_sideset + type = BoundaryDeletionGenerator + boundary_names = 'bottom top left right middle_bottom middle_top bottom_top top_bottom' + [] + [new_boundaries_1] + type = SideSetsFromBoundingBoxGenerator + input = sideset_removal + bottom_left = '-0.008682409 0.049240388 0' + top_right = '0.008682409 0.5 0' + boundary_new = 'top_gap' + included_boundaries = 'circle' + replace = true + [] + [new_boundaries_2] + type = SideSetsFromBoundingBoxGenerator + input = new_boundaries_1 + bottom_left = '-0.008682409 -0.5 0' + top_right = '0.008682409 -0.049240388 0' + boundary_new = 'bottom_gap' + included_boundaries = 
'circle' + replace = true + [] +[] From a45d92cdb993e239518fcdda0b20a557c177058e Mon Sep 17 00:00:00 2001 From: Peter German Date: Sun, 2 Feb 2025 15:52:01 -0700 Subject: [PATCH 10/51] Add option for smoothing signal. --- .../libtorch/controls/LibtorchDRLControl.h | 8 +++ .../libtorch/controls/LibtorchDRLControl.C | 50 +++++++++++++++---- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index fbfccd9eafcf..335fec39f864 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -58,6 +58,14 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl /// Standard deviations converted to a 2D diagonal tensor that can be used by Libtorch routines. torch::Tensor _std; + + std::vector _previous_control_signal; + std::vector _current_smoothed_signal; + + unsigned int _call_counter; + const unsigned int _num_steps_in_period; + const Real _smoother; + }; #endif diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 040b585bcbf5..586600f7087e 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -25,15 +25,25 @@ LibtorchDRLControl::validParams() "Reinforcement Learning (DRL) neural network trained using a PPO algorithm."); params.addRequiredParam>( "action_standard_deviations", "Standard deviation value used while sampling the actions."); + params.addParam("seed", "Seed for the random number generator."); + params.addParam("num_stems_in_period", 1, "Blabla"); + params.addParam("smoother", 1.0, "Blabla"); + return params; } LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) : 
LibtorchNeuralNetControl(parameters), _current_control_signal_log_probabilities(std::vector(_control_names.size(), 0.0)), - _action_std(getParam>("action_standard_deviations")) + _action_std(getParam>("action_standard_deviations")), + _previous_control_signal(std::vector(_control_names.size(), 0.0)), + _current_smoothed_signal(std::vector(_control_names.size(), 0.0)), + _call_counter(0), + _num_steps_in_period(getParam("num_stems_in_period")), + _smoother(getParam("smoother")) + { if (_control_names.size() != _action_std.size()) paramError("action_standard_deviations", @@ -67,12 +77,15 @@ LibtorchDRLControl::execute() if (_old_responses.empty()) _old_responses.assign(num_old_timesteps, _current_response); - // Organize the old an current solution into a tensor so we can evaluate the neural net - torch::Tensor input_tensor = prepareInputTensor(); + if (_call_counter % _num_steps_in_period == 0) + { + // Organize the old an current solution into a tensor so we can evaluate the neural net + torch::Tensor input_tensor = prepareInputTensor(); - // Evaluate the neural network to get the expected control value - torch::Tensor output_tensor = _nn->forward(input_tensor); + // Evaluate the neural network to get the expected control value + torch::Tensor output_tensor = _nn->forward(input_tensor); +<<<<<<< HEAD // std::cout << "Input " << input_tensor << std::endl; // std::cout << "Output " << output_tensor << std::endl; @@ -83,23 +96,41 @@ LibtorchDRLControl::execute() // Compute log probability torch::Tensor log_probability = computeLogProbability(action, output_tensor); +======= + // Sample control value (action) from Gaussian distribution + torch::Tensor action = at::normal(output_tensor, _std); - // Convert data - _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; + // Compute log probability + torch::Tensor log_probability = computeLogProbability(action, output_tensor); +>>>>>>> 4b5d311c80 (Add option for smoothing signal.) 
+ + _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; - _current_control_signal_log_probabilities = {log_probability.data_ptr(), + _current_control_signal_log_probabilities = {log_probability.data_ptr(), log_probability.data_ptr() + log_probability.size(1)}; + } + + // Convert data + _previous_control_signal = _current_smoothed_signal; + + + for (const auto i : index_range(_current_smoothed_signal)) + _current_smoothed_signal[i] = _previous_control_signal[i] + _smoother*(_current_control_signals[i] - _previous_control_signal[i]); + +<<<<<<< HEAD // std::cout << "Setting control signal to: " << Moose::stringify(_current_control_signals) << std::endl; // std::cout << "Setting log probability to: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; +======= +>>>>>>> 4b5d311c80 (Add option for smoothing signal.) for (unsigned int control_i = 0; control_i < n_controls; ++control_i) { // We scale the controllable value for physically meaningful control action setControllableValueByName(_control_names[control_i], - _current_control_signals[control_i] * + _current_smoothed_signal[control_i] * _action_scaling_factors[control_i]); } @@ -110,6 +141,7 @@ LibtorchDRLControl::execute() std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); _old_responses[0] = _current_response; } + _call_counter++; } } From 4f06dcd7e58d93d23952a8aaf4d88b0019061387 Mon Sep 17 00:00:00 2001 From: Peter German Date: Sun, 2 Feb 2025 15:52:40 -0700 Subject: [PATCH 11/51] Adopt training for vortex shedding. 
--- .../flow_over_circle_linearfv.i | 115 +++++++++++++++--- .../stochastic/vortex_control/header.i | 1 - .../libtorch_drl_control_trainer.i | 88 -------------- .../stochastic/vortex_control/trainer.i | 91 ++++++++++++++ 4 files changed, 186 insertions(+), 109 deletions(-) delete mode 100644 modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_trainer.i create mode 100644 modules/combined/examples/stochastic/vortex_control/trainer.i diff --git a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i index bb7a0847aad9..73b3cbc10369 100644 --- a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i +++ b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i @@ -1,10 +1,12 @@ -# [Mesh] -# [fmg] -# type = FileMeshGenerator -# file = flow_over_circle_linearfv_out_orig.e -# use_for_exodus_restart = true -# [] -# [] +!include header.i + +[Mesh] + [fmg] + type = FileMeshGenerator + file = flow_over_circle_linearfv_out_orig.e + use_for_exodus_restart = true + [] +[] [Problem] linear_sys_names = 'u_system v_system pressure_system' @@ -22,13 +24,13 @@ type = ParsedFunction expression = 'Q*x/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' symbol_names = 'Q' - symbol_values = '${Q}' + symbol_values = 'Q_signal' [] [gap_y] type = ParsedFunction expression = 'if(y>0,Q,-Q)*y/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' symbol_names = 'Q' - symbol_values = '${Q}' + symbol_values = 'Q_signal' [] [] @@ -47,21 +49,21 @@ [vel_x] type = MooseLinearVariableFVReal solver_sys = u_system - # initial_from_file_var = vel_x - # initial_from_file_timestep = LATEST + initial_from_file_var = vel_x + initial_from_file_timestep = LATEST [] [vel_y] type = MooseLinearVariableFVReal solver_sys = v_system - # initial_from_file_var = vel_y - # initial_from_file_timestep = LATEST + initial_from_file_var = vel_y + 
initial_from_file_timestep = LATEST [] [pressure] type = MooseLinearVariableFVReal - initial_condition = 0 + # initial_condition = 0 solver_sys = pressure_system - # initial_from_file_var = pressure - # initial_from_file_timestep = LATEST + initial_from_file_var = pressure + initial_from_file_timestep = LATEST [] [] @@ -243,6 +245,74 @@ coeff_2 = 1.59 execute_on = 'INITIAL TIMESTEP_END' [] + [p1] + type = PointValue + variable = pressure + point = '0 0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p2] + type = PointValue + variable = pressure + point = '0 -0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p3] + type = PointValue + variable = pressure + point = '0.075 0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p4] + type = PointValue + variable = pressure + point = '0.075 0.0 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p5] + type = PointValue + variable = pressure + point = '0.075 -0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [Q_signal] + type = ConstantPostprocessor + value = 0.0 + [] + [Q] + type = LibtorchControlValuePostprocessor + control_name = src_control + [] + [log_prob_Q] + type = LibtorchDRLLogProbabilityPostprocessor + control_name = src_control + [] +[] + +[Reporters] + [results] + type = AccumulateReporter + reporters = 'p1/value p2/value p3/value p4/value p5/value reward/value Q/value log_prob_Q/value' + [] +[] + +[Controls] + [src_control] + type = LibtorchDRLControl + parameters = "Postprocessors/Q_signal/value" + responses = 'p1 p2 p3 p4 p5' + + # keep consistent with LibtorchDRLControlTrainer + input_timesteps = 1 + response_scaling_factors = '0.4 0.4 0.4 0.4 0.4' + response_shift_factors = '-0.4 -0.4 -0.4 -0.4 -0.4' + action_standard_deviations = '0.02' + action_scaling_factors = 0.01 + + execute_on = 'TIMESTEP_BEGIN' + smoother = 0.1 + num_stems_in_period = 25 + [] [] [Executioner] @@ -265,11 +335,16 @@ pressure_petsc_options_value = 'hypre boomeramg' print_fields = false continue_on_max_its = true - dt 
= 0.002 - num_steps = 50 + dt = 0.001 + num_steps = 500 [] [Outputs] - exodus = true - csv = true + # exodus = true + [json] + type = JSON + execute_on = final + [] + # console = false + # execute_on = FINAL [] diff --git a/modules/combined/examples/stochastic/vortex_control/header.i b/modules/combined/examples/stochastic/vortex_control/header.i index 27113960bfc5..6e5236531616 100644 --- a/modules/combined/examples/stochastic/vortex_control/header.i +++ b/modules/combined/examples/stochastic/vortex_control/header.i @@ -34,7 +34,6 @@ rho = 1 # Boundary conditions inlet_velocity = 1.5 -Q = 0.0 # Numerical schemes advected_interp_method = 'average' diff --git a/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_trainer.i b/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_trainer.i deleted file mode 100644 index 52b2262b5bde..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_trainer.i +++ /dev/null @@ -1,88 +0,0 @@ -[StochasticTools] -[] - -[Samplers] - [dummy] - type = CartesianProduct - linear_space_items = '0 0.01 2' - [] -[] - -[MultiApps] - [runner] - type = SamplerFullSolveMultiApp - sampler = dummy - input_files = 'libtorch_drl_control_sub.i' - [] -[] - -[Transfers] - [nn_transfer] - type = LibtorchNeuralNetControlTransfer - to_multi_app = runner - trainer_name = nn_trainer - control_name = src_control - [] - [r_transfer] - type = SamplerReporterTransfer - from_multi_app = runner - sampler = dummy - stochastic_reporter = storage - from_reporter = 'T_reporter/center_temp_tend:value T_reporter/reward:value T_reporter/top_flux:value T_reporter/log_prob_top_flux:value' - [] -[] - -[Trainers] - [nn_trainer] - type = LibtorchDRLControlTrainer - response = 'storage/r_transfer:T_reporter:center_temp_tend:value' - control = 'storage/r_transfer:T_reporter:top_flux:value' - log_probability = 'storage/r_transfer:T_reporter:log_prob_top_flux:value' - reward = 
'storage/r_transfer:T_reporter:reward:value' - - num_epochs = 400 - update_frequency = 1 - decay_factor = 0.8 - - loss_print_frequency = 40 - - critic_learning_rate = 0.0005 - num_critic_neurons_per_layer = '32 16' - - control_learning_rate = 0.0005 - num_control_neurons_per_layer = '16 6' - - # keep consistent with LibtorchNeuralNetControl - input_timesteps = 1 - response_scaling_factors = '0.03' - response_shift_factors = '290' - action_standard_deviations = '0.01' - - standardize_advantage = true - - read_from_file = false - [] -[] - -[Reporters] - [storage] - type = StochasticReporter - parallel_type = ROOT - [] - # [reward] - # type = DRLRewardReporter - # drl_trainer_name = nn_trainer - # [] -[] - -[Executioner] - type = Transient - num_steps = 1 -[] - -[Outputs] - file_base = output/train_out - json = true - time_step_interval = 1 - execute_on = TIMESTEP_END -[] diff --git a/modules/combined/examples/stochastic/vortex_control/trainer.i b/modules/combined/examples/stochastic/vortex_control/trainer.i new file mode 100644 index 000000000000..3a4105a2b8bd --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/trainer.i @@ -0,0 +1,91 @@ +[StochasticTools] +[] + +[Samplers] + [dummy] + type = CartesianProduct + linear_space_items = '0 0.01 1' + [] +[] + +[MultiApps] + [runner] + type = SamplerFullSolveMultiApp + sampler = dummy + input_files = 'flow_over_circle_linearfv.i' + mode= batch-reset + [] +[] + +[Transfers] + [nn_transfer] + type = LibtorchNeuralNetControlTransfer + to_multi_app = runner + trainer_name = nn_trainer + control_name = src_control + execute_on = TIMESTEP_BEGIN + [] + [r_transfer] + type = SamplerReporterTransfer + from_multi_app = runner + sampler = dummy + stochastic_reporter = storage + from_reporter = 'results/p1:value results/p2:value results/p3:value results/p4:value results/p5:value results/reward:value results/Q:value results/log_prob_Q:value' + [] +[] + +[Trainers] + [nn_trainer] + type = LibtorchDRLControlTrainer + 
response = 'storage/r_transfer:results:p1:value storage/r_transfer:results:p2:value storage/r_transfer:results:p3:value storage/r_transfer:results:p4:value storage/r_transfer:results:p5:value' + control = 'storage/r_transfer:results:Q:value' + log_probability = 'storage/r_transfer:results:log_prob_Q:value' + reward = 'storage/r_transfer:results:reward:value' + + num_epochs = 400 + update_frequency = 1 + decay_factor = 0.99 + + loss_print_frequency = 10 + + critic_learning_rate = 0.0005 + num_critic_neurons_per_layer = '64 32' + + control_learning_rate = 0.0005 + num_control_neurons_per_layer = '64 32' + + # keep consistent with LibtorchNeuralNetControl + input_timesteps = 1 + + response_scaling_factors = '0.4 0.4 0.4 0.4 0.4' + response_shift_factors = '-0.4 -0.4 -0.4 -0.4 -0.4' + action_standard_deviations = '0.02' + + standardize_advantage = true + + read_from_file = false + [] +[] + +[Reporters] + [storage] + type = StochasticReporter + parallel_type = ROOT + [] + [reward] + type = DRLRewardReporter + drl_trainer_name = nn_trainer + [] +[] + +[Executioner] + type = Transient + num_steps = 20 +[] + +[Outputs] + file_base = output/train_out + json = true + time_step_interval = 1 + execute_on = TIMESTEP_END +[] From 3a0a78df91f593d75fc3f68a069f47eeaffef386 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 4 Feb 2025 18:47:11 -0700 Subject: [PATCH 12/51] Add run-ready files. 
--- .../LiftDragRewardPostprocessor.h | 2 - .../LiftDragRewardPostprocessor.C | 8 ++-- .../flow_over_circle_linearfv.i | 23 ++++++----- .../stochastic/vortex_control/trainer.i | 26 +++++++------ .../libtorch/controls/LibtorchDRLControl.h | 3 ++ .../libtorch/controls/LibtorchDRLControl.C | 38 +++++++++++-------- 6 files changed, 57 insertions(+), 43 deletions(-) diff --git a/framework/include/postprocessors/LiftDragRewardPostprocessor.h b/framework/include/postprocessors/LiftDragRewardPostprocessor.h index a122d66d31fb..96237b4f120f 100644 --- a/framework/include/postprocessors/LiftDragRewardPostprocessor.h +++ b/framework/include/postprocessors/LiftDragRewardPostprocessor.h @@ -41,6 +41,4 @@ class LiftDragRewardPostprocessor : public GeneralPostprocessor unsigned int _replace_counter; - - }; diff --git a/framework/src/postprocessors/LiftDragRewardPostprocessor.C b/framework/src/postprocessors/LiftDragRewardPostprocessor.C index 7ad86dd830f0..b82572da48da 100644 --- a/framework/src/postprocessors/LiftDragRewardPostprocessor.C +++ b/framework/src/postprocessors/LiftDragRewardPostprocessor.C @@ -26,8 +26,8 @@ LiftDragRewardPostprocessor::validParams() params.addRequiredParam("drag","Drag coeff"); params.addParam("averaging_window", 1, "The window"); - params.addParam("coeff_1", 0.2, "Coeff 1"); - params.addParam("coeff_2", 1.59, "Coeff 2"); + params.addParam("coeff_1", 1.59, "Coeff 1"); + params.addParam("coeff_2", 0.2, "Coeff 2"); params.addClassDescription("Blabla."); @@ -52,7 +52,7 @@ LiftDragRewardPostprocessor::LiftDragRewardPostprocessor(const InputParameters & Real LiftDragRewardPostprocessor::getValue() const { - return _coeff_1 + _avg_drag - _coeff_2*std::abs(_avg_lift); + return _coeff_1 - _avg_drag - _coeff_2*_avg_lift; } void @@ -61,7 +61,7 @@ LiftDragRewardPostprocessor::execute() auto rolling_index = _replace_counter % _averaging_window; auto normalization = std::min(_replace_counter + 1, _averaging_window); - _lift_history[rolling_index] = _lift; + 
_lift_history[rolling_index] = std::abs(_lift); _drag_history[rolling_index] = _drag; _avg_lift = std::reduce(_lift_history.begin(), _lift_history.end())/normalization; diff --git a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i index 73b3cbc10369..2c98ca23be5d 100644 --- a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i +++ b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i @@ -240,9 +240,9 @@ type = LiftDragRewardPostprocessor lift = lift_coeff drag = drag_coeff - averaging_window = 1 - coeff_1 = 0.2 - coeff_2 = 1.59 + averaging_window = 200 + coeff_1 = 2.0 + coeff_2 = 0.2 execute_on = 'INITIAL TIMESTEP_END' [] [p1] @@ -306,11 +306,14 @@ input_timesteps = 1 response_scaling_factors = '0.4 0.4 0.4 0.4 0.4' response_shift_factors = '-0.4 -0.4 -0.4 -0.4 -0.4' - action_standard_deviations = '0.02' - action_scaling_factors = 0.01 + action_standard_deviations = '0.01' + action_scaling_factors = 0.2 + + maximum_actions = 0.108 + minimum_actions = -0.108 execute_on = 'TIMESTEP_BEGIN' - smoother = 0.1 + smoother = 0.2 num_stems_in_period = 25 [] [] @@ -325,7 +328,7 @@ momentum_systems = 'u_system v_system' pressure_system = 'pressure_system' momentum_equation_relaxation = 0.9 - pressure_variable_relaxation = 0.6 + pressure_variable_relaxation = 0.55 num_iterations = 100 pressure_absolute_tolerance = 5e-6 momentum_absolute_tolerance = 5e-6 @@ -336,15 +339,15 @@ print_fields = false continue_on_max_its = true dt = 0.001 - num_steps = 500 + num_steps = 2000 [] [Outputs] - # exodus = true + exodus = true [json] type = JSON execute_on = final [] - # console = false + console = false # execute_on = FINAL [] diff --git a/modules/combined/examples/stochastic/vortex_control/trainer.i b/modules/combined/examples/stochastic/vortex_control/trainer.i index 3a4105a2b8bd..8dc702435077 100644 --- 
a/modules/combined/examples/stochastic/vortex_control/trainer.i +++ b/modules/combined/examples/stochastic/vortex_control/trainer.i @@ -4,7 +4,9 @@ [Samplers] [dummy] type = CartesianProduct - linear_space_items = '0 0.01 1' + linear_space_items = '0 0.01 4' + min_procs_per_row = 7 + max_procs_per_row = 7 [] [] @@ -13,17 +15,19 @@ type = SamplerFullSolveMultiApp sampler = dummy input_files = 'flow_over_circle_linearfv.i' - mode= batch-reset + mode = batch-reset + min_procs_per_app = 7 + max_procs_per_app = 7 [] [] [Transfers] [nn_transfer] - type = LibtorchNeuralNetControlTransfer + type = SamplerNeuralNetControlTransfer to_multi_app = runner trainer_name = nn_trainer control_name = src_control - execute_on = TIMESTEP_BEGIN + sampler = dummy [] [r_transfer] type = SamplerReporterTransfer @@ -42,16 +46,16 @@ log_probability = 'storage/r_transfer:results:log_prob_Q:value' reward = 'storage/r_transfer:results:reward:value' - num_epochs = 400 + num_epochs = 50 update_frequency = 1 - decay_factor = 0.99 + decay_factor = 0.98 loss_print_frequency = 10 - critic_learning_rate = 0.0005 + critic_learning_rate = 0.001 num_critic_neurons_per_layer = '64 32' - control_learning_rate = 0.0005 + control_learning_rate = 0.001 num_control_neurons_per_layer = '64 32' # keep consistent with LibtorchNeuralNetControl @@ -59,7 +63,7 @@ response_scaling_factors = '0.4 0.4 0.4 0.4 0.4' response_shift_factors = '-0.4 -0.4 -0.4 -0.4 -0.4' - action_standard_deviations = '0.02' + action_standard_deviations = '0.01' standardize_advantage = true @@ -71,6 +75,7 @@ [storage] type = StochasticReporter parallel_type = ROOT + outputs = none [] [reward] type = DRLRewardReporter @@ -80,12 +85,11 @@ [Executioner] type = Transient - num_steps = 20 + num_steps = 300 [] [Outputs] file_base = output/train_out json = true - time_step_interval = 1 execute_on = TIMESTEP_END [] diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h 
b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index 335fec39f864..2e2fa371679b 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -66,6 +66,9 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl const unsigned int _num_steps_in_period; const Real _smoother; + std::vector _maximum_actions; + std::vector _minimum_actions; + }; #endif diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 586600f7087e..d22185647d7f 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -30,6 +30,8 @@ LibtorchDRLControl::validParams() params.addParam("num_stems_in_period", 1, "Blabla"); params.addParam("smoother", 1.0, "Blabla"); + params.addParam>("maximum_actions", {}, "The maximum actions"); + params.addParam>("minimum_actions", {}, "The minimum actions"); return params; } @@ -42,7 +44,9 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) _current_smoothed_signal(std::vector(_control_names.size(), 0.0)), _call_counter(0), _num_steps_in_period(getParam("num_stems_in_period")), - _smoother(getParam("smoother")) + _smoother(getParam("smoother")), + _maximum_actions(isParamSetByUser("maximum_actions") ? getParam>("maximum_actions") : std::vector(_control_names.size(), std::numeric_limits::max())), + _minimum_actions(isParamSetByUser("minimum_actions") ? 
getParam>("minimum_actions") : std::vector(_control_names.size(), -std::numeric_limits::max())) { if (_control_names.size() != _action_std.size()) @@ -59,6 +63,18 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) _std = torch::eye(_control_names.size()); for (unsigned int i = 0; i < _control_names.size(); ++i) _std[i][i] = _action_std[i]; + + if (isParamSetByUser("maximum_actions")) + { + for (const auto i : index_range(_maximum_actions)) + _maximum_actions[i] = _maximum_actions[i]/_action_scaling_factors[i]; + } + + if (isParamSetByUser("minimum_actions")) + { + for (const auto i : index_range(_minimum_actions)) + _minimum_actions[i] = _minimum_actions[i]/_action_scaling_factors[i]; + } } void @@ -85,27 +101,20 @@ LibtorchDRLControl::execute() // Evaluate the neural network to get the expected control value torch::Tensor output_tensor = _nn->forward(input_tensor); -<<<<<<< HEAD - // std::cout << "Input " << input_tensor << std::endl; - // std::cout << "Output " << output_tensor << std::endl; - - // Sample control value (action) from Gaussian distribution - torch::Tensor action = at::normal(output_tensor, _std); + // std::cout << "Input " << input_tensor << std::endl; + // std::cout << "Output " << output_tensor << std::endl; - // std::cout << "Action " << action << std::endl; - - // Compute log probability - torch::Tensor log_probability = computeLogProbability(action, output_tensor); -======= // Sample control value (action) from Gaussian distribution torch::Tensor action = at::normal(output_tensor, _std); // Compute log probability torch::Tensor log_probability = computeLogProbability(action, output_tensor); ->>>>>>> 4b5d311c80 (Add option for smoothing signal.) 
_current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; + for (const auto i : index_range(_current_control_signals)) + _current_control_signals[i] = std::min(std::max(_current_control_signals[i], _minimum_actions[i]), _maximum_actions[i]); + _current_control_signal_log_probabilities = {log_probability.data_ptr(), log_probability.data_ptr() + log_probability.size(1)}; @@ -119,11 +128,8 @@ LibtorchDRLControl::execute() _current_smoothed_signal[i] = _previous_control_signal[i] + _smoother*(_current_control_signals[i] - _previous_control_signal[i]); -<<<<<<< HEAD // std::cout << "Setting control signal to: " << Moose::stringify(_current_control_signals) << std::endl; // std::cout << "Setting log probability to: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; -======= ->>>>>>> 4b5d311c80 (Add option for smoothing signal.) for (unsigned int control_i = 0; control_i < n_controls; ++control_i) { From b17f8316b54e4c677c2e8352f4131fe79541ca5f Mon Sep 17 00:00:00 2001 From: Peter German Date: Sun, 16 Feb 2025 08:00:34 -0700 Subject: [PATCH 13/51] Add actor network --- .../controls/LibtorchNeuralNetControl.h | 2 +- .../libtorch/utils/LibtorchActorNeuralNet.h | 108 ++++++++++++++ .../utils/LibtorchArtificialNeuralNet.h | 19 ++- .../libtorch/utils/LibtorchActorNeuralNet.C | 140 ++++++++++++++++++ .../utils/LibtorchArtificialNeuralNet.C | 92 ++++++++++-- .../LiftDragRewardPostprocessor.C | 4 +- framework/src/postprocessors/PointValue.C | 1 - .../flow_over_circle_linearfv.i | 111 ++++++++++---- .../stochastic/vortex_control/trainer.i | 52 ++++--- .../libtorch/controls/LibtorchDRLControl.h | 16 +- .../surrogates/LibtorchDRLControlTrainer.h | 15 +- .../libtorch/controls/LibtorchDRLControl.C | 122 ++++++++++----- .../trainers/LibtorchDRLControlTrainer.C | 137 +++++++++-------- 13 files changed, 639 insertions(+), 180 deletions(-) create mode 100644 framework/include/libtorch/utils/LibtorchActorNeuralNet.h create mode 100644 
framework/src/libtorch/utils/LibtorchActorNeuralNet.C diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index b6229008cce3..8a04700a2b23 100644 --- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -48,7 +48,7 @@ class LibtorchNeuralNetControl : public Control * when copying the neural network from a main app which trains it. * @param input_nn Reference to a neural network which will be copied into this object */ - void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn); + virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn); /// Return a reference to the stored neural network const Moose::LibtorchNeuralNetBase & controlNeuralNet() const; diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h new file mode 100644 index 000000000000..cdc83b7fb815 --- /dev/null +++ b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -0,0 +1,108 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef LIBTORCH_ENABLED + +#pragma once + +#include +#include +#include "LibtorchArtificialNeuralNet.h" + +namespace Moose +{ + +class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet +{ +public: + /** + * Construct using input parameters + * @param name Name of the neural network + * @param num_inputs The number of input neurons/parameters + * @param num_neurons_per_layer Number of neurons per hidden layer + * @param num_outputs The number of output neurons + */ + LibtorchActorNeuralNet(const 
std::string name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const std::vector & num_neurons_per_layer, + const std::vector & std, + const std::vector & activation_function = {"relu"}, + const std::vector & minimum_values = {}, + const std::vector & maximum_values = {}, + const torch::DeviceType device_type = torch::kCPU, + const torch::ScalarType scalar_type = torch::kDouble, + const bool build_on_construct = true); + + /** + * Copy construct an artificial neural network + * @param nn The neural network which needs to be copied + */ + LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, + const bool build_on_construct = true); + + /** + * Overriding the forward substitution function for the neural network, unfortunately + * this cannot be const since it creates a graph in the background + * @param x Input tensor for the evaluation + */ + virtual torch::Tensor forward(torch::Tensor & x) override; + + /// Construct the neural network + virtual void constructNeuralNetwork() override; + + const std::vector & std() const {return _std;}; + + const torch::Tensor & stdTensor() const {return _std_tensor;} + + torch::Tensor computeLogProbability(const torch::Tensor & action, + const torch::Tensor & signal); + + torch::Tensor logProbability() {return _log_probability;} + torch::Tensor logProbability(torch::Tensor other) {return computeLogProbability(_mean, other);} + + torch::Tensor entropy(); + +protected: + const std::vector & _std; + + torch::Tensor _std_tensor; + + torch::Tensor _mean; + torch::Tensor _log_probability; +}; + +void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); + +} + +template <> +void dataStore( + std::ostream & stream, + std::shared_ptr & nn, + void * context); + +template <> +void dataLoad( + std::istream & stream, + std::shared_ptr & nn, + void * context); + +// This is needed because the reporter which is used to ouput the neural net parameters to JSON +// requires a 
dataStore/dataLoad. However, these functions will be empty due to the fact that +// we are only interested in the JSON output and we don't want to output everything +template <> +void dataStore( + std::ostream & stream, Moose::LibtorchActorNeuralNet const *& nn, void * context); + +template <> +void dataLoad( + std::istream & stream, Moose::LibtorchActorNeuralNet const *& nn, void * context); + +#endif diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index c995d61ed7d1..925a5306f449 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -38,14 +38,18 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu const unsigned int num_outputs, const std::vector & num_neurons_per_layer, const std::vector & activation_function = {"relu"}, + const std::vector & minimum_values = {}, + const std::vector & maximum_values = {}, const torch::DeviceType device_type = torch::kCPU, - const torch::ScalarType scalar_type = torch::kDouble); + const torch::ScalarType scalar_type = torch::kDouble, + const bool build_on_construct = true); /** * Copy construct an artificial neural network * @param nn The neural network which needs to be copied */ - LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn); + LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn, + const bool build_on_construct = true); /** * Add layers to the neural network @@ -80,7 +84,11 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu /// Return the data type which is used by this neural network torch::ScalarType dataType() const { return _data_type; } /// Construct the neural network - void constructNeuralNetwork(); + virtual void constructNeuralNetwork(); + + const std::vector & minValues() const {return _minimum_values;}; + + const 
std::vector & maxValues() const {return _maximum_values;}; /// Store the network architecture in a json file (for debugging, visualization) void store(nlohmann::json & json) const; @@ -104,6 +112,11 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu const torch::DeviceType _device_type; /// The data type used in this neural network const torch::ScalarType _data_type; + /// + const std::vector _minimum_values; + const std::vector _maximum_values; + torch::Tensor _min_tensor; + torch::Tensor _max_tensor; }; void to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * const & network); diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C new file mode 100644 index 000000000000..aab8f47d4aa0 --- /dev/null +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -0,0 +1,140 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef LIBTORCH_ENABLED + +#include "LibtorchActorNeuralNet.h" +#include "MooseError.h" +#include "LibtorchUtils.h" + +namespace Moose +{ + +LibtorchActorNeuralNet::LibtorchActorNeuralNet( + const std::string name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const std::vector & num_neurons_per_layer, + const std::vector & std, + const std::vector & activation_function, + const std::vector & minimum_values, + const std::vector & maximum_values, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const bool build_on_construct) + : + LibtorchArtificialNeuralNet(name, num_inputs, num_outputs, num_neurons_per_layer, + activation_function, minimum_values, maximum_values, + device_type, data_type, + false), + 
_std(std) +{ + if (build_on_construct) + constructNeuralNetwork(); +} + +LibtorchActorNeuralNet::LibtorchActorNeuralNet( + const Moose::LibtorchActorNeuralNet & nn, + const bool build_on_construct) + : LibtorchArtificialNeuralNet(dynamic_cast(nn), false), + _std(nn.std()) +{ + // We construct the NN architecture + if (build_on_construct) + { + constructNeuralNetwork(); + // We fill it up with the current parameter values + const auto & from_params = nn.named_parameters(); + auto to_params = this->named_parameters(); + for (unsigned int param_i : make_range(from_params.size())) + to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + } +} + +void +LibtorchActorNeuralNet::constructNeuralNetwork() +{ + LibtorchArtificialNeuralNet::constructNeuralNetwork(); + + torch::Tensor std_tensor = torch::eye(_std.size()).to(_data_type); + for (unsigned int i = 0; i < _std.size(); ++i) + std_tensor[i][i] = _std[i]; + + _std_tensor = register_parameter("std", std_tensor); +} + +torch::Tensor +LibtorchActorNeuralNet::entropy() +{ + return 0.5*std::log(2*M_PI)+torch::log(_std_tensor)+0.5; +} + +torch::Tensor +LibtorchActorNeuralNet::forward(torch::Tensor & x) +{ + torch::Tensor output(x); + if (_data_type != output.scalar_type()) + output = output.to(_data_type); + if (_device_type != output.device().type()) + output = output.to(_device_type); + + for (unsigned int i = 0; i < _weights.size() - 1; ++i) + { + std::string activation = + _activation_function.size() > 1 ?
_activation_function[i] : _activation_function[0]; + if (activation == "relu") + output = torch::relu(_weights[i]->forward(output)); + else if (activation == "sigmoid") + output = torch::sigmoid(_weights[i]->forward(output)); + else if (activation == "tanh") + output = torch::tanh(_weights[i]->forward(output)); + else if (activation == "elu") + output = torch::elu(_weights[i]->forward(output)); + else if (activation == "gelu") + output = torch::gelu(_weights[i]->forward(output)); + else if (activation == "linear") + output = _weights[i]->forward(output); + } + + if (_minimum_values.size()) + { + output = torch::sigmoid(_weights[_weights.size() - 1]->forward(output)); + torch::Tensor scale = torch::sub(_max_tensor, _min_tensor).to(_data_type); + output = torch::mul(output, scale); + output = output + _min_tensor; + } + else + { + output = _weights[_weights.size() - 1]->forward(output); + } + + _mean = output; + auto action = at::normal(output, _std_tensor); + _log_probability = computeLogProbability(action, output); + + output = _min_tensor.defined() ? torch::clamp(action, _min_tensor, _max_tensor) : action; + + return output; +} + +torch::Tensor +LibtorchActorNeuralNet::computeLogProbability(const torch::Tensor & action, + const torch::Tensor & signal) +{ + // Logarithmic probability of taken action, given the current distribution.
+ torch::Tensor var = torch::matmul(_std_tensor, _std_tensor); + + return -((action - signal) * (action - signal)) / (2.0 * var) - 0.5*torch::log(var) - + 0.5*std::log(2.0 * M_PI); +} + +} + + +#endif diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index 58fb40e33cf6..d8da06d109a5 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -11,6 +11,7 @@ #include "LibtorchArtificialNeuralNet.h" #include "MooseError.h" +#include "LibtorchUtils.h" namespace Moose { @@ -21,15 +22,20 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( const unsigned int num_outputs, const std::vector & num_neurons_per_layer, const std::vector & activation_function, + const std::vector & minimum_values, + const std::vector & maximum_values, const torch::DeviceType device_type, - const torch::ScalarType data_type) + const torch::ScalarType data_type, + const bool build_on_construct) : _name(name), _num_inputs(num_inputs), _num_outputs(num_outputs), _num_neurons_per_layer(num_neurons_per_layer), - _activation_function(MultiMooseEnum("relu sigmoid elu gelu linear", "relu")), + _activation_function(MultiMooseEnum("relu sigmoid elu gelu linear tanh", "relu")), _device_type(device_type), - _data_type(data_type) + _data_type(data_type), + _minimum_values(minimum_values), + _maximum_values(maximum_values) { _activation_function = activation_function; @@ -38,11 +44,24 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( (_activation_function.size() != _num_neurons_per_layer.size())) mooseError("The number of activation functions should be either one or the same as the number " "of hidden layers"); - constructNeuralNetwork(); + + if (_minimum_values.size()) + { + auto min_value = _minimum_values; + LibtorchUtils::vectorToTensor(min_value, _min_tensor); + _min_tensor.to(_data_type).to(_device_type); + auto max_value = 
_maximum_values; + LibtorchUtils::vectorToTensor(max_value, _max_tensor); + _max_tensor.to(_data_type).to(_device_type); + } + + if (build_on_construct) + constructNeuralNetwork(); } LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( - const Moose::LibtorchArtificialNeuralNet & nn) + const Moose::LibtorchArtificialNeuralNet & nn, + const bool build_on_construct) : torch::nn::Module(), _name(nn.name()), _num_inputs(nn.numInputs()), @@ -50,16 +69,31 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( _num_neurons_per_layer(nn.numNeuronsPerLayer()), _activation_function(nn.activationFunctions()), _device_type(nn.deviceType()), - _data_type(nn.dataType()) + _data_type(nn.dataType()), + _minimum_values(nn.minValues()), + _maximum_values(nn.maxValues()) { // We construct the NN architecture - constructNeuralNetwork(); - // We fill it up with the current parameter values - const auto & from_params = nn.named_parameters(); - auto to_params = this->named_parameters(); - for (unsigned int param_i : make_range(from_params.size())) - to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + if (build_on_construct) + { + constructNeuralNetwork(); + // We fill it up with the current parameter values + const auto & from_params = nn.named_parameters(); + auto to_params = this->named_parameters(); + for (unsigned int param_i : make_range(from_params.size())) + to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + } + + if (_minimum_values.size()) + { + auto min_value = _minimum_values; + LibtorchUtils::vectorToTensor(min_value, _min_tensor); + _min_tensor.to(_data_type).to(_device_type); + auto max_value = _maximum_values; + LibtorchUtils::vectorToTensor(max_value, _max_tensor); + _max_tensor.to(_data_type).to(_device_type); + } } void @@ -101,6 +135,8 @@ LibtorchArtificialNeuralNet::forward(const torch::Tensor & x) output = torch::relu(_weights[i]->forward(output)); else if (activation == "sigmoid") output = 
torch::sigmoid(_weights[i]->forward(output)); + else if (activation == "tanh") + output = torch::tanh(_weights[i]->forward(output)); else if (activation == "elu") output = torch::elu(_weights[i]->forward(output)); else if (activation == "gelu") @@ -109,7 +145,17 @@ LibtorchArtificialNeuralNet::forward(const torch::Tensor & x) output = _weights[i]->forward(output); } - output = _weights[_weights.size() - 1]->forward(output); + if (_minimum_values.size()) + { + output = torch::sigmoid(_weights[_weights.size() - 1]->forward(output)); + torch::Tensor scale = torch::sub(_max_tensor, _min_tensor).to(_data_type); + output = torch::mul(output, scale); + output = output + _min_tensor; + } + else + { + output = _weights[_weights.size() - 1]->forward(output); + } return output; } @@ -183,6 +229,16 @@ dataStore( for (unsigned int i = 0; i < afs; ++i) items[i] = nn->activationFunctions()[i]; dataStore(stream, items, context); + + // unsigned int nminv(nn->minValues().size()); + // dataStore(stream, nminv, context); + std::vector minv(nn->minValues()); + dataStore(stream, minv, context); + + // unsigned int nmaxv(nn->minValues().size()); + // dataStore(stream, nmaxv, context); + std::vector maxv(nn->maxValues()); + dataStore(stream, maxv, context); auto device_type = static_cast::type>(nn->deviceType()); @@ -222,6 +278,14 @@ dataLoad( activation_functions.resize(num_activation_items); dataLoad(stream, activation_functions, context); + std::vector min_values; + min_values.resize(num_outputs); + dataLoad(stream, min_values, context); + + std::vector max_values; + max_values.resize(num_outputs); + dataLoad(stream, max_values, context); + std::underlying_type::type device_type; dataLoad(stream, device_type, context); const torch::DeviceType divt(static_cast(device_type)); @@ -231,7 +295,7 @@ dataLoad( const torch::ScalarType datt(static_cast(data_type)); nn = std::make_shared( - name, num_inputs, num_outputs, num_neurons_per_layer, activation_functions, divt, datt); + name,
num_inputs, num_outputs, num_neurons_per_layer, activation_functions, min_values, max_values, divt, datt); torch::load(nn, name); } diff --git a/framework/src/postprocessors/LiftDragRewardPostprocessor.C b/framework/src/postprocessors/LiftDragRewardPostprocessor.C index b82572da48da..945fed68a264 100644 --- a/framework/src/postprocessors/LiftDragRewardPostprocessor.C +++ b/framework/src/postprocessors/LiftDragRewardPostprocessor.C @@ -52,7 +52,7 @@ LiftDragRewardPostprocessor::LiftDragRewardPostprocessor(const InputParameters & Real LiftDragRewardPostprocessor::getValue() const { - return _coeff_1 - _avg_drag - _coeff_2*_avg_lift; + return _coeff_1 - _avg_drag - _coeff_2*std::abs(_avg_lift); } void @@ -61,7 +61,7 @@ LiftDragRewardPostprocessor::execute() auto rolling_index = _replace_counter % _averaging_window; auto normalization = std::min(_replace_counter + 1, _averaging_window); - _lift_history[rolling_index] = std::abs(_lift); + _lift_history[rolling_index] = _lift; _drag_history[rolling_index] = _drag; _avg_lift = std::reduce(_lift_history.begin(), _lift_history.end())/normalization; diff --git a/framework/src/postprocessors/PointValue.C b/framework/src/postprocessors/PointValue.C index c9a8ce4cdfba..8abe943b9f4c 100644 --- a/framework/src/postprocessors/PointValue.C +++ b/framework/src/postprocessors/PointValue.C @@ -72,6 +72,5 @@ PointValue::execute() Real PointValue::getValue() const { - // std::cout << "Point value " << _value << std::endl; return _value; } diff --git a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i index 2c98ca23be5d..5f36dc9398b7 100644 --- a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i +++ b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i @@ -240,44 +240,105 @@ type = LiftDragRewardPostprocessor lift = lift_coeff drag = drag_coeff - averaging_window = 200 - 
coeff_1 = 2.0 + averaging_window = 50 + coeff_1 = 0.0 coeff_2 = 0.2 + execute_on = 'TIMESTEP_END' + [] + # [p1] + # type = PointValue + # variable = pressure + # point = '0 0.07 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p2] + # type = PointValue + # variable = pressure + # point = '0 -0.07 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p3] + # type = PointValue + # variable = pressure + # point = '0.075 0.1 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p4] + # type = PointValue + # variable = pressure + # point = '0.075 0.0 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p5] + # type = PointValue + # variable = pressure + # point = '0.075 -0.1 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + [p1x] + type = PointValue + variable = vel_x + point = '0 0.07 0.0' execute_on = 'INITIAL TIMESTEP_END' [] - [p1] + [p2x] type = PointValue - variable = pressure + variable = vel_x + point = '0 -0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p3x] + type = PointValue + variable = vel_x + point = '0.075 0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p4x] + type = PointValue + variable = vel_x + point = '0.075 0.0 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p5x] + type = PointValue + variable = vel_x + point = '0.075 -0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p1y] + type = PointValue + variable = vel_y point = '0 0.07 0.0' execute_on = 'INITIAL TIMESTEP_END' [] - [p2] + [p2y] type = PointValue - variable = pressure + variable = vel_y point = '0 -0.07 0.0' execute_on = 'INITIAL TIMESTEP_END' [] - [p3] + [p3y] type = PointValue - variable = pressure + variable = vel_y point = '0.075 0.1 0.0' execute_on = 'INITIAL TIMESTEP_END' [] - [p4] + [p4y] type = PointValue - variable = pressure + variable = vel_y point = '0.075 0.0 0.0' execute_on = 'INITIAL TIMESTEP_END' [] - [p5] + [p5y] type = PointValue - variable = pressure + variable = vel_y point = '0.075 -0.1 0.0' execute_on = 'INITIAL TIMESTEP_END' 
[] [Q_signal] type = ConstantPostprocessor value = 0.0 + execute_on = TIMESTEP_BEGIN [] [Q] type = LibtorchControlValuePostprocessor @@ -292,7 +353,7 @@ [Reporters] [results] type = AccumulateReporter - reporters = 'p1/value p2/value p3/value p4/value p5/value reward/value Q/value log_prob_Q/value' + reporters = 'p1x/value p2x/value p3x/value p4x/value p5x/value p1y/value p2y/value p3y/value p4y/value p5y/value reward/value Q/value log_prob_Q/value' [] [] @@ -300,21 +361,21 @@ [src_control] type = LibtorchDRLControl parameters = "Postprocessors/Q_signal/value" - responses = 'p1 p2 p3 p4 p5' + responses = 'p1x p2x p3x p4x p5x p1y p2y p3y p4y p5y' # keep consistent with LibtorchDRLControlTrainer input_timesteps = 1 - response_scaling_factors = '0.4 0.4 0.4 0.4 0.4' - response_shift_factors = '-0.4 -0.4 -0.4 -0.4 -0.4' - action_standard_deviations = '0.01' - action_scaling_factors = 0.2 + response_scaling_factors = '13.33 15.38 16.66 38.46 15.38 33.33 40 11.76 4.711 15.38' + response_shift_factors = '2.055 2.055 1.93 -0.171 1.945 0.449 -0.525 0.029 0.17675 1.945' + action_scaling_factors = 0.5 - maximum_actions = 0.108 - minimum_actions = -0.108 + # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' + # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' + # action_scaling_factors = 1.0 execute_on = 'TIMESTEP_BEGIN' - smoother = 0.2 - num_stems_in_period = 25 + smoother = 0.1 + num_stems_in_period = 50 [] [] @@ -328,7 +389,7 @@ momentum_systems = 'u_system v_system' pressure_system = 'pressure_system' momentum_equation_relaxation = 0.9 - pressure_variable_relaxation = 0.55 + pressure_variable_relaxation = 0.6 num_iterations = 100 pressure_absolute_tolerance = 5e-6 momentum_absolute_tolerance = 5e-6 @@ -338,8 +399,8 @@ pressure_petsc_options_value = 'hypre boomeramg' print_fields = false continue_on_max_its = true - dt = 0.001 - num_steps = 2000 + dt = 0.0005 + num_steps = 800 [] [Outputs] diff --git 
a/modules/combined/examples/stochastic/vortex_control/trainer.i b/modules/combined/examples/stochastic/vortex_control/trainer.i index 8dc702435077..ba8d50e9f0e1 100644 --- a/modules/combined/examples/stochastic/vortex_control/trainer.i +++ b/modules/combined/examples/stochastic/vortex_control/trainer.i @@ -4,9 +4,9 @@ [Samplers] [dummy] type = CartesianProduct - linear_space_items = '0 0.01 4' - min_procs_per_row = 7 - max_procs_per_row = 7 + linear_space_items = '0 0.01 5' + min_procs_per_row = 20 + max_procs_per_row = 20 [] [] @@ -16,8 +16,8 @@ sampler = dummy input_files = 'flow_over_circle_linearfv.i' mode = batch-reset - min_procs_per_app = 7 - max_procs_per_app = 7 + min_procs_per_app = 20 + max_procs_per_app = 20 [] [] @@ -34,40 +34,54 @@ from_multi_app = runner sampler = dummy stochastic_reporter = storage - from_reporter = 'results/p1:value results/p2:value results/p3:value results/p4:value results/p5:value results/reward:value results/Q:value results/log_prob_Q:value' + from_reporter = 'results/p1x:value results/p2x:value results/p3x:value results/p4x:value results/p5x:value ' + 'results/p1y:value results/p2y:value results/p3y:value results/p4y:value results/p5y:value ' + 'results/reward:value results/Q:value results/log_prob_Q:value' [] [] [Trainers] [nn_trainer] type = LibtorchDRLControlTrainer - response = 'storage/r_transfer:results:p1:value storage/r_transfer:results:p2:value storage/r_transfer:results:p3:value storage/r_transfer:results:p4:value storage/r_transfer:results:p5:value' + response = 'storage/r_transfer:results:p1x:value storage/r_transfer:results:p2x:value storage/r_transfer:results:p3x:value storage/r_transfer:results:p4x:value storage/r_transfer:results:p5x:value ' + 'storage/r_transfer:results:p1y:value storage/r_transfer:results:p2y:value storage/r_transfer:results:p3y:value storage/r_transfer:results:p4y:value storage/r_transfer:results:p5y:value' control = 'storage/r_transfer:results:Q:value' log_probability = 
'storage/r_transfer:results:log_prob_Q:value' reward = 'storage/r_transfer:results:reward:value' - num_epochs = 50 - update_frequency = 1 - decay_factor = 0.98 + num_epochs = 25 + update_frequency = 2 + decay_factor = 0.99 - loss_print_frequency = 10 + loss_print_frequency = 1 - critic_learning_rate = 0.001 - num_critic_neurons_per_layer = '64 32' + critic_learning_rate = 0.0005 + num_critic_neurons_per_layer = '512 512' + critic_activation_functions = 'relu relu' - control_learning_rate = 0.001 - num_control_neurons_per_layer = '64 32' + control_learning_rate = 0.0005 + num_control_neurons_per_layer = '512 512' + control_activation_functions = 'tanh tanh' # keep consistent with LibtorchNeuralNetControl input_timesteps = 1 - response_scaling_factors = '0.4 0.4 0.4 0.4 0.4' - response_shift_factors = '-0.4 -0.4 -0.4 -0.4 -0.4' - action_standard_deviations = '0.01' + response_scaling_factors = '13.33 15.38 16.66 38.46 15.38 33.33 40 11.76 4.711 15.38' + response_shift_factors = '2.055 2.055 1.93 -0.171 1.945 0.449 -0.525 0.029 0.17675 1.945' + + # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' + # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' + + action_standard_deviations = '0.007' standardize_advantage = true read_from_file = false + + min_control_value = ${fparse -0.108*2} + max_control_value = ${fparse 0.108*2} + + batch_size = 200 [] [] @@ -85,7 +99,7 @@ [Executioner] type = Transient - num_steps = 300 + num_steps = 500 [] [Outputs] diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index 2e2fa371679b..66fd4446c8c6 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -11,7 +11,7 @@ #pragma once -#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchActorNeuralNet.h" #include 
"LibtorchNeuralNetControl.h" /** @@ -39,6 +39,8 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl */ Real getSignalLogProbability(const unsigned int signal_index) const; + virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) override; + protected: /** * Function which computes the logarithmic probability of given actions. @@ -53,22 +55,14 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl /// The log probability of control signals from the last evaluation of the controller std::vector _current_control_signal_log_probabilities; - /// Standard deviation for the actions, supplied by the user - const std::vector _action_std; - - /// Standard deviations converted to a 2D diagonal tensor that can be used by Libtorch routines. - torch::Tensor _std; - std::vector _previous_control_signal; std::vector _current_smoothed_signal; + Moose::LibtorchActorNeuralNet * _actor_nn; + unsigned int _call_counter; const unsigned int _num_steps_in_period; const Real _smoother; - - std::vector _maximum_actions; - std::vector _minimum_actions; - }; #endif diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index f1f478b74606..8f1f39777251 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -12,7 +12,7 @@ #pragma once #include -#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchActorNeuralNet.h" #include "libmesh/utility.h" #include "SurrogateTrainer.h" @@ -188,14 +188,16 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// The frequency the loss should be printed const unsigned int _loss_print_frequency; + /// min + std::vector _min_values; + /// max + std::vector _max_values; + /// Pointer to the control (or actor) neural net object - std::shared_ptr 
_control_nn; + std::shared_ptr _control_nn; /// Pointer to the critic neural net object std::shared_ptr _critic_nn; - /// standard deviation in a tensor format for sampling the actual control value - torch::Tensor _std; - /// Torch::tensor version of the input and action data torch::Tensor _input_tensor; torch::Tensor _output_tensor; @@ -203,6 +205,9 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase torch::Tensor _log_probability_tensor; private: + + torch::Tensor gaussianEntropy(const torch::Tensor std); + /** * Extract the response values from the postprocessors of the controlled system. * This assumes that they are stored in an AccumulateReporter diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index d22185647d7f..0d5dcc72cc4c 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -10,9 +10,14 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include "LibtorchDRLControl.h" +<<<<<<< HEAD #include "TorchScriptModule.h" #include "LibtorchArtificialNeuralNet.h" +======= +#include "LibtorchTorchScriptNeuralNet.h" +>>>>>>> a7b46c70e5 (Add actor network) #include "Transient.h" +#include "LibtorchUtils.h" registerMooseObject("StochasticToolsApp", LibtorchDRLControl); @@ -23,15 +28,11 @@ LibtorchDRLControl::validParams() params.addClassDescription( "Sets the value of multiple 'Real' input parameters and postprocessors based on a Deep " "Reinforcement Learning (DRL) neural network trained using a PPO algorithm."); - params.addRequiredParam>( - "action_standard_deviations", "Standard deviation value used while sampling the actions."); params.addParam("seed", "Seed for the random number generator."); params.addParam("num_stems_in_period", 1, "Blabla"); params.addParam("smoother", 1.0, "Blabla"); - params.addParam>("maximum_actions", {}, "The maximum actions"); - 
params.addParam>("minimum_actions", {}, "The minimum actions"); return params; } @@ -39,42 +40,15 @@ LibtorchDRLControl::validParams() LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) : LibtorchNeuralNetControl(parameters), _current_control_signal_log_probabilities(std::vector(_control_names.size(), 0.0)), - _action_std(getParam>("action_standard_deviations")), _previous_control_signal(std::vector(_control_names.size(), 0.0)), _current_smoothed_signal(std::vector(_control_names.size(), 0.0)), _call_counter(0), _num_steps_in_period(getParam("num_stems_in_period")), - _smoother(getParam("smoother")), - _maximum_actions(isParamSetByUser("maximum_actions") ? getParam>("maximum_actions") : std::vector(_control_names.size(), std::numeric_limits::max())), - _minimum_actions(isParamSetByUser("minimum_actions") ? getParam>("minimum_actions") : std::vector(_control_names.size(), -std::numeric_limits::max())) - + _smoother(getParam("smoother")) { - if (_control_names.size() != _action_std.size()) - paramError("action_standard_deviations", - "Number of action_standard_deviations does not match the number of controlled " - "parameters."); - // Fixing the RNG seed to make sure every experiment is the same. 
if (isParamValid("seed")) torch::manual_seed(getParam("seed")); - - // We convert and store the user-supplied standard deviations into a tensor which can be easily - // used by routines in libtorch - _std = torch::eye(_control_names.size()); - for (unsigned int i = 0; i < _control_names.size(); ++i) - _std[i][i] = _action_std[i]; - - if (isParamSetByUser("maximum_actions")) - { - for (const auto i : index_range(_maximum_actions)) - _maximum_actions[i] = _maximum_actions[i]/_action_scaling_factors[i]; - } - - if (isParamSetByUser("minimum_actions")) - { - for (const auto i : index_range(_minimum_actions)) - _minimum_actions[i] = _minimum_actions[i]/_action_scaling_factors[i]; - } } void @@ -88,6 +62,7 @@ LibtorchDRLControl::execute() // Fill a vector with the current values of the responses updateCurrentResponse(); +<<<<<<< HEAD // If this is the first time this control is called and we need to use older values, fill up the // needed old values using the initial values if (_old_responses.empty()) @@ -148,18 +123,85 @@ LibtorchDRLControl::execute() _old_responses[0] = _current_response; } _call_counter++; +======= + if (_current_execute_flag == EXEC_TIMESTEP_BEGIN) + { + // If this is the first time this control is called and we need to use older values, fill up the + // needed old values using the initial values + if (!_initialized) + { + _old_responses.clear(); + for (unsigned int step_i = 0; step_i < num_old_timesteps; ++step_i) + _old_responses.push_back(_current_response); + _initialized = true; + } + + if (_call_counter % _num_steps_in_period == 0) + { + // Organize the old an current solution into a tensor so we can evaluate the neural net + torch::Tensor input_tensor = prepareInputTensor(); + + // Evaluate the neural network to get the expected control value + torch::Tensor action = _actor_nn->forward(input_tensor); + + // Compute log probability + torch::Tensor log_probability = _actor_nn->logProbability(); + + _current_control_signals = {action.data_ptr(), 
action.data_ptr() + action.size(1)}; + + // std::cout << "Computing control signal to: " << Moose::stringify(_current_control_signals) << std::endl; + + + // for (const auto i : index_range(_current_control_signals)) + // _current_control_signals[i] = std::min(std::max(_current_control_signals[i], _minimum_actions[i]), _maximum_actions[i]); + + _current_control_signal_log_probabilities = {log_probability.data_ptr(), + log_probability.data_ptr() + + log_probability.size(1)}; + } + + + // Convert data + _previous_control_signal = _current_smoothed_signal; + + + for (const auto i : index_range(_current_smoothed_signal)) + _current_smoothed_signal[i] = _previous_control_signal[i] + _smoother*(_current_control_signals[i] - _previous_control_signal[i]); + + + // std::cout << "Setting control signal to: " << Moose::stringify(_current_smoothed_signal) << std::endl; + + + for (unsigned int control_i = 0; control_i < n_controls; ++control_i) + { + + // We scale the controllable value for physically meaningful control action + setControllableValueByName(_control_names[control_i], + _current_smoothed_signal[control_i] * + _action_scaling_factors[control_i]); + } + + // We add the curent solution to the old solutions and move everything in there one step + // backward + if (_old_responses.size()) + { + std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); + _old_responses[0] = _current_response; + } + _call_counter++; + } +>>>>>>> a7b46c70e5 (Add actor network) } } -torch::Tensor -LibtorchDRLControl::computeLogProbability(const torch::Tensor & action, - const torch::Tensor & output_tensor) +void +LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) { - // Logarithmic probability of taken action, given the current distribution. 
- torch::Tensor var = torch::matmul(_std, _std); - - return -((action - output_tensor) * (action - output_tensor)) / (2.0 * var) - torch::log(_std) - - std::log(std::sqrt(2.0 * M_PI)); + const auto * check = dynamic_cast(&input_nn); + if (!check) + mooseError("This needs to be a LibtorchActorNeuralNet!"); + _nn = std::make_shared(*check); + _actor_nn = dynamic_cast(_nn.get()); } Real diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index d243915258fc..cbf3d1d2bdf0 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -10,7 +10,6 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include "LibtorchDataset.h" -#include "LibtorchArtificialNeuralNetTrainer.h" #include "LibtorchUtils.h" #include "LibtorchDRLControlTrainer.h" #include "Sampler.h" @@ -122,6 +121,9 @@ LibtorchDRLControlTrainer::validParams() 0, "The frequency which is used to print the loss values. If 0, the " "loss values are not printed."); + params.addParam("batch_size", 100, "Batch size"); + params.addParam>("min_control_value", {}, "The minimum values of the control signal."); + params.addParam>("max_control_value", {}, "The maximum calue of the control signal."); return params; } @@ -161,6 +163,8 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _average_episode_reward(0.0), _standardize_advantage(getParam("standardize_advantage")), _loss_print_frequency(getParam("loss_print_frequency")), + _min_values(getParam>("min_control_value")), + _max_values(getParam>("max_control_value")), _update_counter(_update_frequency) { if (_response_names.size() != _response_shift_factors.size()) @@ -181,20 +185,18 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par // Otherwise sampling / stochastic gradient descent would be different. 
torch::manual_seed(getParam("seed")); - // Convert the user input standard deviations to a diagonal tensor - _std = torch::eye(_control_names.size()); - for (unsigned int i = 0; i < _control_names.size(); ++i) - _std[i][i] = _action_std[i]; - bool filename_valid = isParamValid("filename_base"); // Initializing the control neural net so that the control can grab it right away - _control_nn = std::make_shared( + _control_nn = std::make_shared( filename_valid ? _filename_base + "_control.net" : "control.net", _num_inputs, _num_outputs, _num_control_neurons_per_layer, - getParam>("control_activation_functions")); + _action_std, + getParam>("control_activation_functions"), + _min_values, + _max_values); // We read parameters for the control neural net if it is requested if (_read_from_file) @@ -353,52 +355,80 @@ LibtorchDRLControlTrainer::trainController() if (_standardize_advantage) advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10); + // auto data_set = DRLDataset({_input_tensor, _output_tensor, _log_probability_tensor, _return_tensor}); + + // Transform the dataset se that the loader has an easier time + auto input_size = _input_tensor.sizes()[0]; + auto batch_size = getParam("batch_size"); + // auto data_loader = torch::data::make_data_loader(std::move(transformed_data_set), batch_size); + for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch) { - // Get the approximate return from the neural net again (this one does have an associated - // gradient) - value = evaluateValue(_input_tensor); - // Get the approximate logarithmic action probability using the control neural net - auto curr_log_probability = evaluateAction(_input_tensor, _output_tensor); - - // Prepare the ratio by using the e^(logx-logy)=x/y expression - auto ratio = (curr_log_probability - _log_probability_tensor).exp(); - - // Use clamping for limiting - auto surr1 = ratio * advantage; - auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage; - - // 
Compute loss values for the critic and the control neural net - auto actor_loss = -torch::min(surr1, surr2).mean(); - auto critic_loss = torch::mse_loss(value, _return_tensor); - - // Update the weights in the neural nets - actor_optimizer.zero_grad(); - actor_loss.backward(); - actor_optimizer.step(); - - critic_optimizer.zero_grad(); - critic_loss.backward(); - critic_optimizer.step(); - - // const auto & named_params = _control_nn->named_parameters(); - // for (const auto & param_i : make_range(named_params.size())) - // { - // // We cast the parameters into a 1D vector - // std::cout << Moose::stringify(std::vector( - // named_params[param_i].value().data_ptr(), - // named_params[param_i].value().data_ptr() + named_params[param_i].value().numel())) << std::endl; - // } - - // print loss per epoch + auto permutation = torch::randperm(input_size); + unsigned int batch_begin = 0; + unsigned int batch_end = 0; + while (batch_end < input_size) + { + batch_end = batch_begin + batch_size > input_size ? 
input_size : batch_begin + batch_size; + unsigned int offset = batch_end - batch_begin; + auto batch_permutation = permutation.narrow(0, batch_begin, offset); + auto obs_batch = _input_tensor.index({batch_permutation}); + auto action_batch = _output_tensor.index({batch_permutation}); + auto log_prob_batch = _log_probability_tensor.index({batch_permutation}); + auto return_batch = _return_tensor.index({batch_permutation}); + auto advantage_batch = advantage.index({batch_permutation}); + + // Get the approximate return from the neural net again (this one does have an associated + // gradient) + value = evaluateValue(obs_batch); + + + _control_nn->forward(obs_batch); + // Get the approximate logarithmic action probability using the control neural net + auto curr_log_probability = _control_nn->logProbability(action_batch); + + // Prepare the ratio by using the e^(logx-logy)=x/y expression + auto ratio = (curr_log_probability - log_prob_batch).exp(); + + // Use clamping for limiting + auto surr1 = ratio * advantage_batch; + auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage_batch; + + // Compute loss values for the critic and the control neural net + auto actor_loss = -(torch::min(surr1, surr2) + 0.01*_control_nn->entropy()).mean(); + auto critic_loss = torch::mse_loss(value, return_batch); + + // Update the weights in the neural nets + actor_optimizer.zero_grad(); + actor_loss.backward(); + actor_optimizer.step(); + + critic_optimizer.zero_grad(); + critic_loss.backward(); + critic_optimizer.step(); + + // const auto & named_params = _control_nn->named_parameters(); + // for (const auto & param_i : make_range(named_params.size())) + // { + // // We cast the parameters into a 1D vector + // std::cout << Moose::stringify(std::vector( + // named_params[param_i].value().data_ptr(), + // named_params[param_i].value().data_ptr() + named_params[param_i].value().numel())) << std::endl; + // } + + // print loss per epoch if 
(_loss_print_frequency) - if (epoch % _loss_print_frequency == 0) + if (epoch % _loss_print_frequency == 0 && batch_begin == 0) { - _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN - << actor_loss.item() << COLOR_DEFAULT << " | Critic Loss: " << COLOR_GREEN - << critic_loss.item() << COLOR_DEFAULT << std::endl; + << actor_loss.item() << COLOR_DEFAULT << " | Critic Loss: " << COLOR_GREEN + << critic_loss.item() << COLOR_DEFAULT << std::endl; } + + batch_begin = batch_end; + } + std::cout << _control_nn->stdTensor() << std::endl; + } } @@ -454,17 +484,6 @@ LibtorchDRLControlTrainer::evaluateValue(torch::Tensor & input) return _critic_nn->forward(input); } -torch::Tensor -LibtorchDRLControlTrainer::evaluateAction(torch::Tensor & input, torch::Tensor & output) -{ - torch::Tensor var = torch::matmul(_std, _std); - - // Compute an action and get it's logarithmic proability based on an assumed Gaussian distribution - torch::Tensor action = _control_nn->forward(input); - return -((action - output) * (action - output)) / (2 * var) - torch::log(_std) - - std::log(std::sqrt(2 * M_PI)); -} - void LibtorchDRLControlTrainer::resetData() { From e950f3789c89ec4bf9f855e85416864a786141c3 Mon Sep 17 00:00:00 2001 From: Peter German Date: Wed, 19 Feb 2025 08:22:14 -0700 Subject: [PATCH 14/51] Add different distributions. 
--- .../libtorch/utils/LibtorchActorNeuralNet.h | 25 +++- framework/src/functions/MooseParsedFunction.C | 3 + .../functions/MooseParsedFunctionWrapper.C | 3 + .../libtorch/utils/LibtorchActorNeuralNet.C | 126 +++++++++++++++--- .../LiftDragRewardPostprocessor.C | 2 + framework/src/postprocessors/PointValue.C | 5 +- .../flow_over_circle_linearfv.i | 10 +- .../stochastic/vortex_control/trainer.i | 38 +++--- .../libtorch/controls/LibtorchDRLControl.C | 9 +- .../trainers/LibtorchDRLControlTrainer.C | 9 +- 10 files changed, 179 insertions(+), 51 deletions(-) diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h index cdc83b7fb815..b30a4d4f40fc 100644 --- a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -54,18 +54,24 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet */ virtual torch::Tensor forward(torch::Tensor & x) override; + virtual torch::Tensor evaluate(torch::Tensor & input, bool sampled); + + virtual torch::Tensor sample(); + /// Construct the neural network virtual void constructNeuralNetwork() override; - const std::vector & std() const {return _std;}; + const std::vector & std() const {return _std;} const torch::Tensor & stdTensor() const {return _std_tensor;} - torch::Tensor computeLogProbability(const torch::Tensor & action, - const torch::Tensor & signal); + const torch::Tensor & alphaTensor() const {return _alpha_tensor;} + + const torch::Tensor & betaTensor() const {return _beta_tensor;} - torch::Tensor logProbability() {return _log_probability;} - torch::Tensor logProbability(torch::Tensor other) {return computeLogProbability(_mean, other);} + void resetDistributionParams(torch::Tensor input); + + torch::Tensor logProbability(const torch::Tensor & other); torch::Tensor entropy(); @@ -74,8 +80,15 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet torch::Tensor 
_std_tensor; + std::vector _alpha_module; + std::vector _beta_module; + + torch::Tensor _alpha_tensor; + torch::Tensor _beta_tensor; + torch::Tensor _alpha_beta_tensor; + torch::Tensor _log_norm; + torch::Tensor _mean; - torch::Tensor _log_probability; }; void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); diff --git a/framework/src/functions/MooseParsedFunction.C b/framework/src/functions/MooseParsedFunction.C index d0536a5505ee..20cb51b7cb80 100644 --- a/framework/src/functions/MooseParsedFunction.C +++ b/framework/src/functions/MooseParsedFunction.C @@ -44,6 +44,9 @@ MooseParsedFunction::MooseParsedFunction(const InputParameters & parameters) Real MooseParsedFunction::value(Real t, const Point & p) const { + // if (name() == "gap_x" || name() == "gap_y") + // std::cout << name() << std::endl; + mooseAssert(_function_ptr, "ParsedFunction should have been initialized"); return _function_ptr->evaluate(t, p); } diff --git a/framework/src/functions/MooseParsedFunctionWrapper.C b/framework/src/functions/MooseParsedFunctionWrapper.C index 23a2b9d17530..2c2381321399 100644 --- a/framework/src/functions/MooseParsedFunctionWrapper.C +++ b/framework/src/functions/MooseParsedFunctionWrapper.C @@ -145,7 +145,10 @@ void MooseParsedFunctionWrapper::update() { for (unsigned int i = 0; i < _pp_index.size(); ++i) + { (*_addr[_pp_index[i]]) = (*_pp_vals[i]); + // std::cout << (*_pp_vals[i]) << std::endl; + } for (unsigned int i = 0; i < _scalar_index.size(); ++i) (*_addr[_scalar_index[i]]) = (*_scalar_vals[i]); diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C index aab8f47d4aa0..fc2b0d887dac 100644 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -60,7 +60,29 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( void LibtorchActorNeuralNet::constructNeuralNetwork() { - 
LibtorchArtificialNeuralNet::constructNeuralNetwork(); + // Adding hidden layers + unsigned int inp_neurons = _num_inputs; + for (unsigned int i = 0; i < numHiddenLayers(); ++i) + { + std::unordered_map parameters = { + {"inp_neurons", inp_neurons}, {"out_neurons", _num_neurons_per_layer[i]}}; + addLayer("hidden_layer_" + std::to_string(i + 1), parameters); + + // Necessary to retain double precision (and error-free runs) + _weights[i]->to(_device_type, _data_type); + inp_neurons = _num_neurons_per_layer[i]; + } + + if (_minimum_values.size()) + { + auto num_inps = _num_neurons_per_layer[numHiddenLayers()-1]; + _alpha_module.push_back(register_module("alpha", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); + _beta_module.push_back(register_module("beta", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); + _alpha_module[0]->to(_device_type, _data_type); + _beta_module[0]->to(_device_type, _data_type); + + return; + } torch::Tensor std_tensor = torch::eye(_std.size()).to(_data_type); for (unsigned int i = 0; i < _std.size(); ++i) @@ -72,9 +94,41 @@ LibtorchActorNeuralNet::constructNeuralNetwork() torch::Tensor LibtorchActorNeuralNet::entropy() { + if (_minimum_values.size()) + { + return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) + - (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) + + (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor); + } + return 0.5*std::log(2*M_PI)+torch::log(_std_tensor)+0.5; } +void +LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) +{ + if (_minimum_values.size()) + { + auto alpha = _alpha_module[0]->forward(input); + _alpha_tensor = torch::log(torch::exp(alpha) + 1.0) + 1.0; + // std::cout << "setting alpha tensor to " << _alpha_tensor << std::endl; + auto beta = _beta_module[0]->forward(input); + _beta_tensor = torch::log(torch::exp(beta) + 1.0) + 1.0; + // std::cout << "setting beta tensor to " << _beta_tensor << 
std::endl; + + _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8); + _mean = _alpha_tensor/_alpha_beta_tensor; + + // std::cout << "setting mean to " << _mean << std::endl; + + _log_norm = at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor); + + return; + } + + _mean = input; +} + torch::Tensor LibtorchActorNeuralNet::forward(torch::Tensor & x) { @@ -102,35 +156,69 @@ LibtorchActorNeuralNet::forward(torch::Tensor & x) output = _weights[i]->forward(output); } + return output; +} + +torch::Tensor +LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) +{ + torch::Tensor output(x); + if (_data_type != output.scalar_type()) + output.to(_data_type); + if (_device_type != output.device().type()) + output.to(_device_type); + + // std::cout << "input" << output << std::endl; + output = forward(output); + + // std::cout << "out" << output << std::endl; + resetDistributionParams(output); + + if (sampled) + return sample(); + + return _mean; +} + +torch::Tensor +LibtorchActorNeuralNet::sample() +{ if (_minimum_values.size()) { - output = torch::sigmoid(_weights[_weights.size() - 1]->forward(output)); - torch::Tensor scale = torch::sub(_max_tensor, _min_tensor).to(_data_type); - output = torch::mul(output, scale); - output = output + _min_tensor; - } - else - { - output = _weights[_weights.size() - 1]->forward(output); - } + auto alpha_sample = at::_standard_gamma(_alpha_tensor); + auto beta_sample = at::_standard_gamma(_beta_tensor); - _mean = output; - auto action = at::normal(output, _std_tensor); - _log_probability = computeLogProbability(action, output); + auto sampled = alpha_sample / (alpha_sample + beta_sample); - output = torch::clamp(_min_tensor, _max_tensor, action); + // std::cout << "sampled " << sampled << std::endl; - return output; + return _min_tensor + (_max_tensor - _min_tensor)*sampled; + } + + return at::normal(_mean, _std_tensor); } torch::Tensor 
-LibtorchActorNeuralNet::computeLogProbability(const torch::Tensor & action, - const torch::Tensor & signal) +LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) { // Logarithmic probability of taken action, given the current distribution. - torch::Tensor var = torch::matmul(_std_tensor, _std_tensor); + if (_minimum_values.size()) + { + // std::cout << "input action " << action << std::endl; + // std::cout << "mintensor " << _min_tensor << std::endl; + // std::cout << "bewfore clamp " << (action - _min_tensor) / (_max_tensor - _min_tensor) <stdTensor() << std::endl; + // std::cout << "Input" << input_tensor << std::endl; // Evaluate the neural network to get the expected control value - torch::Tensor action = _actor_nn->forward(input_tensor); + torch::Tensor action = _actor_nn->evaluate(input_tensor, true); + // std::cout << "in za control " << action << std::endl; // Compute log probability - torch::Tensor log_probability = _actor_nn->logProbability(); + torch::Tensor log_probability = _actor_nn->logProbability(action); _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; @@ -158,6 +162,7 @@ LibtorchDRLControl::execute() _current_control_signal_log_probabilities = {log_probability.data_ptr(), log_probability.data_ptr() + log_probability.size(1)}; + // std::cout << "Logprob: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; } diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index cbf3d1d2bdf0..d1f5c8ff93d0 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -383,10 +383,14 @@ LibtorchDRLControlTrainer::trainController() value = evaluateValue(obs_batch); - _control_nn->forward(obs_batch); + auto new_action = _control_nn->evaluate(obs_batch, false); + + // 
std::cout << "new action " << new_action << std::endl; // Get the approximate logarithmic action probability using the control neural net auto curr_log_probability = _control_nn->logProbability(action_batch); + // std::cout << "log probability " << curr_log_probability << std::endl; + // Prepare the ratio by using the e^(logx-logy)=x/y expression auto ratio = (curr_log_probability - log_prob_batch).exp(); @@ -428,7 +432,8 @@ LibtorchDRLControlTrainer::trainController() batch_begin = batch_end; } std::cout << _control_nn->stdTensor() << std::endl; - + std::cout << _control_nn->alphaTensor().mean() << std::endl; + std::cout << _control_nn->betaTensor().mean() << std::endl; } } From 2abba0d494cc3dd419fe76625277a0b3ef291ed4 Mon Sep 17 00:00:00 2001 From: Peter German Date: Fri, 28 Feb 2025 16:24:24 -0700 Subject: [PATCH 15/51] Add control option without sampling. --- .../controls/LibtorchNeuralNetControl.h | 2 + .../libtorch/utils/LibtorchActorNeuralNet.h | 2 + .../utils/LibtorchArtificialNeuralNet.h | 4 + .../controls/LibtorchNeuralNetControl.C | 65 +- .../libtorch/utils/LibtorchActorNeuralNet.C | 27 +- .../utils/LibtorchArtificialNeuralNet.C | 26 + ...arFVAdvectionDiffusionFunctorDirichletBC.C | 2 + .../LiftDragRewardPostprocessor.C | 22 +- .../flow_over_circle_linearfv.i | 8 +- .../flow_over_circle_linearfv_single.i | 426 +++++++++++++ .../stochastic/vortex_control/trainer.i | 28 +- .../libtorch/controls/LibtorchDRLControl.h | 6 +- .../surrogates/LibtorchDRLControlTrainer.h | 74 ++- .../libtorch/controls/LibtorchDRLControl.C | 74 ++- .../trainers/LibtorchDRLControlTrainer.C | 558 ++++++++++++++---- 15 files changed, 1112 insertions(+), 212 deletions(-) create mode 100644 modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv_single.i diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index 8a04700a2b23..f1226e6615a7 100644 --- 
a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -50,6 +50,8 @@ class LibtorchNeuralNetControl : public Control */ virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn); + virtual void loadControlNeuralNetFromFile(const InputParameters & parameters); + /// Return a reference to the stored neural network const Moose::LibtorchNeuralNetBase & controlNeuralNet() const; diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h index b30a4d4f40fc..437f5bfd5bba 100644 --- a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -75,6 +75,8 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet torch::Tensor entropy(); + virtual void initializeNeuralNetwork() override; + protected: const std::vector & _std; diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index 925a5306f449..f9e498c81c04 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -86,6 +86,10 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu /// Construct the neural network virtual void constructNeuralNetwork(); + Real determineGain(const std::string & activation); + + virtual void initializeNeuralNetwork(); + const std::vector & minValues() const {return _minimum_values;}; const std::vector & maxValues() const {return _maximum_values;}; diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 709bc957f5bf..090aec0a8218 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ 
b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -104,40 +104,45 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param // If the user wants to read the neural net from file, we do it. We can read it from a // torchscript file, or we can create a shell and read back the parameters - if (isParamValid("filename")) + this->loadControlNeuralNetFromFile(parameters); +} + +void +LibtorchNeuralNetControl::loadControlNeuralNetFromFile(const InputParameters & parameters) +{ + const auto & filename = getParam("filename"); + if (getParam("torch_script_format")) + _nn = std::make_shared(filename); + else { - std::string filename = getParam("filename"); - if (getParam("torch_script_format")) - _nn = std::make_shared(filename); - else + unsigned int num_inputs = _response_names.size() * _input_timesteps; + unsigned int num_outputs = _control_names.size(); + std::vector num_neurons_per_layer = + getParam>("num_neurons_per_layer"); + std::vector activation_functions = + parameters.isParamSetByUser("activation_function") + ? getParam>("activation_function") + : std::vector({"relu"}); + auto nn = std::make_shared( + filename, num_inputs, num_outputs, num_neurons_per_layer, activation_functions); + + try { - unsigned int num_inputs = _response_names.size() * _input_timesteps; - unsigned int num_outputs = _control_names.size(); - std::vector num_neurons_per_layer = - getParam>("num_neurons_per_layer"); - std::vector activation_functions = - parameters.isParamSetByUser("activation_function") - ? getParam>("activation_function") - : std::vector({"relu"}); - auto nn = std::make_shared( - filename, num_inputs, num_outputs, num_neurons_per_layer, activation_functions); - - try - { - torch::load(nn, filename); - _nn = std::make_shared(*nn); - } - catch (const c10::Error & e) - { - mooseError( - "The requested pytorch parameter file could not be loaded. 
This can either be the" - "result of the file not existing or a misalignment in the generated container and" - "the data in the file. Make sure the dimensions of the generated neural net are the" - "same as the dimensions of the parameters in the input file!\n", - e.msg()); - } + torch::load(nn, filename); + _nn = std::make_shared(*nn); + } + catch (const c10::Error & e) + { + mooseError( + "The requested pytorch parameter file could not be loaded. This can either be the" + "result of the file not existing or a misalignment in the generated container and" + "the data in the file. Make sure the dimensions of the generated neural net are the" + "same as the dimensions of the parameters in the input file!\n", + e.msg()); } } + + execute(); } void diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C index fc2b0d887dac..5d215258dbfd 100644 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -57,6 +57,24 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( } } +void +LibtorchActorNeuralNet::initializeNeuralNetwork() +{ + for (unsigned int i = 0; i < numHiddenLayers(); ++i) + { + const auto & activation = _activation_function.size() > 1 ? 
_activation_function[i] : _activation_function[0]; + const Real gain = determineGain(activation); + torch::nn::init::orthogonal_(_weights[i]->weight, gain); + torch::nn::init::zeros_(_weights[i]->bias); + } + + if (_minimum_values.size()) + { + torch::nn::init::orthogonal_(_alpha_module[0]->weight); + torch::nn::init::orthogonal_(_beta_module[0]->weight); + } +} + void LibtorchActorNeuralNet::constructNeuralNetwork() { @@ -138,7 +156,7 @@ LibtorchActorNeuralNet::forward(torch::Tensor & x) if (_device_type != output.device().type()) output.to(_device_type); - for (unsigned int i = 0; i < _weights.size() - 1; ++i) + for (unsigned int i = 0; i < _weights.size(); ++i) { std::string activation = _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; @@ -154,6 +172,8 @@ LibtorchActorNeuralNet::forward(torch::Tensor & x) output = torch::gelu(_weights[i]->forward(output)); else if (activation == "linear") output = _weights[i]->forward(output); + + // std::cout << "midresult" << i << output << std::endl; } return output; @@ -163,6 +183,7 @@ torch::Tensor LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) { torch::Tensor output(x); + // std::cout << output << std::endl; if (_data_type != output.scalar_type()) output.to(_data_type); if (_device_type != output.device().type()) @@ -171,13 +192,13 @@ LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) // std::cout << "input" << output << std::endl; output = forward(output); - // std::cout << "out" << output << std::endl; + // std::cout << "midresult" << output << std::endl; resetDistributionParams(output); if (sampled) return sample(); - return _mean; + return _min_tensor + (_max_tensor - _min_tensor)*_mean; } torch::Tensor diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index d8da06d109a5..3692e50dc38f 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ 
b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -96,6 +96,32 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( } } +Real +LibtorchArtificialNeuralNet::determineGain(const std::string & activation) +{ + if (activation == "relu") + return std::sqrt(2); + if (activation == "tanh") + return 5.0/3.0; + + return 1.0; +} + +void +LibtorchArtificialNeuralNet::initializeNeuralNetwork() +{ + for (unsigned int i = 0; i < numHiddenLayers(); ++i) + { + const auto & activation = _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; + const Real gain = determineGain(activation); + torch::nn::init::orthogonal_(_weights[i]->weight, gain); + torch::nn::init::zeros_(_weights[i]->bias); + } + + torch::nn::init::orthogonal_(_weights.back()->weight); + torch::nn::init::zeros_(_weights.back()->bias); +} + void LibtorchArtificialNeuralNet::constructNeuralNetwork() { diff --git a/framework/src/linearfvbcs/LinearFVAdvectionDiffusionFunctorDirichletBC.C b/framework/src/linearfvbcs/LinearFVAdvectionDiffusionFunctorDirichletBC.C index 8d799da2e592..c30480d2c51c 100644 --- a/framework/src/linearfvbcs/LinearFVAdvectionDiffusionFunctorDirichletBC.C +++ b/framework/src/linearfvbcs/LinearFVAdvectionDiffusionFunctorDirichletBC.C @@ -57,6 +57,8 @@ LinearFVAdvectionDiffusionFunctorDirichletBC::computeBoundaryValueMatrixContribu Real LinearFVAdvectionDiffusionFunctorDirichletBC::computeBoundaryValueRHSContribution() const { + // if (name() == "gap_x") + // std::cout << name() << std::endl; // Fetch the boundary value from the provided functor. 
return computeBoundaryValue(); } diff --git a/framework/src/postprocessors/LiftDragRewardPostprocessor.C b/framework/src/postprocessors/LiftDragRewardPostprocessor.C index 386bd469ec50..00a1f0f8b12f 100644 --- a/framework/src/postprocessors/LiftDragRewardPostprocessor.C +++ b/framework/src/postprocessors/LiftDragRewardPostprocessor.C @@ -44,8 +44,7 @@ LiftDragRewardPostprocessor::LiftDragRewardPostprocessor(const InputParameters & _avg_lift(0.0), _avg_drag(0.0), _lift_history(std::vector(_averaging_window,0.0)), - _drag_history(std::vector(_averaging_window,0.0)), - _replace_counter(0) + _drag_history(std::vector(_averaging_window,0.0)) { } @@ -59,15 +58,26 @@ LiftDragRewardPostprocessor::getValue() const void LiftDragRewardPostprocessor::execute() { - auto rolling_index = _replace_counter % _averaging_window; - auto normalization = std::min(_replace_counter + 1, _averaging_window); + auto rolling_index = _t_step % _averaging_window; + // std::cout << " Rolling index " << rolling_index << std::endl; // std::cout << "Lift" << _lift << " drag " << _drag << std::endl; _lift_history[rolling_index] = _lift; _drag_history[rolling_index] = _drag; - _avg_lift = std::reduce(_lift_history.begin(), _lift_history.end())/normalization; - _avg_drag = std::reduce(_drag_history.begin(), _drag_history.end())/normalization; + // std::cout << Moose::stringify(_lift_history) << std::endl; + // std::cout << Moose::stringify(_drag_history) << std::endl; + + if (!rolling_index) + { + const auto normalization = _t_step ? 
_averaging_window : 1; + // std::cout << Moose::stringify(_lift_history) << std::endl; + // std::cout << Moose::stringify(_drag_history) << std::endl; + _avg_lift = std::reduce(_lift_history.begin(), _lift_history.end())/normalization; + _avg_drag = std::reduce(_drag_history.begin(), _drag_history.end())/normalization; + _lift_history = std::vector(_averaging_window,0.0); + _drag_history = std::vector(_averaging_window,0.0); + } _replace_counter++; } diff --git a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i index 7a2a99cc9a0e..eb4c81d9b29c 100644 --- a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i +++ b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i @@ -241,9 +241,9 @@ lift = lift_coeff drag = drag_coeff averaging_window = 50 - coeff_1 = 0.159 - coeff_2 = 0.25 - execute_on = 'TIMESTEP_END' + coeff_1 = 0.0 + coeff_2 = 0.2 + execute_on = 'INITIAL TIMESTEP_END' [] # [p1] # type = PointValue @@ -400,7 +400,7 @@ print_fields = false continue_on_max_its = true dt = 0.0005 - num_steps = 800 + num_steps = 2000 [] [Outputs] diff --git a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv_single.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv_single.i new file mode 100644 index 000000000000..7cafe723a903 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv_single.i @@ -0,0 +1,426 @@ +!include header.i + +[Mesh] + [fmg] + type = FileMeshGenerator + file = flow_over_circle_linearfv_out_orig.e + use_for_exodus_restart = true + [] +[] + +[Problem] + linear_sys_names = 'u_system v_system pressure_system' + previous_nl_solution_required = true +[] + +[Functions] + [inlet_function] + type = ParsedFunction + expression = '4*U*(y-ymin)*(ymax-y)/(ymax-ymin)/(ymax-ymin)' + symbol_names = 'U ymax ymin' 
+ symbol_values = '${inlet_velocity} ${y_max} ${y_min}' + [] + [gap_x] + type = ParsedFunction + expression = 'Q*x/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' + symbol_names = 'Q' + symbol_values = 'Q_signal' + [] + [gap_y] + type = ParsedFunction + expression = 'if(y>0,Q,-Q)*y/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' + symbol_names = 'Q' + symbol_values = 'Q_signal' + [] +[] + +[UserObjects] + [rc] + type = RhieChowMassFlux + u = vel_x + v = vel_y + pressure = pressure + rho = ${rho} + p_diffusion_kernel = p_diffusion + [] +[] + +[Variables] + [vel_x] + type = MooseLinearVariableFVReal + solver_sys = u_system + initial_from_file_var = vel_x + initial_from_file_timestep = LATEST + [] + [vel_y] + type = MooseLinearVariableFVReal + solver_sys = v_system + initial_from_file_var = vel_y + initial_from_file_timestep = LATEST + [] + [pressure] + type = MooseLinearVariableFVReal + # initial_condition = 0 + solver_sys = pressure_system + initial_from_file_var = pressure + initial_from_file_timestep = LATEST + [] +[] + +[LinearFVKernels] + [u_time] + type = LinearFVTimeDerivative + variable = vel_x + factor = ${rho} + [] + [u_advection_stress] + type = LinearWCNSFVMomentumFlux + variable = vel_x + advected_interp_method = ${advected_interp_method} + mu = ${mu} + u = vel_x + v = vel_y + momentum_component = 'x' + rhie_chow_user_object = 'rc' + use_nonorthogonal_correction = true + [] + [u_pressure] + type = LinearFVMomentumPressure + variable = vel_x + pressure = pressure + momentum_component = 'x' + [] + + [v_time] + type = LinearFVTimeDerivative + variable = vel_y + factor = ${rho} + [] + [v_advection_stress] + type = LinearWCNSFVMomentumFlux + variable = vel_y + advected_interp_method = ${advected_interp_method} + mu = ${mu} + u = vel_x + v = vel_y + momentum_component = 'y' + rhie_chow_user_object = 'rc' + use_nonorthogonal_correction = true + [] + [v_pressure] + type = LinearFVMomentumPressure + variable = vel_y + pressure = pressure + momentum_component = 'y' + 
[] + + [p_diffusion] + type = LinearFVAnisotropicDiffusion + variable = pressure + diffusion_tensor = Ainv + use_nonorthogonal_correction = true + [] + [HbyA_divergence] + type = LinearFVDivergence + variable = pressure + face_flux = HbyA + force_boundary_execution = true + [] +[] + +[LinearFVBCs] + [inlet_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'left_boundary' + functor = 'inlet_function' + [] + [inlet_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'left_boundary' + functor = 0 + [] + [circle_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'circle' + functor = 0 + [] + [circle_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'circle' + functor = 0 + [] + [gap_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'top_gap bottom_gap' + functor = 'gap_x' + [] + [gap_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'top_gap bottom_gap' + functor = 'gap_y' + [] + [walls_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'top_boundary bottom_boundary' + functor = 0 + [] + [walls_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'top_boundary bottom_boundary' + functor = 0 + [] + [outlet_p] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + boundary = 'right_boundary' + variable = pressure + functor = 0 + [] + [outlet_u] + type = LinearFVAdvectionDiffusionOutflowBC + variable = vel_x + use_two_term_expansion = false + boundary = 'right_boundary' + [] + [outlet_v] + type = LinearFVAdvectionDiffusionOutflowBC + variable = vel_y + use_two_term_expansion = false + boundary = 'right_boundary' + [] +[] + +[Postprocessors] + [drag_force] + type = IntegralDirectedSurfaceForce + vel_x = vel_x + vel_y = vel_y + mu = ${mu} + pressure = pressure + 
principal_direction = '1 0 0' + boundary = 'circle' + outputs = none + execute_on = 'INITIAL TIMESTEP_END' + [] + [drag_coeff] + type = ParsedPostprocessor + expression = '2*drag_force/rho/(avgvel*avgvel)/D' + constant_names = 'rho avgvel D' + constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' + pp_names = 'drag_force' + execute_on = 'INITIAL TIMESTEP_END' + [] + [lift_force] + type = IntegralDirectedSurfaceForce + vel_x = vel_x + vel_y = vel_y + mu = ${mu} + pressure = pressure + principal_direction = '0 1 0' + boundary = 'circle' + outputs = none + execute_on = 'INITIAL TIMESTEP_END' + [] + [lift_coeff] + type = ParsedPostprocessor + expression = '2*lift_force/rho/(avgvel*avgvel)/D' + constant_names = 'rho avgvel D' + constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' + pp_names = 'lift_force' + execute_on = 'INITIAL TIMESTEP_END' + [] + [reward] + type = LiftDragRewardPostprocessor + lift = lift_coeff + drag = drag_coeff + averaging_window = 50 + coeff_1 = 0.0 + coeff_2 = 0.2 + execute_on = 'INITIAL TIMESTEP_END' + [] + # [p1] + # type = PointValue + # variable = pressure + # point = '0 0.07 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p2] + # type = PointValue + # variable = pressure + # point = '0 -0.07 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p3] + # type = PointValue + # variable = pressure + # point = '0.075 0.1 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p4] + # type = PointValue + # variable = pressure + # point = '0.075 0.0 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p5] + # type = PointValue + # variable = pressure + # point = '0.075 -0.1 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + [p1x] + type = PointValue + variable = vel_x + point = '0 0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p2x] + type = PointValue + variable = vel_x + point = '0 -0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p3x] + type = 
PointValue + variable = vel_x + point = '0.075 0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p4x] + type = PointValue + variable = vel_x + point = '0.075 0.0 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p5x] + type = PointValue + variable = vel_x + point = '0.075 -0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p1y] + type = PointValue + variable = vel_y + point = '0 0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p2y] + type = PointValue + variable = vel_y + point = '0 -0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p3y] + type = PointValue + variable = vel_y + point = '0.075 0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p4y] + type = PointValue + variable = vel_y + point = '0.075 0.0 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p5y] + type = PointValue + variable = vel_y + point = '0.075 -0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [Q_signal] + type = ConstantPostprocessor + value = 0.0 + execute_on = TIMESTEP_BEGIN + [] + [Q] + type = LibtorchControlValuePostprocessor + control_name = src_control + [] + [log_prob_Q] + type = LibtorchDRLLogProbabilityPostprocessor + control_name = src_control + [] +[] + +[Reporters] + [results] + type = AccumulateReporter + reporters = 'p1x/value p2x/value p3x/value p4x/value p5x/value p1y/value p2y/value p3y/value p4y/value p5y/value reward/value Q/value log_prob_Q/value' + [] +[] + +[Controls] + [src_control] + type = LibtorchDRLControl + parameters = "Postprocessors/Q_signal/value" + responses = 'p1x p2x p3x p4x p5x p1y p2y p3y p4y p5y' + + # keep consistent with LibtorchDRLControlTrainer + input_timesteps = 1 + response_shift_factors = '1.98 1.825 2.015 0.03 1.9 0.58 -0.425 0.06 0.12 -0.02' + response_scaling_factors = '1.47 1.03 2.60 3.45 2.0 1.19 1.6 2.7 1.47 2.08' + action_scaling_factors = 1.0 + + filename = "control.net" + + num_neurons_per_layer = '512 512' + activation_function = 'tanh tanh' + + min_control_value = ${fparse -0.108} + max_control_value = 
${fparse 0.108} + + action_standard_deviations = '0.1' + + # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' + # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' + # action_scaling_factors = 1.0 + + execute_on = 'TIMESTEP_BEGIN' + smoother = 0.1 + num_stems_in_period = 50 + + deterministic = false + [] +[] + +[Executioner] + type = PIMPLE + momentum_l_abs_tol = 1e-7 + pressure_l_abs_tol = 1e-7 + momentum_l_tol = 1e-7 + pressure_l_tol = 1e-7 + rhie_chow_user_object = 'rc' + momentum_systems = 'u_system v_system' + pressure_system = 'pressure_system' + momentum_equation_relaxation = 0.9 + pressure_variable_relaxation = 0.6 + num_iterations = 100 + pressure_absolute_tolerance = 5e-6 + momentum_absolute_tolerance = 5e-6 + momentum_petsc_options_iname = '-pc_type -pc_hypre_type' + momentum_petsc_options_value = 'hypre boomeramg' + pressure_petsc_options_iname = '-pc_type -pc_hypre_type' + pressure_petsc_options_value = 'hypre boomeramg' + print_fields = false + continue_on_max_its = true + dt = 0.0005 + num_steps = 2000 +[] + +[Outputs] + exodus = true + [json] + type = JSON + execute_on = final + [] + # console = false + # execute_on = FINAL +[] diff --git a/modules/combined/examples/stochastic/vortex_control/trainer.i b/modules/combined/examples/stochastic/vortex_control/trainer.i index 92f1f24eda76..a7310c0a0456 100644 --- a/modules/combined/examples/stochastic/vortex_control/trainer.i +++ b/modules/combined/examples/stochastic/vortex_control/trainer.i @@ -4,9 +4,9 @@ [Samplers] [dummy] type = CartesianProduct - linear_space_items = '0 0.01 1' - min_procs_per_row = 30 - max_procs_per_row = 30 + linear_space_items = '0 0.01 5' + min_procs_per_row = 20 + max_procs_per_row = 20 [] [] @@ -16,8 +16,8 @@ sampler = dummy input_files = 'flow_over_circle_linearfv.i' mode = batch-reset - min_procs_per_app = 30 - max_procs_per_app = 30 + min_procs_per_app = 20 + max_procs_per_app = 20 [] [] @@ -49,14 +49,15 @@ log_probability = 
'storage/r_transfer:results:log_prob_Q:value' reward = 'storage/r_transfer:results:reward:value' - num_epochs = 25 - update_frequency = 10 - decay_factor = 0.995 + num_epochs = 50 + update_frequency = 2 + decay_factor = 0.99 + lambda_factor = 0.97 loss_print_frequency = 1 critic_learning_rate = 0.001 - num_critic_neurons_per_layer = '64 64' + num_critic_neurons_per_layer = '128 128' critic_activation_functions = 'relu relu' control_learning_rate = 0.001 @@ -75,7 +76,7 @@ # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' - action_standard_deviations = '0.01' + action_standard_deviations = '0.1' standardize_advantage = true @@ -84,10 +85,11 @@ # min_control_value = ${fparse -0.108} # max_control_value = ${fparse 0.108} - min_control_value = ${fparse -0.080} - max_control_value = ${fparse 0.080} + min_control_value = ${fparse -0.108} + max_control_value = ${fparse 0.108} - batch_size = 1600 + batch_size = 160 + timestep_window = 50 [] [] diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index 66fd4446c8c6..4996150c293e 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -41,6 +41,8 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) override; + virtual void loadControlNeuralNetFromFile(const InputParameters & parameters) override; + protected: /** * Function which computes the logarithmic probability of given actions. 
@@ -58,11 +60,13 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl std::vector _previous_control_signal; std::vector _current_smoothed_signal; - Moose::LibtorchActorNeuralNet * _actor_nn; + std::shared_ptr _actor_nn; unsigned int _call_counter; const unsigned int _num_steps_in_period; const Real _smoother; + const bool _deterministic; + }; #endif diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 8f1f39777251..b723e5d469b3 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -56,10 +56,20 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase * @param tensor_data The tensor where we would like to save the results * @param detach If the gradient info needs to be detached from the tensor */ - void convertDataToTensor(std::vector> & vector_data, + void convertDataToTensor(std::vector>> & vector_data, torch::Tensor & tensor_data, const bool detach = false); + /** + * Function to convert input/output data from std::vector to torch::tensor + * @param vector_data The input data in vector-vectors format + * @param tensor_data The tensor where we would like to save the results + * @param detach If the gradient info needs to be detached from the tensor + */ + void convertDataToTensor(std::vector> & vector_data, + torch::Tensor & tensor_data, + const bool detach = false); + /** * Function which evaluates the critic to get the value (discounter reward) * @param input The observation values (responses) @@ -77,31 +87,32 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase torch::Tensor evaluateAction(torch::Tensor & input, torch::Tensor & output); /// Compute the return value by discounting the rewards and summing them - void computeRewardToGo(std::vector & data, - const std::vector> * 
const reporter_link); + void computeReturn(std::vector> & data, + const std::vector> & reward, + const Real decay_factor); /// Reset data after updating the neural network void resetData(); /// Response reporter names - const std::vector _response_names; + const std::vector _state_names; /// Pointers to the current values of the responses /// We can have multiple responses, multiple samples, multiple timesteps - std::vector> *> _response_value_pointers; + std::vector> *> _state_value_pointers; /// Shifting constants for the responses - const std::vector _response_shift_factors; + const std::vector _state_shift_factors; /// Scaling constants for the responses - const std::vector _response_scaling_factors; + const std::vector _state_scaling_factors; /// Control reporter names - const std::vector _control_names; + const std::vector _action_names; /// Pointers to the current values of the control signals /// We can have multiple control signals, multiple samples, multiple timesteps - std::vector> *> _control_value_pointers; + std::vector> *> _action_value_pointers; /// Log probability reporter names const std::vector _log_probability_names; @@ -126,16 +137,18 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase unsigned int _num_outputs; ///@{ - /// The gathered data from the reporters, each row represents one QoI, each column represents one time step - std::vector> _input_data; - std::vector> _output_data; - std::vector> _log_probability_data; + std::vector>> _state_data; + std::vector>> _next_state_data; + std::vector>> _action_data; + std::vector>> _log_probability_data; ///@} ///@{ /// The reward and return data. 
The return is calculated using the _reward_data - std::vector _reward_data; - std::vector _return_data; + std::vector> _reward_data; + std::vector> _return_data; + std::vector> _delta_data; + std::vector> _gae_data; ///@} /// Number of epochs for the training of the emulator @@ -161,6 +174,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Decaying factor that is used when calculating the return from the reward const Real _decay_factor; + const Real _lambda_factor; /// Standard deviation for the actions const std::vector _action_std; @@ -199,15 +213,18 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase std::shared_ptr _critic_nn; /// Torch::tensor version of the input and action data - torch::Tensor _input_tensor; - torch::Tensor _output_tensor; + torch::Tensor _state_tensor; + torch::Tensor _next_state_tensor; + torch::Tensor _action_tensor; + torch::Tensor _gae_tensor; torch::Tensor _return_tensor; + torch::Tensor _delta_tensor; torch::Tensor _log_probability_tensor; -private: - - torch::Tensor gaussianEntropy(const torch::Tensor std); + std::unique_ptr _actor_optimizer; + std::unique_ptr _critic_optimizer; +private: /** * Extract the response values from the postprocessors of the controlled system. 
* This assumes that they are stored in an AccumulateReporter @@ -215,7 +232,8 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase * @param reporter_names The names of the reporters which need to be extracted * @param num_timesteps The number of timesteps we want to use for training */ - void getResponseDataFromReporter(std::vector> & data, + void getResponseDataFromReporter(std::vector>> & data, + std::vector>> & next_data, const std::vector> *> & reporter_links, const unsigned int num_timesteps); /** @@ -224,10 +242,16 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase * @param data The data where we would like to store the output values * @param reporter_names The names of the reporters which need to be extracted */ - void getSignalDataFromReporter(std::vector> & data, + void getSignalDataFromReporter(std::vector>> & data, const std::vector> *> & reporter_links); - void normalizeResponseData(std::vector> & data, const unsigned int num_reporters, const unsigned int num_timesteps); + void computeCumulativeRewardEstimate(std::vector> & data, + std::vector>> & state, + std::vector>> & next_state, + std::vector> & reward); + + void normalizeResponseData(std::vector>> & data, + const unsigned int num_timesteps); /** * Extract the reward values from the postprocessors of the controlled system @@ -235,7 +259,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase * @param data The data where we would like to store the reward values * @param reporter_names The name of the reporter which need to be extracted */ - void getRewardDataFromReporter(std::vector & data, + void getRewardDataFromReporter(std::vector> & data, const std::vector> * const reporter_link); /// Getting reporter pointers with given names @@ -244,6 +268,8 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Counter for number of transient simulations that have been run before updating the controller unsigned int _update_counter; + + unsigned int 
_timestep_window; }; #endif diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 9d68f1d789d6..e96300c5d997 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -34,6 +34,14 @@ LibtorchDRLControl::validParams() params.addParam("num_stems_in_period", 1, "Blabla"); params.addParam("smoother", 1.0, "Blabla"); + params.addParam("deterministic", false, "If true, the controller evaluates the policy deterministically instead of sampling the action."); + + params.addRequiredParam>( + "action_standard_deviations", "Standard deviation value used while sampling the actions."); + + params.addParam>("min_control_value", {}, "The minimum values of the control signal."); + params.addParam>("max_control_value", {}, "The maximum value of the control signal."); + return params; } @@ -44,18 +52,61 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) _current_smoothed_signal(std::vector(_control_names.size(), 0.0)), _call_counter(0), _num_steps_in_period(getParam("num_stems_in_period")), - _smoother(getParam("smoother")) + _smoother(getParam("smoother")), + _deterministic(getParam("deterministic")) { // Fixing the RNG seed to make sure every experiment is the same. if (isParamValid("seed")) torch::manual_seed(getParam("seed")); + + loadControlNeuralNetFromFile(parameters); +} + +void +LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & parameters) +{ + const auto & filename = getParam("filename"); + if (getParam("torch_script_format")) + _nn = std::make_shared(filename); + else + { + unsigned int num_inputs = _response_names.size() * _input_timesteps; + unsigned int num_outputs = _control_names.size(); + std::vector num_neurons_per_layer = + getParam>("num_neurons_per_layer"); + std::vector activation_functions = + parameters.isParamSetByUser("activation_function") + ?
getParam>("activation_function") + : std::vector({"relu"}); + + const std::vector & minimum_values = getParam>("min_control_value"); + const std::vector & maximum_values = getParam>("max_control_value"); + const std::vector & action_std = getParam>("action_standard_deviations"); + + auto nn = std::make_shared( + filename, num_inputs, num_outputs, num_neurons_per_layer, action_std, activation_functions, minimum_values, maximum_values); + + try + { + torch::load(nn, filename); + _actor_nn = std::make_shared(*nn); + } + catch (const c10::Error & e) + { + mooseError( + "The requested pytorch parameter file could not be loaded. This can either be the " + "result of the file not existing or a misalignment in the generated container and " + "the data in the file. Make sure the dimensions of the generated neural net are the " + "same as the dimensions of the parameters in the input file!\n", + e.msg()); + } + } } void LibtorchDRLControl::execute() { - // std::cout << _nn << " " << (_current_execute_flag == EXEC_TIMESTEP_BEGIN) << std::endl; - if (_nn) + if (_actor_nn) { unsigned int n_controls = _control_names.size(); unsigned int num_old_timesteps = _input_timesteps - 1; @@ -145,24 +196,31 @@ LibtorchDRLControl::execute() // std::cout << "Std" << _actor_nn->stdTensor() << std::endl; // std::cout << "Input" << input_tensor << std::endl; // Evaluate the neural network to get the expected control value - torch::Tensor action = _actor_nn->evaluate(input_tensor, true); + torch::Tensor action = _actor_nn->evaluate(input_tensor, _deterministic); // std::cout << "in za control " << action << std::endl; // Compute log probability - torch::Tensor log_probability = _actor_nn->logProbability(action); _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; + if (_call_counter == 0) + _current_smoothed_signal = _current_control_signals; + // std::cout << "Computing control signal to: " << Moose::stringify(_current_control_signals) << std::endl; // for (const
auto i : index_range(_current_control_signals)) // _current_control_signals[i] = std::min(std::max(_current_control_signals[i], _minimum_actions[i]), _maximum_actions[i]); - _current_control_signal_log_probabilities = {log_probability.data_ptr(), + if (!_deterministic) + { + torch::Tensor log_probability = _actor_nn->logProbability(action); + + _current_control_signal_log_probabilities = {log_probability.data_ptr(), log_probability.data_ptr() + log_probability.size(1)}; - // std::cout << "Logprob: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; + // std::cout << "Logprob: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; + } } @@ -206,7 +264,7 @@ LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNe if (!check) mooseError("This needs to be a LibtorchActorNeuralNet!"); _nn = std::make_shared(*check); - _actor_nn = dynamic_cast(_nn.get()); + _actor_nn = std::make_shared(*check); } Real diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index d1f5c8ff93d0..7419ed534bab 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -107,6 +107,12 @@ LibtorchDRLControlTrainer::validParams() "Decay factor for calculating the return. 
This accounts for decreased " "reward values from the later steps."); + params.addRangeCheckedParam( + "lambda_factor", + 1.0, + "0.0<=lambda_factor<=1.0", + "GAE lambda."); + params.addParam( "read_from_file", false, "Switch to read the neural network parameters from a file."); params.addParam( @@ -124,28 +130,32 @@ LibtorchDRLControlTrainer::validParams() params.addParam("batch_size", 100, "Batch size"); params.addParam>("min_control_value", {}, "The minimum values of the control signal."); params.addParam>("max_control_value", {}, "The maximum calue of the control signal."); + + params.addParam("timestep_window", 1, "Data acquisition timesteps (every nth)"); + return params; } LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & parameters) : SurrogateTrainerBase(parameters), - _response_names(getParam>("response")), - _response_shift_factors(isParamValid("response_shift_factors") + _state_names(getParam>("response")), + _state_shift_factors(isParamValid("response_shift_factors") ? getParam>("response_shift_factors") - : std::vector(_response_names.size(), 0.0)), - _response_scaling_factors(isParamValid("response_scaling_factors") + : std::vector(_state_names.size(), 0.0)), + _state_scaling_factors(isParamValid("response_scaling_factors") ? 
getParam>("response_scaling_factors") - : std::vector(_response_names.size(), 1.0)), - _control_names(getParam>("control")), + : std::vector(_state_names.size(), 1.0)), + _action_names(getParam>("control")), _log_probability_names(getParam>("log_probability")), _reward_name(getParam("reward")), _reward_value_pointer(&getReporterValueByName>>(_reward_name)), _input_timesteps(getParam("input_timesteps")), - _num_inputs(_input_timesteps * _response_names.size()), - _num_outputs(_control_names.size()), - _input_data(std::vector>(_num_inputs)), - _output_data(std::vector>(_num_outputs)), - _log_probability_data(std::vector>(_num_outputs)), + _num_inputs(_input_timesteps * _state_names.size()), + _num_outputs(_action_names.size()), + _state_data(std::vector>>(_num_inputs)), + _next_state_data(std::vector>>(_num_inputs)), + _action_data(std::vector>>(_num_outputs)), + _log_probability_data(std::vector>>(_num_outputs)), _num_epochs(getParam("num_epochs")), _num_critic_neurons_per_layer( getParam>("num_critic_neurons_per_layer")), @@ -156,6 +166,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _update_frequency(getParam("update_frequency")), _clip_param(getParam("clip_parameter")), _decay_factor(getParam("decay_factor")), + _lambda_factor(getParam("lambda_factor")), _action_std(getParam>("action_standard_deviations")), _filename_base(isParamValid("filename_base") ? 
getParam("filename_base") : ""), _read_from_file(getParam("read_from_file")), @@ -165,20 +176,21 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _loss_print_frequency(getParam("loss_print_frequency")), _min_values(getParam>("min_control_value")), _max_values(getParam>("max_control_value")), - _update_counter(_update_frequency) + _update_counter(_update_frequency), + _timestep_window(getParam("timestep_window")) { - if (_response_names.size() != _response_shift_factors.size()) + if (_state_names.size() != _state_shift_factors.size()) paramError("response_shift_factors", "The number of shift factors is not the same as the number of responses!"); - if (_response_names.size() != _response_scaling_factors.size()) + if (_state_names.size() != _state_scaling_factors.size()) paramError( "response_scaling_factors", "The number of normalization coefficients is not the same as the number of responses!"); // We establish the links with the chosen reporters - getReporterPointers(_response_names, _response_value_pointers); - getReporterPointers(_control_names, _control_value_pointers); + getReporterPointers(_state_names, _state_value_pointers); + getReporterPointers(_action_names, _action_value_pointers); getReporterPointers(_log_probability_names, _log_probability_value_pointers); // Fixing the RNG seed to make sure every experiment is the same. 
@@ -223,6 +235,11 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _num_critic_neurons_per_layer, getParam>("critic_activation_functions")); + _actor_optimizer = std::make_unique(_control_nn->parameters(), + torch::optim::AdamOptions(_control_learning_rate)); + _critic_optimizer = std::make_unique(_critic_nn->parameters(), + torch::optim::AdamOptions(_critic_learning_rate)); + // We read parameters for the critic neural net if it is requested if (_read_from_file) { @@ -239,37 +256,167 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par } else if (filename_valid) torch::save(_critic_nn, _critic_nn->name()); + + // Define the optimizers for the training + // torch::optim::Adam actor_optimizer(_control_nn->parameters(), + // torch::optim::AdamOptions(_control_learning_rate)); + + // torch::optim::Adam critic_optimizer(_critic_nn->parameters(), + // torch::optim::AdamOptions(_critic_learning_rate)); + + // auto obs = torch::zeros({4,10}, torch::TensorOptions().dtype(torch::kFloat64)); + // for (int i : make_range(10)) + // for (int j : make_range(4)) + // obs.index_put_({j, i}, j+0.1*(i+1)); + + // auto action = torch::zeros({4,1}, torch::TensorOptions().dtype(torch::kFloat64)); + // for (int j : make_range(4)) + // action.index_put_({j, 0}, 0.01+j*0.005); + + // auto log_prob = torch::zeros({4,1}, torch::TensorOptions().dtype(torch::kFloat64)); + // for (int j : make_range(4)) + // log_prob.index_put_({j, 0}, 2.3-j*0.2); + + // auto reward = torch::zeros({4,1}, torch::TensorOptions().dtype(torch::kFloat64)); + // for (int j : make_range(4)) + // reward.index_put_({j, 0}, -2.9-j*0.1); + + // auto ret = torch::zeros({4,1}, torch::TensorOptions().dtype(torch::kFloat64)); + // Real v = 0.0; + // for (int j : make_range(4)) + // { + // v = reward.index({3-j, 0}).item()+0.95*v; + // ret.index_put_({3-j, 0}, v); + // } + + // std::cout << "states" << std::endl; + // std::cout << obs << std::endl; + // 
std::cout << "actions" << std::endl; + // std::cout << action << std::endl; + // std::cout << "logprobs" << std::endl; + // std::cout << log_prob << std::endl; + // std::cout << "reward" << std::endl; + // std::cout << reward << std::endl; + // std::cout << "return" << std::endl; + // std::cout << ret << std::endl; + + // auto value = evaluateValue(obs).detach(); + + // std::cout << "evaluate V" << std::endl; + // std::cout << value << std::endl; + + // auto advantage = ret - value; + + // std::cout << "advantage" << std::endl; + // std::cout << advantage << std::endl; + + // // Get the approximate return from the neural net again (this one does have an associated + // // gradient) + // value = evaluateValue(obs); + + // auto new_action = _control_nn->evaluate(obs, true); + + // std::cout << "new action" << std::endl; + // std::cout << new_action << std::endl; + + // // std::cout << "new action " << new_action << std::endl; + // // Get the approximate logarithmic action probability using the control neural net + // auto curr_log_probability = _control_nn->logProbability(action); + + // // std::cout << "log probability " << curr_log_probability << std::endl; + + // // Prepare the ratio by using the e^(logx-logy)=x/y expression + // auto ratio = (curr_log_probability - log_prob).exp(); + + // std::cout << "ratio" << std::endl; + // std::cout << ratio << std::endl; + + // // Use clamping for limiting + // auto surr1 = ratio * advantage; + // auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage; + + // // Compute loss values for the critic and the control neural net + // auto actor_loss = -(torch::min(surr1, surr2) + 0.01*_control_nn->entropy()).mean(); + // auto critic_loss = torch::mse_loss(value, ret); + + // std::cout << "actor loss" << std::endl; + // std::cout << actor_loss << std::endl; + + // std::cout << "critic loss" << std::endl; + // std::cout << critic_loss << std::endl; + + // // Update the weights in the neural nets + // 
actor_optimizer.zero_grad(); + // actor_loss.backward(); + // actor_optimizer.step(); + + // critic_optimizer.zero_grad(); + // critic_loss.backward(); + // critic_optimizer.step(); + + _control_nn->initializeNeuralNetwork(); + + // std::cout << "Control NN" << std::endl; + // const auto & control_params = _control_nn->named_parameters(); + // for (const auto & param_i : make_range(control_params.size())) + // { + // // We cast the parameters into a 1D vector + // std::cout << Moose::stringify(std::vector( + // control_params[param_i].value().data_ptr(), + // control_params[param_i].value().data_ptr() +control_params[param_i].value().numel())) << std::endl; + // } + + _critic_nn->initializeNeuralNetwork(); + + // std::cout << "Critic NN" << std::endl; + // const auto & critic_params = _critic_nn->named_parameters(); + // for (const auto & param_i : make_range(critic_params.size())) + // { + // // We cast the parameters into a 1D vector + // std::cout << Moose::stringify(std::vector( + // critic_params[param_i].value().data_ptr(), + // critic_params[param_i].value().data_ptr() + critic_params[param_i].value().numel())) << std::endl; + // } + + // mooseError("Bazinga"); } void LibtorchDRLControlTrainer::execute() { // Extract data from the reporters - getResponseDataFromReporter(_input_data, _response_value_pointers, _input_timesteps); - getSignalDataFromReporter(_output_data, _control_value_pointers); + getResponseDataFromReporter(_state_data, _next_state_data, _state_value_pointers, _input_timesteps); + getSignalDataFromReporter(_action_data, _action_value_pointers); getSignalDataFromReporter(_log_probability_data, _log_probability_value_pointers); getRewardDataFromReporter(_reward_data, _reward_value_pointer); - // Calculate return from the reward (discounting the reward) - computeRewardToGo(_return_data, _reward_value_pointer); - _update_counter--; // Only update the NNs when if (_update_counter == 0) { + // Calculate return from the reward (discounting the 
reward) + computeReturn(_return_data, _reward_data, _decay_factor); + // We compute the average reward first computeAverageEpisodeReward(); - normalizeResponseData(_input_data, _response_value_pointers.size(), _input_timesteps); + normalizeResponseData(_state_data, _input_timesteps); + normalizeResponseData(_next_state_data, _input_timesteps); + + computeCumulativeRewardEstimate(_delta_data, _state_data, _next_state_data, _reward_data); + + computeReturn(_gae_data, _delta_data, _decay_factor*_lambda_factor); // Transform input/output/return data to torch::Tensor - convertDataToTensor(_input_data, _input_tensor); - convertDataToTensor(_output_data, _output_tensor); + convertDataToTensor(_state_data, _state_tensor); + convertDataToTensor(_next_state_data, _next_state_tensor); + convertDataToTensor(_action_data, _action_tensor); convertDataToTensor(_log_probability_data, _log_probability_tensor); // Discard (detach) the gradient info for return data - LibtorchUtils::vectorToTensor(_return_data, _return_tensor, true); + convertDataToTensor(_return_data, _return_tensor, true); + convertDataToTensor(_gae_data, _gae_tensor, true); // We train the controller using the emulator to get a good control strategy trainController(); @@ -283,46 +430,103 @@ void LibtorchDRLControlTrainer::computeAverageEpisodeReward() { if (_reward_data.size()) - _average_episode_reward = - std::accumulate(_reward_data.begin(), _reward_data.end(), 0.0) / _reward_data.size(); + { + _average_episode_reward = 0.0; + unsigned int combined_sizes = 0; + for (const auto & sample : _reward_data) + { + _average_episode_reward += + std::accumulate(sample.begin(), sample.end(), 0.0); + combined_sizes += sample.size(); + } + _average_episode_reward = _average_episode_reward/combined_sizes; + } else _average_episode_reward = 0.0; } void -LibtorchDRLControlTrainer::computeRewardToGo(std::vector & data, - const std::vector> * const reporter_link) +LibtorchDRLControlTrainer::computeReturn(std::vector> & data, + 
const std::vector> & reward, + const Real decay_factor) { - // Get reward data from one simulation - std::vector reward_data_per_sim; - std::vector return_data_per_sim; - getRewardDataFromReporter(reward_data_per_sim, reporter_link); - // Discount the reward to get the return value, we need this to be able to anticipate // rewards based on the current behavior. We go backwards in samples and backwards in // accumulation. - unsigned int reward_i = reward_data_per_sim.size(); - for (const auto sample_i : index_range(*reporter_link)) + for (const auto sample_i : index_range(reward)) { - const auto backward_sample_i = reporter_link->size() - sample_i - 1; + std::vector sample_return; Real discounted_reward(0.0); - const auto history_size = (*reporter_link)[backward_sample_i].size() - _shift_outputs; - - for (const auto i : make_range(history_size)) - { - discounted_reward = reward_data_per_sim[reward_i - i - 1] + discounted_reward * _decay_factor; + const auto sample_size = reward[sample_i].size(); + for (const auto time_i : make_range(sample_size)) + { + discounted_reward = reward[sample_i][sample_size - time_i - 1] + discounted_reward * decay_factor; // We are inserting to the front of the vector and push the rest back, this will // ensure that the first element of the vector is the discounter reward for the whole transient - return_data_per_sim.insert(return_data_per_sim.begin(), discounted_reward); + sample_return.insert(sample_return.begin(), discounted_reward); } - // Update the global index - reward_i -= history_size; + // Save and accumulate the return values + data.push_back(std::move(sample_return)); } +} + +void +LibtorchDRLControlTrainer::computeCumulativeRewardEstimate(std::vector> & data, + std::vector>> & state, + std::vector>> & next_state, + std::vector> & reward) +{ + for (const auto sample_i : index_range(reward)) + { + torch::Tensor observations; + torch::Tensor next_observations; + torch::Tensor reward_tensor; + + 
LibtorchUtils::vectorToTensor(reward[sample_i], reward_tensor, true); + + for (const auto feature_i : index_range(state)) + { + torch::Tensor input_row; + torch::Tensor next_input_row; + LibtorchUtils::vectorToTensor(state[feature_i][sample_i], input_row, true); + LibtorchUtils::vectorToTensor(next_state[feature_i][sample_i], next_input_row, true); + + if (feature_i == 0) + { + observations = input_row; + next_observations = next_input_row; + } + else + { + observations = torch::cat({observations, input_row}, 1); + next_observations = torch::cat({next_observations, next_input_row}, 1); + } + } + + // std::cout << "going to GAE" << std::endl; + // std::cout << observations << std::endl; + // std::cout << next_observations << std::endl; + + + auto value = evaluateValue(observations).detach(); + auto value_next = evaluateValue(next_observations).detach(); - // Save and accumulate the return values - data.insert(_return_data.end(), return_data_per_sim.begin(), return_data_per_sim.end()); + // std::cout << "values" << std::endl; + // std::cout << value << std::endl; + // std::cout << value_next << std::endl; + + auto delta = reward_tensor + _decay_factor*value_next - value; + + // std::cout << "delta" << std::endl; + // std::cout << delta << std::endl; + + std::vector delta_vector; + LibtorchUtils::tensorToVector(delta, delta_vector); + + data.push_back(std::move(delta_vector)); + } } void @@ -333,32 +537,26 @@ LibtorchDRLControlTrainer::trainController() if (processor_id() == 0) { // std::cout << "Training" << std::endl; - // std::cout << "Input tensor" << std::endl << _input_tensor << std::endl; - // std::cout << "Signal tensor" << std::endl << _output_tensor << std::endl; + // std::cout << "Input tensor" << std::endl << _state_tensor << std::endl; + // std::cout << "Input tensor" << std::endl << _next_state_tensor << std::endl; + // std::cout << "Signal tensor" << std::endl << _action_tensor << std::endl; // std::cout << "Logprob tensor" << std::endl << 
_log_probability_tensor << std::endl; // std::cout << "reward" << std::endl << Moose::stringify(_reward_data) << std::endl; // std::cout << "Return tensor" << std::endl << _return_tensor << std::endl; + // std::cout << "GAE" << std::endl << _gae_tensor << std::endl; // Define the optimizers for the training - torch::optim::Adam actor_optimizer(_control_nn->parameters(), - torch::optim::AdamOptions(_control_learning_rate)); + // torch::optim::Adam actor_optimizer(_control_nn->parameters(), + // torch::optim::AdamOptions(_control_learning_rate)); - torch::optim::Adam critic_optimizer(_critic_nn->parameters(), - torch::optim::AdamOptions(_critic_learning_rate)); + // torch::optim::Adam critic_optimizer(_critic_nn->parameters(), + // torch::optim::AdamOptions(_critic_learning_rate)); // Compute the approximate value (return) from the critic neural net and use it to compute an // advantage - auto value = evaluateValue(_input_tensor).detach(); - auto advantage = _return_tensor - value; - - // If requested, standardize the advantage - if (_standardize_advantage) - advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10); - - // auto data_set = DRLDataset({_input_tensor, _output_tensor, _log_probability_tensor, _return_tensor}); // Transform the dataset se that the loader has an easier time - auto input_size = _input_tensor.sizes()[0]; + auto input_size = _state_tensor.sizes()[0]; auto batch_size = getParam("batch_size"); // auto data_loader = torch::data::make_data_loader(std::move(transformed_data_set), batch_size); @@ -372,16 +570,18 @@ LibtorchDRLControlTrainer::trainController() batch_end = batch_begin + batch_size > input_size ? 
input_size : batch_begin + batch_size; unsigned int offset = batch_end - batch_begin; auto batch_permutation = permutation.narrow(0, batch_begin, offset); - auto obs_batch = _input_tensor.index({batch_permutation}); - auto action_batch = _output_tensor.index({batch_permutation}); + auto obs_batch = _state_tensor.index({batch_permutation}); + auto action_batch = _action_tensor.index({batch_permutation}); auto log_prob_batch = _log_probability_tensor.index({batch_permutation}); auto return_batch = _return_tensor.index({batch_permutation}); - auto advantage_batch = advantage.index({batch_permutation}); + auto advantage_batch = _gae_tensor.index({batch_permutation}); + + if (_standardize_advantage) + advantage_batch = (advantage_batch - advantage_batch.mean()) / (advantage_batch.std() + 1e-10); // Get the approximate return from the neural net again (this one does have an associated // gradient) - value = evaluateValue(obs_batch); - + auto value = evaluateValue(obs_batch); auto new_action = _control_nn->evaluate(obs_batch, false); @@ -403,21 +603,32 @@ LibtorchDRLControlTrainer::trainController() auto critic_loss = torch::mse_loss(value, return_batch); // Update the weights in the neural nets - actor_optimizer.zero_grad(); + _actor_optimizer->zero_grad(); actor_loss.backward(); - actor_optimizer.step(); + _actor_optimizer->step(); - critic_optimizer.zero_grad(); + _critic_optimizer->zero_grad(); critic_loss.backward(); - critic_optimizer.step(); + _critic_optimizer->step(); - // const auto & named_params = _control_nn->named_parameters(); - // for (const auto & param_i : make_range(named_params.size())) + // std::cout << "Control NN" << std::endl; + // const auto & control_params = _control_nn->named_parameters(); + // for (const auto & param_i : make_range(control_params.size())) // { // // We cast the parameters into a 1D vector // std::cout << Moose::stringify(std::vector( - // named_params[param_i].value().data_ptr(), - // named_params[param_i].value().data_ptr() + 
named_params[param_i].value().numel())) << std::endl; + // control_params[param_i].value().data_ptr(), + // control_params[param_i].value().data_ptr() +control_params[param_i].value().numel())) << std::endl; + // } + + // std::cout << "Critic NN" << std::endl; + // const auto & critic_params = _critic_nn->named_parameters(); + // for (const auto & param_i : make_range(critic_params.size())) + // { + // // We cast the parameters into a 1D vector + // std::cout << Moose::stringify(std::vector( + // critic_params[param_i].value().data_ptr(), + // critic_params[param_i].value().data_ptr() + critic_params[param_i].value().numel())) << std::endl; // } // print loss per epoch @@ -431,7 +642,7 @@ LibtorchDRLControlTrainer::trainController() batch_begin = batch_end; } - std::cout << _control_nn->stdTensor() << std::endl; + // std::cout << _control_nn->stdTensor() << std::endl; std::cout << _control_nn->alphaTensor().mean() << std::endl; std::cout << _control_nn->betaTensor().mean() << std::endl; } @@ -464,25 +675,51 @@ LibtorchDRLControlTrainer::trainController() } void -LibtorchDRLControlTrainer::convertDataToTensor(std::vector> & vector_data, +LibtorchDRLControlTrainer::convertDataToTensor(std::vector>> & vector_data, torch::Tensor & tensor_data, const bool detach) { - for (unsigned int i = 0; i < vector_data.size(); ++i) + for (const auto feature_i : index_range(vector_data)) { - torch::Tensor input_row; - LibtorchUtils::vectorToTensor(vector_data[i], input_row, detach); + if (vector_data[feature_i].size()) + { + torch::Tensor concatenated_feature; + convertDataToTensor(vector_data[feature_i], concatenated_feature, detach); - if (i == 0) - tensor_data = input_row; - else - tensor_data = torch::cat({tensor_data, input_row}, 1); + if (feature_i == 0) + tensor_data = concatenated_feature; + else + tensor_data = torch::cat({tensor_data, concatenated_feature}, 1); + } } if (detach) tensor_data.detach(); } +void +LibtorchDRLControlTrainer::convertDataToTensor(std::vector> & 
vector_data, + torch::Tensor & tensor_data, + const bool detach) +{ + if (vector_data.size()) + { + for (const auto vector_i : index_range(vector_data)) + { + torch::Tensor input_row; + LibtorchUtils::vectorToTensor(vector_data[vector_i], input_row, detach); + + if (vector_i == 0) + tensor_data = input_row; + else + tensor_data = torch::cat({tensor_data, input_row}, 0); + } + + if (detach) + tensor_data.detach(); + } +} + torch::Tensor LibtorchDRLControlTrainer::evaluateValue(torch::Tensor & input) { @@ -492,94 +729,169 @@ LibtorchDRLControlTrainer::evaluateValue(torch::Tensor & input) void LibtorchDRLControlTrainer::resetData() { - for (auto & data : _input_data) + for (auto & data : _state_data) data.clear(); - for (auto & data : _output_data) + for (auto & data : _next_state_data) + data.clear(); + for (auto & data : _action_data) data.clear(); for (auto & data : _log_probability_data) data.clear(); _reward_data.clear(); _return_data.clear(); + _gae_data.clear(); + _delta_data.clear(); + _update_counter = _update_frequency; } void LibtorchDRLControlTrainer::getResponseDataFromReporter( - std::vector> & data, + std::vector>> & data_current, + std::vector>> & data_next, const std::vector> *> & reporter_links, const unsigned int num_timesteps) { - // We have multiple reporters, each has a time series for each sample - for (const auto & rep_i : index_range(reporter_links)) + for (const auto & state_i : index_range(reporter_links)) { // Fetch the vector of time series for a given reporter - const std::vector> & reporter_data = *reporter_links[rep_i]; - - // std::cout << "Adding response: " << Moose::stringify(reporter_data) << std::endl; + const std::vector> & reporter_data = *reporter_links[state_i]; - // We might consider using older time steps too which requires adding new - // rows and populating them with staggered data - for (const auto & start_step : make_range(num_timesteps)) + // Made it to the inner loop which is just the different samples + for (const auto 
& start_i : make_range(num_timesteps)) { - unsigned int row = reporter_links.size() * start_step + rep_i; - - // Made it to the inner loop which is just the different samples - for (const auto sample_i : index_range(reporter_data)) + const auto input_i = start_i*reporter_links.size() + state_i; + for (const auto & sample : reporter_data) { - for (unsigned int fill_i = 1; fill_i < num_timesteps - start_step; ++fill_i) - data[row].push_back(reporter_data[sample_i][0]); + const unsigned int sample_vector_size = sample.size() - _shift_outputs; + const unsigned int num_entries_kept = sample_vector_size / _timestep_window; + std::vector split_sample(num_entries_kept, 0.0); + std::vector next_split_sample(num_entries_kept, 0.0); + + unsigned int current_real_i = 0; + unsigned int next_current_real_i = 0; + for (unsigned int time_i = 0; time_i < sample_vector_size; ++time_i) + { + if (!(time_i % _timestep_window)) + { + if (time_i < start_i) + split_sample[current_real_i] = sample[0]; + else + { + const auto shifted_i = time_i - start_i; + split_sample[current_real_i] = sample[shifted_i]; + } + current_real_i++; + } + + if (!(time_i % _timestep_window) && (time_i + _timestep_window < sample_vector_size + _shift_outputs)) + { + const auto shifted_i = time_i + _timestep_window - start_i; + next_split_sample[next_current_real_i] = sample[shifted_i]; + next_current_real_i++; + } + } - data[row].insert(data[row].end(), - reporter_data[sample_i].begin(), - reporter_data[sample_i].begin() + start_step + reporter_data[sample_i].size() - - (num_timesteps - 1) - _shift_outputs); + data_current[input_i].push_back(std::move(split_sample)); + data_next[input_i].push_back(std::move(next_split_sample)); } } } + // std::cout << " finished " << std::endl; } -void LibtorchDRLControlTrainer::normalizeResponseData(std::vector> & data, const unsigned int num_reporters, const unsigned int num_timesteps) +void LibtorchDRLControlTrainer::normalizeResponseData(std::vector>> & data, + const 
unsigned int num_timesteps) { // std::cout << " Normalizing " << Moose::stringify(data) << std::endl; // We have multiple reporters, each has a time series for each sample + const auto num_reporters = data.size() / num_timesteps; for (const auto & rep_i : make_range(num_reporters)) { // We shift and scale the inputs to get better training efficiency for (const auto & start_step : make_range(num_timesteps)) { - unsigned int row = num_reporters * start_step + rep_i; - std::transform( - data[row].begin(), - data[row].end(), - data[row].begin(), - [this, &rep_i](Real value) -> Real - { return (value - _response_shift_factors[rep_i]) * _response_scaling_factors[rep_i]; }); + unsigned int real_i = num_reporters * start_step + rep_i; + + for (const auto sample_i : index_range(data[real_i])) + { + std::transform( + data[real_i][sample_i].begin(), + data[real_i][sample_i].end(), + data[real_i][sample_i].begin(), + [this, &rep_i](Real value) -> Real + { return (value - _state_shift_factors[rep_i]) * _state_scaling_factors[rep_i]; }); + } } } } void LibtorchDRLControlTrainer::getSignalDataFromReporter( - std::vector> & data, + std::vector>> & data, const std::vector> *> & reporter_links) { - for (const auto & rep_i : index_range(reporter_links)) - for (const auto sample_i : index_range(*reporter_links[rep_i])) - // Fill the corresponding containers - data[rep_i].insert(data[rep_i].end(), - (*reporter_links[rep_i])[sample_i].begin() + _shift_outputs, - (*reporter_links[rep_i])[sample_i].end()); + for (const auto & action_i : index_range(reporter_links)) + { + // Fetch the vector of time series for a given reporter + const std::vector> & reporter_data = *reporter_links[action_i]; + + for (const auto & sample : reporter_data) + { + const unsigned int sample_vector_size = sample.size() - _shift_outputs; + const unsigned int num_entries_kept = sample_vector_size / _timestep_window; + std::vector action_for_sample(num_entries_kept, 0.0); + + unsigned int real_i = 0; + for (const 
auto time_i : make_range(sample_vector_size)) + if (!(time_i % _timestep_window)) + { + action_for_sample[real_i] = sample[time_i + _shift_outputs]; + real_i++; + } + + data[action_i].push_back(std::move(action_for_sample)); + } + } } void -LibtorchDRLControlTrainer::getRewardDataFromReporter(std::vector & data, +LibtorchDRLControlTrainer::getRewardDataFromReporter(std::vector> & data, const std::vector> * const reporter_link) { + // Fetch the vector of time series for a given reporter + const std::vector> & reporter_data = *reporter_link; + + for (const auto & sample : reporter_data) + { + const unsigned int sample_vector_size = sample.size() - _shift_outputs; + const unsigned int num_entries_kept = sample_vector_size / _timestep_window; + + std::vector reward_for_sample(num_entries_kept, 0.0); + + unsigned int real_i = 0; + for (const auto time_i : make_range(sample_vector_size)) + if (!(time_i % _timestep_window) && (time_i + _timestep_window < sample_vector_size + _shift_outputs)) + { + reward_for_sample[real_i] = sample[time_i + _timestep_window]; + real_i++; + } + + data.push_back(std::move(reward_for_sample)); + } + // Fill the corresponding container - for (const auto sample_i : index_range(*reporter_link)) - data.insert(data.end(), (*reporter_link)[sample_i].begin() + _shift_outputs, (*reporter_link)[sample_i].end()); + // for (const auto sample_i : index_range(*reporter_link)) + // { + // for (const unsigned int state_i = _shift_outputs; state_i < (*reporter_link)[sample_i].size(); state_i++) + // { + // if (!((state_i - _shift_outputs) % _timestep_window)) + // data.push_back((*reporter_link)[sample_i][state_i]); + // } + // } + // data.insert(data.end(), (*reporter_link)[sample_i].begin() + _shift_outputs, (*reporter_link)[sample_i].end()); } void From fbac7cdd5da32a23364a64fd72d9dff8e5b2aa29 Mon Sep 17 00:00:00 2001 From: Peter German Date: Fri, 28 Feb 2025 17:52:24 -0700 Subject: [PATCH 16/51] Extend reward PP --- .../flow_over_circle_linearfv.i | 3 
+- .../stochastic/vortex_control/trainer.i | 4 +- .../libtorch/controls/LibtorchDRLControl.h | 2 +- .../libtorch/reporters/DRLRewardReporter.h | 9 +++- .../surrogates/LibtorchDRLControlTrainer.h | 13 +++++- .../libtorch/controls/LibtorchDRLControl.C | 15 +++---- .../libtorch/reporters/DRLRewardReporter.C | 12 ++++-- .../trainers/LibtorchDRLControlTrainer.C | 42 ++++++++++++++++--- 8 files changed, 79 insertions(+), 21 deletions(-) diff --git a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i index eb4c81d9b29c..fb1fc83b18ec 100644 --- a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i +++ b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i @@ -376,6 +376,7 @@ execute_on = 'TIMESTEP_BEGIN' smoother = 0.1 num_stems_in_period = 50 + stochastic = true [] [] @@ -400,7 +401,7 @@ print_fields = false continue_on_max_its = true dt = 0.0005 - num_steps = 2000 + num_steps = 4000 [] [Outputs] diff --git a/modules/combined/examples/stochastic/vortex_control/trainer.i b/modules/combined/examples/stochastic/vortex_control/trainer.i index a7310c0a0456..d1c004a03514 100644 --- a/modules/combined/examples/stochastic/vortex_control/trainer.i +++ b/modules/combined/examples/stochastic/vortex_control/trainer.i @@ -57,8 +57,8 @@ loss_print_frequency = 1 critic_learning_rate = 0.001 - num_critic_neurons_per_layer = '128 128' - critic_activation_functions = 'relu relu' + num_critic_neurons_per_layer = '64 64' + critic_activation_functions = 'tanh tanh' control_learning_rate = 0.001 num_control_neurons_per_layer = '512 512' diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index 4996150c293e..0b3e4573533d 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ 
b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -65,7 +65,7 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl unsigned int _call_counter; const unsigned int _num_steps_in_period; const Real _smoother; - const bool _deterministic; + const bool _stochastic; }; diff --git a/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h b/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h index d9792772de3a..d3421239d970 100644 --- a/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h +++ b/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h @@ -27,7 +27,14 @@ class DRLRewardReporter : public GeneralReporter, public SurrogateModelInterface protected: /// The reward values which will be saved - Real & _reward; + Real & _average_reward; + + Real & _std_reward; + + std::vector & _sample_average_reward; + + std::vector & _sample_std_reward; + /// The DRL trainer which computes the reward values LibtorchDRLControlTrainer & _trainer; }; diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index b723e5d469b3..85e72a541fd8 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -40,6 +40,10 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase * at the end of every episode. 
*/ Real averageEpisodeReward() { return _average_episode_reward; } + Real stdEpisodeReward() { return _std_episode_reward; } + + std::vector sampleAverageEpsiodeRewards() { return _sample_average_episode_reward; } + std::vector sampleStdEpsiodeRewards() { return _sample_std_episode_reward; } /// The condensed training function void trainController(); @@ -48,7 +52,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase protected: /// Compute the average eposiodic reward - void computeAverageEpisodeReward(); + void computeEpisodeRewardStatistics(); /** * Function to convert input/output data from std::vector to torch::tensor @@ -195,6 +199,11 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Storage for the current average episode reward Real _average_episode_reward; + Real _std_episode_reward; + + std::vector _sample_average_episode_reward; + std::vector _sample_std_episode_reward; + std::vector _sample_lengths; /// Switch to enable the standardization of the advantages const bool _standardize_advantage; @@ -221,6 +230,8 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase torch::Tensor _delta_tensor; torch::Tensor _log_probability_tensor; + Real _highest_reward; + std::unique_ptr _actor_optimizer; std::unique_ptr _critic_optimizer; diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index e96300c5d997..fbb79ce620e1 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -34,10 +34,10 @@ LibtorchDRLControl::validParams() params.addParam("num_stems_in_period", 1, "Blabla"); params.addParam("smoother", 1.0, "Blabla"); - params.addParam("deterministic", false, "Blabla"); + params.addParam("stochastic", true, "Blabla"); - params.addRequiredParam>( - "action_standard_deviations", "Standard deviation value used while sampling the 
actions."); + params.addParam>( + "action_standard_deviations", {}, "Standard deviation value used while sampling the actions."); params.addParam>("min_control_value", {}, "The minimum values of the control signal."); params.addParam>("max_control_value", {}, "The maximum calue of the control signal."); @@ -53,13 +53,14 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) _call_counter(0), _num_steps_in_period(getParam("num_stems_in_period")), _smoother(getParam("smoother")), - _deterministic(getParam("deterministic")) + _stochastic(getParam("stochastic")) { // Fixing the RNG seed to make sure every experiment is the same. if (isParamValid("seed")) torch::manual_seed(getParam("seed")); - loadControlNeuralNetFromFile(parameters); + if (isParamValid("filename")) + loadControlNeuralNetFromFile(parameters); } void @@ -196,7 +197,7 @@ LibtorchDRLControl::execute() // std::cout << "Std" << _actor_nn->stdTensor() << std::endl; // std::cout << "Input" << input_tensor << std::endl; // Evaluate the neural network to get the expected control value - torch::Tensor action = _actor_nn->evaluate(input_tensor, _deterministic); + torch::Tensor action = _actor_nn->evaluate(input_tensor, _stochastic); // std::cout << "in za control " << action << std::endl; // Compute log probability @@ -212,7 +213,7 @@ LibtorchDRLControl::execute() // for (const auto i : index_range(_current_control_signals)) // _current_control_signals[i] = std::min(std::max(_current_control_signals[i], _minimum_actions[i]), _maximum_actions[i]); - if (!_deterministic) + if (!_stochastic) { torch::Tensor log_probability = _actor_nn->logProbability(action); diff --git a/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C b/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C index acfef49acdc2..1979a2840cbd 100644 --- a/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C +++ b/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C 
@@ -21,7 +21,7 @@ DRLRewardReporter::validParams() params.addClassDescription("Reporter containing the reward values of a DRL controller trainer."); params.addRequiredParam( - "drl_trainer_name", "The name of the RDL controller trainer which computes the rewards."); + "drl_trainer_name", "The name of the DRL controller trainer which computes the rewards."); return params; } @@ -29,7 +29,10 @@ DRLRewardReporter::validParams() DRLRewardReporter::DRLRewardReporter(const InputParameters & parameters) : GeneralReporter(parameters), SurrogateModelInterface(this), - _reward(declareValueByName("average_reward", REPORTER_MODE_ROOT)), + _average_reward(declareValueByName("average_reward", REPORTER_MODE_ROOT)), + _std_reward(declareValueByName("std_reward", REPORTER_MODE_ROOT)), + _sample_average_reward(declareValueByName>("sample_average_reward", REPORTER_MODE_ROOT)), + _sample_std_reward(declareValueByName>("sample_std_reward", REPORTER_MODE_ROOT)), _trainer(getSurrogateTrainer("drl_trainer_name")) { } @@ -37,7 +40,10 @@ DRLRewardReporter::DRLRewardReporter(const InputParameters & parameters) void DRLRewardReporter::execute() { - _reward = _trainer.averageEpisodeReward(); + _average_reward = _trainer.averageEpisodeReward(); + _std_reward = _trainer.stdEpisodeReward(); + _sample_average_reward = _trainer.sampleAverageEpsiodeRewards(); + _sample_std_reward = _trainer.sampleStdEpsiodeRewards(); } #endif diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 7419ed534bab..b8b211f29c82 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -176,6 +176,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _loss_print_frequency(getParam("loss_print_frequency")), _min_values(getParam>("min_control_value")), 
_max_values(getParam>("max_control_value")), + _highest_reward(-1e8), _update_counter(_update_frequency), _timestep_window(getParam("timestep_window")) { @@ -399,7 +400,10 @@ LibtorchDRLControlTrainer::execute() computeReturn(_return_data, _reward_data, _decay_factor); // We compute the average reward first - computeAverageEpisodeReward(); + computeEpisodeRewardStatistics(); + + if(_average_episode_reward > _highest_reward) + torch::save(_control_nn, _control_nn->name()+"_best"); normalizeResponseData(_state_data, _input_timesteps); normalizeResponseData(_next_state_data, _input_timesteps); @@ -427,22 +431,49 @@ LibtorchDRLControlTrainer::execute() } void -LibtorchDRLControlTrainer::computeAverageEpisodeReward() +LibtorchDRLControlTrainer::computeEpisodeRewardStatistics() { if (_reward_data.size()) { _average_episode_reward = 0.0; + _std_episode_reward = 0.0; unsigned int combined_sizes = 0; + + _sample_average_episode_reward.clear(); + _sample_std_episode_reward.clear(); + for (const auto & sample : _reward_data) { - _average_episode_reward += - std::accumulate(sample.begin(), sample.end(), 0.0); - combined_sizes += sample.size(); + const unsigned int sample_size = sample.size(); + + Real sum = std::accumulate(sample.begin(), sample.end(), 0.0); + Real mean = sum / sample_size; + _sample_average_episode_reward.push_back(mean); + + + Real variance = std::transform_reduce(sample.begin(), sample.end(), + 0.0, + std::plus<>(), + [mean](double value) { + return (value - mean) * (value - mean); + } + ); + _sample_std_episode_reward.push_back(std::sqrt(variance / sample_size)); + + _average_episode_reward += sum; + _std_episode_reward += variance; + combined_sizes += sample_size; } _average_episode_reward = _average_episode_reward/combined_sizes; + _std_episode_reward = std::sqrt(_std_episode_reward/combined_sizes); + + } else + { _average_episode_reward = 0.0; + _std_episode_reward = 0.0; + } } void @@ -645,6 +676,7 @@ LibtorchDRLControlTrainer::trainController() // 
std::cout << _control_nn->stdTensor() << std::endl; std::cout << _control_nn->alphaTensor().mean() << std::endl; std::cout << _control_nn->betaTensor().mean() << std::endl; + _console << "Best model so far: " << _highest_reward << std::endl; } } From e16279b44f6439fde0de90b26a8c9fc628de241d Mon Sep 17 00:00:00 2001 From: Peter German Date: Sat, 1 Mar 2025 17:04:35 -0700 Subject: [PATCH 17/51] Add plotting script. --- .../stochastic/vortex_control/plot_reward.py | 60 +++++++++++++++++++ .../stochastic/vortex_control/trainer.i | 4 +- 2 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 modules/combined/examples/stochastic/vortex_control/plot_reward.py diff --git a/modules/combined/examples/stochastic/vortex_control/plot_reward.py b/modules/combined/examples/stochastic/vortex_control/plot_reward.py new file mode 100644 index 000000000000..3204d1a3cc76 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/plot_reward.py @@ -0,0 +1,60 @@ +import json +import matplotlib.pyplot as plt +import numpy as np + +plt.rc('text', usetex=True) +plt.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) + +# Load data from JSON file +with open('train_out.json') as f: + data = json.load(f) + +# Extract data from JSON and select every other data point starting from the second +time_steps = data['time_steps'][1::2] # Exclude the first entry and then take every other entry +average_rewards = [step['reward']['average_reward'] for step in time_steps] +std_rewards = [step['reward']['std_reward'] for step in time_steps] +sample_average_rewards = [step['reward']['sample_average_reward'] for step in time_steps] +
# Create a plot +fig, ax = plt.subplots() + +# Set LaTeX font + + +# Plot average reward data +indices = range(1, len(time_steps) + 1) # Start numbering from 1 +ax.plot(indices, average_rewards, label=r'$\mathrm{Average~Reward}$', color='darkblue', linewidth=2) + +# Compute confidence intervals +lower_bound_1std = [avg - std for avg, std in 
zip(average_rewards, std_rewards)] +upper_bound_1std = [avg + std for avg, std in zip(average_rewards, std_rewards)] +lower_bound_2std = [avg - 2 * std for avg, std in zip(average_rewards, std_rewards)] +upper_bound_2std = [avg + 2 * std for avg, std in zip(average_rewards, std_rewards)] + +# Fill between for confidence intervals +# ax.fill_between(indices, lower_bound_2std, upper_bound_2std, color='lightblue', alpha=0.75, label=r'$\pm 2\sigma$') +ax.fill_between(indices, lower_bound_1std, upper_bound_1std, color='lightblue', alpha=1.0, label=r'$\pm \sigma$') + +# Plot sample average reward points +for i, sample_rewards in enumerate(sample_average_rewards, start=1): + ax.scatter([i] * len(sample_rewards), sample_rewards, color='black', s=5, alpha=0.7, label=r'$\mathrm{Average~sample~rewards}$' if i == 1 else "") + +# Set custom axis ranges (adjust as needed) +ax.set_xlim([1, len(time_steps)]) # Example range for x-axis +ax.set_ylim([min(lower_bound_1std) - 1, max(upper_bound_1std) + 1]) # Example range for y-axis + +# Ensure x-axis uses only integers and includes the first and last indices +ax.set_xticks(np.arange(1, len(time_steps) + 1, step=1)) + +# Set custom axis titles +ax.set_xlabel(r'$\mathrm{Update~(10~Episodes)}$', fontsize=14) +ax.set_ylabel(r'$\mathrm{Average~Reward}$', fontsize=14) + +# Set custom legend +ax.legend(loc='best') + +# Save plot as PDF +plt.savefig('average_reward_plot_with_samples_and_confidence_intervals.pdf', format='pdf') + +# Show plot +plt.show() diff --git a/modules/combined/examples/stochastic/vortex_control/trainer.i b/modules/combined/examples/stochastic/vortex_control/trainer.i index d1c004a03514..a7310c0a0456 100644 --- a/modules/combined/examples/stochastic/vortex_control/trainer.i +++ b/modules/combined/examples/stochastic/vortex_control/trainer.i @@ -57,8 +57,8 @@ loss_print_frequency = 1 critic_learning_rate = 0.001 - num_critic_neurons_per_layer = '64 64' - critic_activation_functions = 'tanh tanh' + 
num_critic_neurons_per_layer = '128 128' + critic_activation_functions = 'relu relu' control_learning_rate = 0.001 num_control_neurons_per_layer = '512 512' From d6990d046eaefa7b0b876454271a85a51012bde2 Mon Sep 17 00:00:00 2001 From: Peter German Date: Wed, 26 Nov 2025 13:35:08 -0700 Subject: [PATCH 18/51] Save what happens. --- .../libtorch/utils/LibtorchActorNeuralNet.h | 12 +- .../LaserPositionPostprocessor.h | 31 ++ .../libtorch/utils/LibtorchActorNeuralNet.C | 77 +++- .../LaserPositionPostprocessor.C | 51 +++ .../combined/examples/stochastic/meltpool/3.i | 267 +++++++++++ .../stochastic/meltpool/meltpool_trainer.i | 117 +++++ .../flow_over_circle_linearfv.i | 2 +- .../flow_over_circle_linearfv_single.i | 420 ++++++++++++++++++ .../vortex_control/full-run/header.i | 39 ++ .../stochastic/vortex_control/full-run/mesh.i | 240 ++++++++++ .../flow_over_circle_linearfv_single.i | 420 ++++++++++++++++++ .../vortex_control/half-run/header.i | 39 ++ .../stochastic/vortex_control/half-run/mesh.i | 240 ++++++++++ .../vortex_control/libtorch_drl_control_sub.i | 177 -------- .../stochastic/vortex_control/trainer.i | 14 +- .../libtorch_drl_control_sub.i | 31 +- .../libtorch_drl_control_trainer.i | 29 +- .../surrogates/LibtorchDRLControlTrainer.h | 1 + .../libtorch/controls/LibtorchDRLControl.C | 8 +- .../trainers/LibtorchDRLControlTrainer.C | 20 +- 20 files changed, 1975 insertions(+), 260 deletions(-) create mode 100644 framework/include/postprocessors/LaserPositionPostprocessor.h create mode 100644 framework/src/postprocessors/LaserPositionPostprocessor.C create mode 100644 modules/combined/examples/stochastic/meltpool/3.i create mode 100644 modules/combined/examples/stochastic/meltpool/meltpool_trainer.i create mode 100644 modules/combined/examples/stochastic/vortex_control/full-run/flow_over_circle_linearfv_single.i create mode 100644 modules/combined/examples/stochastic/vortex_control/full-run/header.i create mode 100644 
modules/combined/examples/stochastic/vortex_control/full-run/mesh.i create mode 100644 modules/combined/examples/stochastic/vortex_control/half-run/flow_over_circle_linearfv_single.i create mode 100644 modules/combined/examples/stochastic/vortex_control/half-run/header.i create mode 100644 modules/combined/examples/stochastic/vortex_control/half-run/mesh.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_sub.i diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h index 437f5bfd5bba..c349207c94c1 100644 --- a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -32,7 +32,6 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet const unsigned int num_inputs, const unsigned int num_outputs, const std::vector & num_neurons_per_layer, - const std::vector & std, const std::vector & activation_function = {"relu"}, const std::vector & minimum_values = {}, const std::vector & maximum_values = {}, @@ -61,8 +60,6 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet /// Construct the neural network virtual void constructNeuralNetwork() override; - const std::vector & std() const {return _std;} - const torch::Tensor & stdTensor() const {return _std_tensor;} const torch::Tensor & alphaTensor() const {return _alpha_tensor;} @@ -78,9 +75,8 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet virtual void initializeNeuralNetwork() override; protected: - const std::vector & _std; - - torch::Tensor _std_tensor; + std::vector _log_std_module; + std::vector _mean_module; std::vector _alpha_module; std::vector _beta_module; @@ -90,6 +86,10 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet torch::Tensor _alpha_beta_tensor; torch::Tensor _log_norm; + torch::Tensor _mean_tensor; + torch::Tensor _std_tensor; + torch::Tensor 
_log_std_tensor; + torch::Tensor _mean; }; diff --git a/framework/include/postprocessors/LaserPositionPostprocessor.h b/framework/include/postprocessors/LaserPositionPostprocessor.h new file mode 100644 index 000000000000..0ef780c76a4e --- /dev/null +++ b/framework/include/postprocessors/LaserPositionPostprocessor.h @@ -0,0 +1,31 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#pragma once + +// MOOSE includes +#include "GeneralPostprocessor.h" + +class LaserPositionPostprocessor : public GeneralPostprocessor +{ +public: + static InputParameters validParams(); + LaserPositionPostprocessor(const InputParameters & parameters); + + virtual void execute() override; + virtual void initialize() override {} + using Postprocessor::getValue; + virtual Real getValue() const override; + +protected: + + const PostprocessorValue & _speed; + Real _current_arclength; + Real _delta_arclength; +}; diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C index 5d215258dbfd..5a4a6df6a29a 100644 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -21,7 +21,6 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( const unsigned int num_inputs, const unsigned int num_outputs, const std::vector & num_neurons_per_layer, - const std::vector & std, const std::vector & activation_function, const std::vector & minimum_values, const std::vector & maximum_values, @@ -32,8 +31,7 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( LibtorchArtificialNeuralNet(name, num_inputs, num_outputs, num_neurons_per_layer, activation_function, minimum_values, maximum_values, device_type, data_type, - 
false), - _std(std) + false) { if (build_on_construct) constructNeuralNetwork(); @@ -42,8 +40,7 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( LibtorchActorNeuralNet::LibtorchActorNeuralNet( const Moose::LibtorchActorNeuralNet & nn, const bool build_on_construct) - : LibtorchArtificialNeuralNet(dynamic_cast(nn), false), - _std(nn.std()) + : LibtorchArtificialNeuralNet(dynamic_cast(nn), false) { // We construct the NN architecture if (build_on_construct) @@ -64,14 +61,26 @@ LibtorchActorNeuralNet::initializeNeuralNetwork() { const auto & activation = _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; const Real gain = determineGain(activation); - torch::nn::init::orthogonal_(_weights[i]->weight, gain); + + auto sizes = _weights[i]->weight.sizes(); + auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); + torch::nn::init::orthogonal_(_weights[i]->weight, gain/max_dim_size); torch::nn::init::zeros_(_weights[i]->bias); } if (_minimum_values.size()) { - torch::nn::init::orthogonal_(_alpha_module[0]->weight); - torch::nn::init::orthogonal_(_beta_module[0]->weight); + auto sizes = _alpha_module[0]->weight.sizes(); + auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); + torch::nn::init::orthogonal_(_alpha_module[0]->weight, 1.0/max_dim_size); + torch::nn::init::orthogonal_(_beta_module[0]->weight, 1.0/max_dim_size); + } + else + { + auto sizes = _mean_module[0]->weight.sizes(); + auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); + torch::nn::init::orthogonal_(_mean_module[0]->weight, 1.0/max_dim_size); + torch::nn::init::orthogonal_(_log_std_module[0]->weight, 1.0/max_dim_size); } } @@ -91,9 +100,9 @@ LibtorchActorNeuralNet::constructNeuralNetwork() inp_neurons = _num_neurons_per_layer[i]; } + auto num_inps = _num_neurons_per_layer[numHiddenLayers()-1]; if (_minimum_values.size()) { - auto num_inps = _num_neurons_per_layer[numHiddenLayers()-1]; 
_alpha_module.push_back(register_module("alpha", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); _beta_module.push_back(register_module("beta", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); _alpha_module[0]->to(_device_type, _data_type); @@ -102,11 +111,10 @@ LibtorchActorNeuralNet::constructNeuralNetwork() return; } - torch::Tensor std_tensor = torch::eye(_std.size()).to(_data_type); - for (unsigned int i = 0; i < _std.size(); ++i) - std_tensor[i][i] = _std[i]; - - _std_tensor = register_parameter("std", std_tensor); + _mean_module.push_back(register_module("mean", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); + _log_std_module.push_back(register_module("std", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); + _mean_module[0]->to(_device_type, _data_type); + _log_std_module[0]->to(_device_type, _data_type); } torch::Tensor @@ -119,7 +127,7 @@ LibtorchActorNeuralNet::entropy() + (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor); } - return 0.5*std::log(2*M_PI)+torch::log(_std_tensor)+0.5; + return 0.5*std::log(2*M_PI)+_log_std_tensor+0.5; } void @@ -144,7 +152,30 @@ LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) return; } - _mean = input; + // # Flat mean and log standard deviation + // mean = self.mean.apply(x=x) + // log_stddev = self.log_stddev.apply(x=x) + + // # Reshape mean and log stddev to action shape + // shape = (-1,) + self.shape + // mean = tf.reshape(tensor=mean, shape=shape) + // log_stddev = tf.reshape(tensor=log_stddev, shape=shape) + + // # Clip log stddev for numerical stability + // log_eps = log(util.epsilon) # epsilon < 1.0, hence negative + // log_stddev = tf.clip_by_value(t=log_stddev, clip_value_min=log_eps, clip_value_max=-log_eps) + + // # Standard deviation + // stddev = tf.exp(x=log_stddev) + + // return mean, stddev, log_stddev + + _mean = 
_mean_module[0]->forward(input); + _log_std_tensor = _log_std_module[0]->forward(input); + + _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12)); + _std_tensor = torch::exp(_log_std_tensor); + } torch::Tensor @@ -198,7 +229,10 @@ LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) if (sampled) return sample(); - return _min_tensor + (_max_tensor - _min_tensor)*_mean; + if (_minimum_values.size()) + return _min_tensor + (_max_tensor - _min_tensor)*_mean; + + return _mean; } torch::Tensor @@ -235,11 +269,16 @@ LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) // std::cout << "beta tensor " << _beta_tensor << std::endl; // std::cout << "_alpha_tensor " << _alpha_tensor << std::endl; // std::cout << "_lognorm " << _log_norm << std::endl; + + // std::cout << "First term " << (_beta_tensor - 1.0) << std::endl; + // std::cout << "Second " << torch::log(torch::clamp_min(normalized, 1e-8)) << std::endl; + // std::cout << "Third " << (_alpha_tensor - 1.0) * torch::log1p(-normalized) << std::endl; + // std::cout << "Lognorm " << _log_norm << std::endl; return (_beta_tensor - 1.0) * torch::log(torch::clamp_min(normalized, 1e-8)) + (_alpha_tensor - 1.0) * torch::log1p(-normalized) - _log_norm; } - torch::Tensor var = torch::matmul(_std_tensor, _std_tensor); - return -((action - _mean) * (action - _mean)) / (2.0 * var) - 0.5*torch::log(var) - + torch::Tensor var = _std_tensor * _std_tensor; + return -((action - _mean) * (action - _mean)) / (2.0 * var) - _log_std_tensor - 0.5*std::log(2.0 * M_PI); } diff --git a/framework/src/postprocessors/LaserPositionPostprocessor.C b/framework/src/postprocessors/LaserPositionPostprocessor.C new file mode 100644 index 000000000000..8a9745f81e2a --- /dev/null +++ b/framework/src/postprocessors/LaserPositionPostprocessor.C @@ -0,0 +1,51 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full 
restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#include "LaserPositionPostprocessor.h" +#include "FEProblemBase.h" +#include "NonlinearSystemBase.h" +#include "MathUtils.h" +#include "TransientBase.h" +#include "Restartable.h" +#include "libmesh/enum_norm_type.h" + +registerMooseObject("MooseApp", LaserPositionPostprocessor); + +InputParameters +LaserPositionPostprocessor::validParams() +{ + InputParameters params = GeneralPostprocessor::validParams(); + + params.addRequiredParam("speed","Postprocessor providing the laser scanning speed used to advance the arclength."); + + params.addClassDescription("Accumulates the laser path arclength by integrating the speed postprocessor over time."); + + return params; +} + +LaserPositionPostprocessor::LaserPositionPostprocessor(const InputParameters & parameters) + : GeneralPostprocessor(parameters), + _speed(getPostprocessorValue("speed")), + _current_arclength(0.0), + _delta_arclength(0.0) +{ +} + +Real +LaserPositionPostprocessor::getValue() const +{ + return _current_arclength; +} + +void +LaserPositionPostprocessor::execute() +{ + _delta_arclength = _speed * _dt; + _current_arclength += _delta_arclength; +} diff --git a/modules/combined/examples/stochastic/meltpool/3.i b/modules/combined/examples/stochastic/meltpool/3.i new file mode 100644 index 000000000000..b342ed7f815f --- /dev/null +++ b/modules/combined/examples/stochastic/meltpool/3.i @@ -0,0 +1,267 @@ +# Process parameters +# scanning_speed=1.0 # m/s +power=25 # W (this is the effective power so multiplied by eta) +R=50e-6 # m (this is the effective radius) + +# Geometric parameters +thickness=50e-6 # m +ymin=-180e-6 +ymax=180e-6 +xmin=-180e-6 # m +xmax=180e-6 # m +surfacetemp=300 # K (temperature at the other side of the plate) +backtemp=300 + +# Time stepping parameters +endtime=1.13e-3 # s +timestep=${fparse endtime/240} # s + +[Mesh] + [cmg] + type = GeneratedMeshGenerator + dim = 3 + xmin = ${xmin} + xmax = ${xmax} + ymin = ${ymin} + ymax = ${ymax} + zmin = 
${fparse -thickness} + zmax = 0 + nx = 50 + ny = 50 + nz = 15 + [] +[] + +[Variables] + [T] + [] +[] + +[ICs] + [T] + type = FunctionIC + variable = T + function = '(${surfacetemp} - ${backtemp}) / ${thickness} * z + ${surfacetemp}' + [] +[] + +[Kernels] + [temperature_time] + type = ADHeatConductionTimeDerivative + variable = T + use_displaced_mesh = true + density_name = 'rho' + specific_heat = 'cp' + [] + [temperature_conduction] + type = ADHeatConduction + variable = T + thermal_conductivity = 'k' + use_displaced_mesh = true + [] +[] + +[BCs] + [T_cold] + type = DirichletBC + variable = T + boundary = 'back' + value = ${backtemp} + [] + [radiation_flux] + type = FunctionRadiativeBC + variable = T + boundary = 'front' + emissivity_function = '1' + Tinfinity = 300 + stefan_boltzmann_constant = 5.67e-8 + [] + [weld_flux] + type = GaussianEnergyFluxBC + variable = T + boundary = 'front' + P0 = ${power} + R = ${R} + x_beam_coord = xcoord + y_beam_coord = ycoord + z_beam_coord = '0' + [] +[] + +[Functions] + [xcoord] + type = ParsedFunction + expression = '60e-6*sin(pi/(60e-6*pi)*arclength)' + symbol_names = 'arclength' + symbol_values = 'laser_position' + [] + [ycoord] + type = ParsedFunction + expression = '60e-6*cos(pi/(60e-6*pi)*arclength)' + symbol_names = 'arclength' + symbol_values = 'laser_position' + [] + [reward_function] + type = ParsedFunction + expression = '1e-2*min(min(T1-1800, 0), 2800-T1)' + '+1e-2*min(min(T2-1800, 0), 2800-T2)' + '+1e-2*min(min(T3-1800, 0), 2800-T3)' + '+1e-2*min(min(T4-1800, 0), 2800-T4)' + '+1e-2*min(min(T5-1800, 0), 2800-T5)' + '+1e-2*min(min(T6-1800, 0), 2800-T6)' + '+1e-2*min(min(T7-1800, 0), 2800-T7)' + '+1e-2*min(min(T8-1800, 0), 2800-T8)' + symbol_names = 'T1 T2 T3 T4 T5 T6 T7 T8' + symbol_values = 'T1 T2 T3 T4 T5 T6 T7 T8' + [] +[] + +[Postprocessors] + [laser_position] + type = LaserPositionPostprocessor + execute_on = 'TIMESTEP_BEGIN' + speed = speed_signal + [] + [T1] + type = PointValue + variable = T + point = 
'${fparse 60e-6*sin(2*pi*1/8)} ${fparse 60e-6*cos(2*pi*1/8)} 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [T2] + type = PointValue + variable = T + point = '${fparse 60e-6*sin(2*pi*2/8)} ${fparse 60e-6*cos(2*pi*2/8)} 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [T3] + type = PointValue + variable = T + point = '${fparse 60e-6*sin(2*pi*3/8)} ${fparse 60e-6*cos(2*pi*3/8)} 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [T4] + type = PointValue + variable = T + point = '${fparse 60e-6*sin(2*pi*4/8)} ${fparse 60e-6*cos(2*pi*4/8)} 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [T5] + type = PointValue + variable = T + point = '${fparse 60e-6*sin(2*pi*5/8)} ${fparse 60e-6*cos(2*pi*5/8)} 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [T6] + type = PointValue + variable = T + point = '${fparse 60e-6*sin(2*pi*6/8)} ${fparse 60e-6*cos(2*pi*6/8)} 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [T7] + type = PointValue + variable = T + point = '${fparse 60e-6*sin(2*pi*7/8)} ${fparse 60e-6*cos(2*pi*7/8)} 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [T8] + type = PointValue + variable = T + point = '${fparse 60e-6*sin(2*pi*8/8)} ${fparse 60e-6*cos(2*pi*8/8)} 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [reward] + type = FunctionValuePostprocessor + function = reward_function + execute_on = 'INITIAL TIMESTEP_END' + indirect_dependencies = 'T1 T2 T3 T4 T5 T6 T7 T8' + [] + [speed_signal] + type = ConstantPostprocessor + value = 1.0 + execute_on = TIMESTEP_BEGIN + [] + [speed] + type = LibtorchControlValuePostprocessor + control_name = src_control + [] + [log_prob_speed] + type = LibtorchDRLLogProbabilityPostprocessor + control_name = src_control + [] +[] + +[Reporters] + [results] + type = AccumulateReporter + reporters = 'T1/value T2/value T3/value T4/value T5/value T6/value T7/value T8/value reward/value speed/value log_prob_speed/value' + [] +[] + +[Materials] + [steel] + type = LaserWeld316LStainlessSteel + temperature = T + use_constant_density = 
true + [] +[] + +[Controls] + [src_control] + type = LibtorchDRLControl + parameters = "Postprocessors/speed_signal/value" + responses = 'T1 T2 T3 T4 T5 T6 T7 T8' + + # keep consistent with LibtorchDRLControlTrainer + input_timesteps = 1 + response_shift_factors = '1500 1500 1500 1500 1500 1500 1500 1500' + response_scaling_factors = '0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667' + action_scaling_factors = 1.0 + + # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' + # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' + # action_scaling_factors = 1.0 + + execute_on = 'TIMESTEP_BEGIN' + smoother = 1.0 + num_stems_in_period = 10 + stochastic = true + [] +[] + +[Executioner] + type = Transient + end_time = ${endtime} + # dtmin = 1e-10 + # dtmax = 1e-5 + dt = ${timestep} + # petsc_options_iname = '-pc_type -pc_factor_shift_type' + # petsc_options_value = 'lu NONZERO' + petsc_options_iname = '-pc_type -pc_hypre_type -pc_factor_shift_type' + petsc_options_value = 'hypre boomeramg NONZERO' + petsc_options = '-snes_converged_reason -ksp_converged_reason -options_left' + solve_type = 'NEWTON' + line_search = 'none' + nl_max_its = 5 + l_max_its = 100 + # [TimeStepper] + # type = IterationAdaptiveDT + # optimal_iterations = 5 + # iteration_window = 1 + # dt = ${timestep} + # linear_iteration_ratio = 1e6 + # growth_factor = 1.25 + # [] +[] + +[Debug] + show_var_residual_norms = true +[] + +[Outputs] + [exodus] + type = Exodus + # output_material_properties = true + [] + console = false +[] diff --git a/modules/combined/examples/stochastic/meltpool/meltpool_trainer.i b/modules/combined/examples/stochastic/meltpool/meltpool_trainer.i new file mode 100644 index 000000000000..911d5c9cd20b --- /dev/null +++ b/modules/combined/examples/stochastic/meltpool/meltpool_trainer.i @@ -0,0 +1,117 @@ +[StochasticTools] +[] + +[Samplers] + [dummy] + type = CartesianProduct + linear_space_items = '0 0.01 1' 
+ min_procs_per_row = 20 + max_procs_per_row = 20 + [] +[] + +[MultiApps] + [runner] + type = SamplerFullSolveMultiApp + sampler = dummy + input_files = '3.i' + mode = batch-reset + min_procs_per_app = 20 + max_procs_per_app = 20 + [] +[] + +[Transfers] + [nn_transfer] + type = SamplerNeuralNetControlTransfer + to_multi_app = runner + trainer_name = nn_trainer + control_name = src_control + sampler = dummy + [] + [r_transfer] + type = SamplerReporterTransfer + from_multi_app = runner + sampler = dummy + stochastic_reporter = storage + from_reporter = 'results/T1:value results/T2:value results/T3:value results/T4:value ' + 'results/T5:value results/T6:value results/T7:value results/T8:value ' + 'results/reward:value results/speed:value results/log_prob_speed:value' + [] +[] + +[Trainers] + [nn_trainer] + type = LibtorchDRLControlTrainer + response = 'storage/r_transfer:results:T1:value storage/r_transfer:results:T2:value storage/r_transfer:results:T3:value storage/r_transfer:results:T4:value ' + 'storage/r_transfer:results:T5:value storage/r_transfer:results:T6:value storage/r_transfer:results:T7:value storage/r_transfer:results:T8:value' + control = 'storage/r_transfer:results:speed:value' + log_probability = 'storage/r_transfer:results:log_prob_speed:value' + reward = 'storage/r_transfer:results:reward:value' + + num_epochs = 50 + update_frequency = 1 + decay_factor = 0.99 + lambda_factor = 0.97 + + loss_print_frequency = 1 + + critic_learning_rate = 0.001 + num_critic_neurons_per_layer = '256 256' + critic_activation_functions = 'relu relu' + + control_learning_rate = 0.001 + num_control_neurons_per_layer = '256 256' + control_activation_functions = 'tanh tanh' + + # keep consistent with LibtorchNeuralNetControl + input_timesteps = 1 + + # response_scaling_factors = '13.33 15.38 16.66 38.46 15.38 33.33 40 11.76 4.711 15.38' + # response_shift_factors = '2.055 2.055 1.93 -0.171 1.945 0.449 -0.525 0.029 0.17675 1.945' + + response_shift_factors = '1500 1500 1500 
1500 1500 1500 1500 1500' + response_scaling_factors = '0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667' + + # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' + # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' + + standardize_advantage = true + + read_from_file = false + + # min_control_value = ${fparse -0.108} + # max_control_value = ${fparse 0.108} + + min_control_value = ${fparse 0.75} + max_control_value = ${fparse 2.0} + + batch_size = 400 + timestep_window = 10 + + entropy_coeff = 0.01 + [] +[] + +[Reporters] + [storage] + type = StochasticReporter + parallel_type = ROOT + outputs = none + [] + [reward] + type = DRLRewardReporter + drl_trainer_name = nn_trainer + [] +[] + +[Executioner] + type = Transient + num_steps = 1 +[] + +[Outputs] + file_base = output/train_out + json = true + execute_on = TIMESTEP_END +[] diff --git a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i index fb1fc83b18ec..e9b49cd0bf88 100644 --- a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i +++ b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i @@ -401,7 +401,7 @@ print_fields = false continue_on_max_its = true dt = 0.0005 - num_steps = 4000 + num_steps = 2000 [] [Outputs] diff --git a/modules/combined/examples/stochastic/vortex_control/full-run/flow_over_circle_linearfv_single.i b/modules/combined/examples/stochastic/vortex_control/full-run/flow_over_circle_linearfv_single.i new file mode 100644 index 000000000000..7ec1253f14d5 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/full-run/flow_over_circle_linearfv_single.i @@ -0,0 +1,420 @@ +!include header.i + +[Mesh] + [fmg] + type = FileMeshGenerator + file = flow_over_circle_linearfv_out_orig.e + use_for_exodus_restart = true + [] +[] + +[Problem] + 
linear_sys_names = 'u_system v_system pressure_system' + previous_nl_solution_required = true +[] + +[Functions] + [inlet_function] + type = ParsedFunction + expression = '4*U*(y-ymin)*(ymax-y)/(ymax-ymin)/(ymax-ymin)' + symbol_names = 'U ymax ymin' + symbol_values = '${inlet_velocity} ${y_max} ${y_min}' + [] + [gap_x] + type = ParsedFunction + expression = 'Q*x/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' + symbol_names = 'Q' + symbol_values = 'Q_signal' + [] + [gap_y] + type = ParsedFunction + expression = 'if(y>0,Q,-Q)*y/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' + symbol_names = 'Q' + symbol_values = 'Q_signal' + [] +[] + +[UserObjects] + [rc] + type = RhieChowMassFlux + u = vel_x + v = vel_y + pressure = pressure + rho = ${rho} + p_diffusion_kernel = p_diffusion + [] +[] + +[Variables] + [vel_x] + type = MooseLinearVariableFVReal + solver_sys = u_system + initial_from_file_var = vel_x + initial_from_file_timestep = LATEST + [] + [vel_y] + type = MooseLinearVariableFVReal + solver_sys = v_system + initial_from_file_var = vel_y + initial_from_file_timestep = LATEST + [] + [pressure] + type = MooseLinearVariableFVReal + # initial_condition = 0 + solver_sys = pressure_system + initial_from_file_var = pressure + initial_from_file_timestep = LATEST + [] +[] + +[LinearFVKernels] + [u_time] + type = LinearFVTimeDerivative + variable = vel_x + factor = ${rho} + [] + [u_advection_stress] + type = LinearWCNSFVMomentumFlux + variable = vel_x + advected_interp_method = ${advected_interp_method} + mu = ${mu} + u = vel_x + v = vel_y + momentum_component = 'x' + rhie_chow_user_object = 'rc' + use_nonorthogonal_correction = true + [] + [u_pressure] + type = LinearFVMomentumPressure + variable = vel_x + pressure = pressure + momentum_component = 'x' + [] + + [v_time] + type = LinearFVTimeDerivative + variable = vel_y + factor = ${rho} + [] + [v_advection_stress] + type = LinearWCNSFVMomentumFlux + variable = vel_y + advected_interp_method = ${advected_interp_method} + mu = 
${mu} + u = vel_x + v = vel_y + momentum_component = 'y' + rhie_chow_user_object = 'rc' + use_nonorthogonal_correction = true + [] + [v_pressure] + type = LinearFVMomentumPressure + variable = vel_y + pressure = pressure + momentum_component = 'y' + [] + + [p_diffusion] + type = LinearFVAnisotropicDiffusion + variable = pressure + diffusion_tensor = Ainv + use_nonorthogonal_correction = true + [] + [HbyA_divergence] + type = LinearFVDivergence + variable = pressure + face_flux = HbyA + force_boundary_execution = true + [] +[] + +[LinearFVBCs] + [inlet_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'left_boundary' + functor = 'inlet_function' + [] + [inlet_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'left_boundary' + functor = 0 + [] + [circle_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'circle' + functor = 0 + [] + [circle_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'circle' + functor = 0 + [] + [gap_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'top_gap bottom_gap' + functor = 'gap_x' + [] + [gap_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'top_gap bottom_gap' + functor = 'gap_y' + [] + [walls_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'top_boundary bottom_boundary' + functor = 0 + [] + [walls_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'top_boundary bottom_boundary' + functor = 0 + [] + [outlet_p] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + boundary = 'right_boundary' + variable = pressure + functor = 0 + [] + [outlet_u] + type = LinearFVAdvectionDiffusionOutflowBC + variable = vel_x + use_two_term_expansion = false + boundary = 'right_boundary' + [] + [outlet_v] + type = 
LinearFVAdvectionDiffusionOutflowBC + variable = vel_y + use_two_term_expansion = false + boundary = 'right_boundary' + [] +[] + +[Postprocessors] + [drag_force] + type = IntegralDirectedSurfaceForce + vel_x = vel_x + vel_y = vel_y + mu = ${mu} + pressure = pressure + principal_direction = '1 0 0' + boundary = 'circle' + outputs = none + execute_on = 'INITIAL TIMESTEP_END' + [] + [drag_coeff] + type = ParsedPostprocessor + expression = '2*drag_force/rho/(avgvel*avgvel)/D' + constant_names = 'rho avgvel D' + constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' + pp_names = 'drag_force' + execute_on = 'INITIAL TIMESTEP_END' + [] + [lift_force] + type = IntegralDirectedSurfaceForce + vel_x = vel_x + vel_y = vel_y + mu = ${mu} + pressure = pressure + principal_direction = '0 1 0' + boundary = 'circle' + outputs = none + execute_on = 'INITIAL TIMESTEP_END' + [] + [lift_coeff] + type = ParsedPostprocessor + expression = '2*lift_force/rho/(avgvel*avgvel)/D' + constant_names = 'rho avgvel D' + constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' + pp_names = 'lift_force' + execute_on = 'INITIAL TIMESTEP_END' + [] + [reward] + type = LiftDragRewardPostprocessor + lift = lift_coeff + drag = drag_coeff + averaging_window = 50 + coeff_1 = 0.0 + coeff_2 = 0.2 + execute_on = 'INITIAL TIMESTEP_END' + [] + # [p1] + # type = PointValue + # variable = pressure + # point = '0 0.07 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p2] + # type = PointValue + # variable = pressure + # point = '0 -0.07 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p3] + # type = PointValue + # variable = pressure + # point = '0.075 0.1 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p4] + # type = PointValue + # variable = pressure + # point = '0.075 0.0 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p5] + # type = PointValue + # variable = pressure + # point = '0.075 -0.1 0.0' + # execute_on = 
'INITIAL TIMESTEP_END' + # [] + [p1x] + type = PointValue + variable = vel_x + point = '0 0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p2x] + type = PointValue + variable = vel_x + point = '0 -0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p3x] + type = PointValue + variable = vel_x + point = '0.075 0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p4x] + type = PointValue + variable = vel_x + point = '0.075 0.0 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p5x] + type = PointValue + variable = vel_x + point = '0.075 -0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p1y] + type = PointValue + variable = vel_y + point = '0 0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p2y] + type = PointValue + variable = vel_y + point = '0 -0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p3y] + type = PointValue + variable = vel_y + point = '0.075 0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p4y] + type = PointValue + variable = vel_y + point = '0.075 0.0 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p5y] + type = PointValue + variable = vel_y + point = '0.075 -0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [Q_signal] + type = ConstantPostprocessor + value = 0.0 + execute_on = TIMESTEP_BEGIN + [] + [Q] + type = LibtorchControlValuePostprocessor + control_name = src_control + [] + [log_prob_Q] + type = LibtorchDRLLogProbabilityPostprocessor + control_name = src_control + [] +[] + +[Reporters] + [results] + type = AccumulateReporter + reporters = 'p1x/value p2x/value p3x/value p4x/value p5x/value p1y/value p2y/value p3y/value p4y/value p5y/value reward/value Q/value log_prob_Q/value' + [] +[] + +[Controls] + [src_control] + type = LibtorchDRLControl + parameters = "Postprocessors/Q_signal/value" + responses = 'p1x p2x p3x p4x p5x p1y p2y p3y p4y p5y' + + # keep consistent with LibtorchDRLControlTrainer + input_timesteps = 1 + response_shift_factors = '1.98 1.825 2.015 0.03 1.9 0.58 -0.425 0.06 0.12 -0.02' + 
response_scaling_factors = '1.47 1.03 2.60 3.45 2.0 1.19 1.6 2.7 1.47 2.08' + action_scaling_factors = 1.0 + + filename = "control.net_best" + + num_neurons_per_layer = '512 512' + activation_function = 'tanh tanh' + + min_control_value = ${fparse -0.108} + max_control_value = ${fparse 0.108} + + execute_on = 'TIMESTEP_BEGIN' + smoother = 0.1 + num_stems_in_period = 50 + + stochastic = false + [] +[] + +[Executioner] + type = PIMPLE + momentum_l_abs_tol = 1e-7 + pressure_l_abs_tol = 1e-7 + momentum_l_tol = 1e-7 + pressure_l_tol = 1e-7 + rhie_chow_user_object = 'rc' + momentum_systems = 'u_system v_system' + pressure_system = 'pressure_system' + momentum_equation_relaxation = 0.9 + pressure_variable_relaxation = 0.6 + num_iterations = 100 + pressure_absolute_tolerance = 5e-6 + momentum_absolute_tolerance = 5e-6 + momentum_petsc_options_iname = '-pc_type -pc_hypre_type' + momentum_petsc_options_value = 'hypre boomeramg' + pressure_petsc_options_iname = '-pc_type -pc_hypre_type' + pressure_petsc_options_value = 'hypre boomeramg' + print_fields = false + continue_on_max_its = true + dt = 0.0005 + num_steps = 4000 +[] + +[Outputs] + exodus = true + [json] + type = JSON + execute_on = final + [] + # console = false + # execute_on = FINAL +[] diff --git a/modules/combined/examples/stochastic/vortex_control/full-run/header.i b/modules/combined/examples/stochastic/vortex_control/full-run/header.i new file mode 100644 index 000000000000..6e5236531616 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/full-run/header.i @@ -0,0 +1,39 @@ +# ----------------------------------------------------------------------------- +# Flow around a cylinder (2D) benchmark validation case +# This example showcases a flow around a cylinder which results in vortex +# shedding. 
The problem specification has been taken from the following paper: +# +# @incollection{schafer1996benchmark, +# title={Benchmark computations of laminar flow around a cylinder}, +# author={Sch{\"a}fer, Michael and Turek, Stefan and Durst, Franz and Krause, Egon and Rannacher, Rolf}, +# booktitle={Flow simulation with high-performance computers II}, +# pages={547--566}, +# year={1996}, +# publisher={Springer} +# } +# The Reyndols number is Re=100. +# The expected Strouhal number (St) is in the [0.2950, 0.3050] range, with +# refinement=8, we expect to get St=0.2941 with the model below. +# Run it using the following command: +# ./navier_stokes-opt -i header.i mesh.i flow_over_circle.i executioner_postprocessor.i +# ----------------------------------------------------------------------------- + +# Geometry parameters +circle_radius = 0.05 +pitch = 0.2 +x_min = -0.2 +x_max = 1.5 +y_min = -0.2 +y_max = 0.21 +rundoff = 1e-4 +refinement = 8 + +# Material properties +mu = 1e-3 +rho = 1 + +# Boundary conditions +inlet_velocity = 1.5 + +# Numerical schemes +advected_interp_method = 'average' diff --git a/modules/combined/examples/stochastic/vortex_control/full-run/mesh.i b/modules/combined/examples/stochastic/vortex_control/full-run/mesh.i new file mode 100644 index 000000000000..2d962b51c9d2 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/full-run/mesh.i @@ -0,0 +1,240 @@ +[Mesh] + # ------------------------------------------ + # Middle layer + # ------------------------------------------ + [ccmg] + type = ConcentricCircleMeshGenerator + num_sectors = '${fparse refinement*2}' + radii = '${circle_radius} ${fparse 1.2*circle_radius}' + rings = '4 ${refinement} ${refinement}' + has_outer_square = on + pitch = ${pitch} + preserve_volumes = off + smoothing_max_it = 2 + [] + [in_between] + type = SideSetsBetweenSubdomainsGenerator + input = ccmg + primary_block = 2 + paired_block = 1 + new_boundary = 'circle' + [] + [delete] + type = 
BlockDeletionGenerator + input = in_between + block = '1' + [] + [final_ccmg] + type = RenameBlockGenerator + input = delete + old_block = '2 3' + new_block = '0 0' + [] + [left] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${x_min}' + xmax = '${fparse -pitch/2}' + ymin = '${fparse -pitch/2}' + ymax = '${fparse pitch/2}' + nx = '${fparse refinement*2}' + ny = '${fparse refinement*4+2}' + [] + [right] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse pitch/2}' + xmax = '${x_max}' + ymin = '${fparse -pitch/2}' + ymax = '${fparse pitch/2}' + nx = '${fparse refinement*30}' + ny = '${fparse refinement*4+2}' + [] + [combined_middle] + type = StitchedMeshGenerator + inputs = 'final_ccmg left right' + stitch_boundaries_pairs = 'left right; right left' + clear_stitched_boundary_ids = false + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + + [middle_top_sideset] + input = combined_middle + type = ParsedGenerateSideset + combinatorial_geometry = 'y > ${fparse pitch/2-rundoff}' + normal = '0 1 0' + new_sideset_name = 'middle_top' + [] + [middle_bottom_sideset] + input = middle_top_sideset + type = ParsedGenerateSideset + combinatorial_geometry = 'y < ${fparse -pitch/2+rundoff}' + normal = '0 -1 0' + new_sideset_name = 'middle_bottom' + [] + # ------------------------------------------ + # Top layer + # ------------------------------------------ + [top_left_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${x_min}' + xmax = '${fparse -pitch/2}' + ymin = '${fparse pitch/2}' + ymax = '${y_max}' + nx = '${fparse refinement*2}' + ny = '${fparse refinement*2+1}' + [] + [top_middle_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse -pitch/2}' + xmax = '${fparse pitch/2}' + ymin = '${fparse pitch/2}' + ymax = '${y_max}' + nx = '${fparse refinement*4+2}' + ny = '${fparse refinement*2+1}' + [] + [top_right_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse pitch/2}' + xmax = '${x_max}' + 
ymin = '${fparse pitch/2}' + ymax = '${y_max}' + nx = '${fparse refinement*30}' + ny = '${fparse refinement*2+1}' + [] + [combined_top] + type = StitchedMeshGenerator + inputs = 'top_middle_block top_left_block top_right_block' + stitch_boundaries_pairs = 'left right; right left' + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [top_bottom_sideset] + input = combined_top + type = ParsedGenerateSideset + combinatorial_geometry = 'y < ${fparse pitch/2+rundoff}' + normal = '0 -1 0' + new_sideset_name = 'top_bottom' + [] + [combined_middle_top] + type = StitchedMeshGenerator + inputs = 'top_bottom_sideset middle_bottom_sideset' + stitch_boundaries_pairs = 'top_bottom middle_top' + clear_stitched_boundary_ids = false + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [create_fused_top_sideset] + input = combined_middle_top + type = ParsedGenerateSideset + combinatorial_geometry = 'y > ${fparse y_max-rundoff}' + normal = '0 1 0' + new_sideset_name = 'top_boundary' + [] + # ------------------------------------------ + # Bottom layer + # ------------------------------------------ + [bottom_left_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${x_min}' + xmax = '${fparse -pitch/2}' + ymin = '${y_min}' + ymax = '${fparse -pitch/2}' + nx = '${fparse refinement*2}' + ny = '${fparse refinement*2}' + [] + [bottom_middle_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse -pitch/2}' + xmax = '${fparse pitch/2}' + ymin = '${y_min}' + ymax = '${fparse -pitch/2}' + nx = '${fparse refinement*4+2}' + ny = '${fparse refinement*2}' + [] + [bottom_right_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse pitch/2}' + xmax = '${x_max}' + ymin = '${y_min}' + ymax = '${fparse -pitch/2}' + nx = '${fparse refinement*30}' + ny = '${fparse refinement*2}' + [] + [combined_bottom] + type = StitchedMeshGenerator + inputs = 'bottom_middle_block bottom_left_block bottom_right_block' + 
stitch_boundaries_pairs = 'left right; right left' + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [bottom_top_sideset] + input = combined_bottom + type = ParsedGenerateSideset + combinatorial_geometry = 'y > ${fparse -pitch/2-rundoff}' + normal = '0 1 0' + new_sideset_name = 'bottom_top' + [] + [combined_final] + type = StitchedMeshGenerator + inputs = 'create_fused_top_sideset bottom_top_sideset' + stitch_boundaries_pairs = 'middle_bottom bottom_top' + clear_stitched_boundary_ids = false + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [create_fused_bottom_sideset] + input = combined_final + type = ParsedGenerateSideset + combinatorial_geometry = 'y < ${fparse y_min+rundoff}' + normal = '0 -1 0' + new_sideset_name = 'bottom_boundary' + [] + # ------------------------------------------ + # Left and right boundaries + # ------------------------------------------ + [create_fused_left_sideset] + input = create_fused_bottom_sideset + type = ParsedGenerateSideset + combinatorial_geometry = 'x < ${fparse x_min+rundoff}' + normal = '-1 0 0' + new_sideset_name = 'left_boundary' + [] + [create_fused_right_sideset] + input = create_fused_left_sideset + type = ParsedGenerateSideset + combinatorial_geometry = 'x > ${fparse x_max-rundoff}' + normal = '1 0 0' + new_sideset_name = 'right_boundary' + [] + [sideset_removal] + input = create_fused_right_sideset + type = BoundaryDeletionGenerator + boundary_names = 'bottom top left right middle_bottom middle_top bottom_top top_bottom' + [] + [new_boundaries_1] + type = SideSetsFromBoundingBoxGenerator + input = sideset_removal + bottom_left = '-0.008682409 0.049240388 0' + top_right = '0.008682409 0.5 0' + boundary_new = 'top_gap' + included_boundaries = 'circle' + replace = true + [] + [new_boundaries_2] + type = SideSetsFromBoundingBoxGenerator + input = new_boundaries_1 + bottom_left = '-0.008682409 -0.5 0' + top_right = '0.008682409 -0.049240388 0' + 
boundary_new = 'bottom_gap' + included_boundaries = 'circle' + replace = true + [] +[] diff --git a/modules/combined/examples/stochastic/vortex_control/half-run/flow_over_circle_linearfv_single.i b/modules/combined/examples/stochastic/vortex_control/half-run/flow_over_circle_linearfv_single.i new file mode 100644 index 000000000000..9ce204048882 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/half-run/flow_over_circle_linearfv_single.i @@ -0,0 +1,420 @@ +!include header.i + +[Mesh] + [fmg] + type = FileMeshGenerator + file = flow_over_circle_linearfv_out_orig.e + use_for_exodus_restart = true + [] +[] + +[Problem] + linear_sys_names = 'u_system v_system pressure_system' + previous_nl_solution_required = true +[] + +[Functions] + [inlet_function] + type = ParsedFunction + expression = '4*U*(y-ymin)*(ymax-y)/(ymax-ymin)/(ymax-ymin)' + symbol_names = 'U ymax ymin' + symbol_values = '${inlet_velocity} ${y_max} ${y_min}' + [] + [gap_x] + type = ParsedFunction + expression = 'Q*x/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' + symbol_names = 'Q' + symbol_values = 'Q_signal' + [] + [gap_y] + type = ParsedFunction + expression = 'if(y>0,Q,-Q)*y/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' + symbol_names = 'Q' + symbol_values = 'Q_signal' + [] +[] + +[UserObjects] + [rc] + type = RhieChowMassFlux + u = vel_x + v = vel_y + pressure = pressure + rho = ${rho} + p_diffusion_kernel = p_diffusion + [] +[] + +[Variables] + [vel_x] + type = MooseLinearVariableFVReal + solver_sys = u_system + initial_from_file_var = vel_x + initial_from_file_timestep = LATEST + [] + [vel_y] + type = MooseLinearVariableFVReal + solver_sys = v_system + initial_from_file_var = vel_y + initial_from_file_timestep = LATEST + [] + [pressure] + type = MooseLinearVariableFVReal + # initial_condition = 0 + solver_sys = pressure_system + initial_from_file_var = pressure + initial_from_file_timestep = LATEST + [] +[] + +[LinearFVKernels] + [u_time] + type = LinearFVTimeDerivative + 
variable = vel_x + factor = ${rho} + [] + [u_advection_stress] + type = LinearWCNSFVMomentumFlux + variable = vel_x + advected_interp_method = ${advected_interp_method} + mu = ${mu} + u = vel_x + v = vel_y + momentum_component = 'x' + rhie_chow_user_object = 'rc' + use_nonorthogonal_correction = true + [] + [u_pressure] + type = LinearFVMomentumPressure + variable = vel_x + pressure = pressure + momentum_component = 'x' + [] + + [v_time] + type = LinearFVTimeDerivative + variable = vel_y + factor = ${rho} + [] + [v_advection_stress] + type = LinearWCNSFVMomentumFlux + variable = vel_y + advected_interp_method = ${advected_interp_method} + mu = ${mu} + u = vel_x + v = vel_y + momentum_component = 'y' + rhie_chow_user_object = 'rc' + use_nonorthogonal_correction = true + [] + [v_pressure] + type = LinearFVMomentumPressure + variable = vel_y + pressure = pressure + momentum_component = 'y' + [] + + [p_diffusion] + type = LinearFVAnisotropicDiffusion + variable = pressure + diffusion_tensor = Ainv + use_nonorthogonal_correction = true + [] + [HbyA_divergence] + type = LinearFVDivergence + variable = pressure + face_flux = HbyA + force_boundary_execution = true + [] +[] + +[LinearFVBCs] + [inlet_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'left_boundary' + functor = 'inlet_function' + [] + [inlet_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'left_boundary' + functor = 0 + [] + [circle_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'circle' + functor = 0 + [] + [circle_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'circle' + functor = 0 + [] + [gap_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'top_gap bottom_gap' + functor = 'gap_x' + [] + [gap_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'top_gap bottom_gap' + 
functor = 'gap_y' + [] + [walls_x] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_x + boundary = 'top_boundary bottom_boundary' + functor = 0 + [] + [walls_y] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + variable = vel_y + boundary = 'top_boundary bottom_boundary' + functor = 0 + [] + [outlet_p] + type = LinearFVAdvectionDiffusionFunctorDirichletBC + boundary = 'right_boundary' + variable = pressure + functor = 0 + [] + [outlet_u] + type = LinearFVAdvectionDiffusionOutflowBC + variable = vel_x + use_two_term_expansion = false + boundary = 'right_boundary' + [] + [outlet_v] + type = LinearFVAdvectionDiffusionOutflowBC + variable = vel_y + use_two_term_expansion = false + boundary = 'right_boundary' + [] +[] + +[Postprocessors] + [drag_force] + type = IntegralDirectedSurfaceForce + vel_x = vel_x + vel_y = vel_y + mu = ${mu} + pressure = pressure + principal_direction = '1 0 0' + boundary = 'circle' + outputs = none + execute_on = 'INITIAL TIMESTEP_END' + [] + [drag_coeff] + type = ParsedPostprocessor + expression = '2*drag_force/rho/(avgvel*avgvel)/D' + constant_names = 'rho avgvel D' + constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' + pp_names = 'drag_force' + execute_on = 'INITIAL TIMESTEP_END' + [] + [lift_force] + type = IntegralDirectedSurfaceForce + vel_x = vel_x + vel_y = vel_y + mu = ${mu} + pressure = pressure + principal_direction = '0 1 0' + boundary = 'circle' + outputs = none + execute_on = 'INITIAL TIMESTEP_END' + [] + [lift_coeff] + type = ParsedPostprocessor + expression = '2*lift_force/rho/(avgvel*avgvel)/D' + constant_names = 'rho avgvel D' + constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' + pp_names = 'lift_force' + execute_on = 'INITIAL TIMESTEP_END' + [] + [reward] + type = LiftDragRewardPostprocessor + lift = lift_coeff + drag = drag_coeff + averaging_window = 50 + coeff_1 = 0.0 + coeff_2 = 0.2 + execute_on = 'INITIAL TIMESTEP_END' + 
[] + # [p1] + # type = PointValue + # variable = pressure + # point = '0 0.07 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p2] + # type = PointValue + # variable = pressure + # point = '0 -0.07 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p3] + # type = PointValue + # variable = pressure + # point = '0.075 0.1 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p4] + # type = PointValue + # variable = pressure + # point = '0.075 0.0 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + # [p5] + # type = PointValue + # variable = pressure + # point = '0.075 -0.1 0.0' + # execute_on = 'INITIAL TIMESTEP_END' + # [] + [p1x] + type = PointValue + variable = vel_x + point = '0 0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p2x] + type = PointValue + variable = vel_x + point = '0 -0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p3x] + type = PointValue + variable = vel_x + point = '0.075 0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p4x] + type = PointValue + variable = vel_x + point = '0.075 0.0 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p5x] + type = PointValue + variable = vel_x + point = '0.075 -0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p1y] + type = PointValue + variable = vel_y + point = '0 0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p2y] + type = PointValue + variable = vel_y + point = '0 -0.07 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p3y] + type = PointValue + variable = vel_y + point = '0.075 0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p4y] + type = PointValue + variable = vel_y + point = '0.075 0.0 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [p5y] + type = PointValue + variable = vel_y + point = '0.075 -0.1 0.0' + execute_on = 'INITIAL TIMESTEP_END' + [] + [Q_signal] + type = ConstantPostprocessor + value = 0.0 + execute_on = TIMESTEP_BEGIN + [] + # [Q] + # type = LibtorchControlValuePostprocessor + # control_name = src_control + # [] + # 
[log_prob_Q] + # type = LibtorchDRLLogProbabilityPostprocessor + # control_name = src_control + # [] +[] + +# [Reporters] +# [results] +# type = AccumulateReporter +# reporters = 'p1x/value p2x/value p3x/value p4x/value p5x/value p1y/value p2y/value p3y/value p4y/value p5y/value reward/value Q/value log_prob_Q/value' +# [] +# [] + +# [Controls] +# [src_control] +# type = LibtorchDRLControl +# parameters = "Postprocessors/Q_signal/value" +# responses = 'p1x p2x p3x p4x p5x p1y p2y p3y p4y p5y' + +# # keep consistent with LibtorchDRLControlTrainer +# input_timesteps = 1 +# response_shift_factors = '1.98 1.825 2.015 0.03 1.9 0.58 -0.425 0.06 0.12 -0.02' +# response_scaling_factors = '1.47 1.03 2.60 3.45 2.0 1.19 1.6 2.7 1.47 2.08' +# action_scaling_factors = 1.0 + +# filename = "control.net_best" + +# num_neurons_per_layer = '512 512' +# activation_function = 'tanh tanh' + +# min_control_value = ${fparse -0.108} +# max_control_value = ${fparse 0.108} + +# execute_on = 'TIMESTEP_BEGIN' +# smoother = 0.1 +# num_stems_in_period = 50 + +# stochastic = false +# [] +# [] + +[Executioner] + type = PIMPLE + momentum_l_abs_tol = 1e-7 + pressure_l_abs_tol = 1e-7 + momentum_l_tol = 1e-7 + pressure_l_tol = 1e-7 + rhie_chow_user_object = 'rc' + momentum_systems = 'u_system v_system' + pressure_system = 'pressure_system' + momentum_equation_relaxation = 0.9 + pressure_variable_relaxation = 0.6 + num_iterations = 100 + pressure_absolute_tolerance = 5e-6 + momentum_absolute_tolerance = 5e-6 + momentum_petsc_options_iname = '-pc_type -pc_hypre_type' + momentum_petsc_options_value = 'hypre boomeramg' + pressure_petsc_options_iname = '-pc_type -pc_hypre_type' + pressure_petsc_options_value = 'hypre boomeramg' + print_fields = false + continue_on_max_its = true + dt = 0.0005 + num_steps = 4000 +[] + +[Outputs] + exodus = true + [json] + type = JSON + execute_on = final + [] + # console = false + # execute_on = FINAL +[] diff --git 
a/modules/combined/examples/stochastic/vortex_control/half-run/header.i b/modules/combined/examples/stochastic/vortex_control/half-run/header.i new file mode 100644 index 000000000000..6e5236531616 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/half-run/header.i @@ -0,0 +1,39 @@ +# ----------------------------------------------------------------------------- +# Flow around a cylinder (2D) benchmark validation case +# This example showcases a flow around a cylinder which results in vortex +# shedding. The problem specification has been taken from the following paper: +# +# @incollection{schafer1996benchmark, +# title={Benchmark computations of laminar flow around a cylinder}, +# author={Sch{\"a}fer, Michael and Turek, Stefan and Durst, Franz and Krause, Egon and Rannacher, Rolf}, +# booktitle={Flow simulation with high-performance computers II}, +# pages={547--566}, +# year={1996}, +# publisher={Springer} +# } +# The Reynolds number is Re=100. +# The expected Strouhal number (St) is in the [0.2950, 0.3050] range, with +# refinement=8, we expect to get St=0.2941 with the model below. 
+# Run it using the following command: +# ./navier_stokes-opt -i header.i mesh.i flow_over_circle.i executioner_postprocessor.i +# ----------------------------------------------------------------------------- + +# Geometry parameters +circle_radius = 0.05 +pitch = 0.2 +x_min = -0.2 +x_max = 1.5 +y_min = -0.2 +y_max = 0.21 +rundoff = 1e-4 +refinement = 8 + +# Material properties +mu = 1e-3 +rho = 1 + +# Boundary conditions +inlet_velocity = 1.5 + +# Numerical schemes +advected_interp_method = 'average' diff --git a/modules/combined/examples/stochastic/vortex_control/half-run/mesh.i b/modules/combined/examples/stochastic/vortex_control/half-run/mesh.i new file mode 100644 index 000000000000..2d962b51c9d2 --- /dev/null +++ b/modules/combined/examples/stochastic/vortex_control/half-run/mesh.i @@ -0,0 +1,240 @@ +[Mesh] + # ------------------------------------------ + # Middle layer + # ------------------------------------------ + [ccmg] + type = ConcentricCircleMeshGenerator + num_sectors = '${fparse refinement*2}' + radii = '${circle_radius} ${fparse 1.2*circle_radius}' + rings = '4 ${refinement} ${refinement}' + has_outer_square = on + pitch = ${pitch} + preserve_volumes = off + smoothing_max_it = 2 + [] + [in_between] + type = SideSetsBetweenSubdomainsGenerator + input = ccmg + primary_block = 2 + paired_block = 1 + new_boundary = 'circle' + [] + [delete] + type = BlockDeletionGenerator + input = in_between + block = '1' + [] + [final_ccmg] + type = RenameBlockGenerator + input = delete + old_block = '2 3' + new_block = '0 0' + [] + [left] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${x_min}' + xmax = '${fparse -pitch/2}' + ymin = '${fparse -pitch/2}' + ymax = '${fparse pitch/2}' + nx = '${fparse refinement*2}' + ny = '${fparse refinement*4+2}' + [] + [right] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse pitch/2}' + xmax = '${x_max}' + ymin = '${fparse -pitch/2}' + ymax = '${fparse pitch/2}' + nx = '${fparse refinement*30}' + ny = '${fparse 
refinement*4+2}' + [] + [combined_middle] + type = StitchedMeshGenerator + inputs = 'final_ccmg left right' + stitch_boundaries_pairs = 'left right; right left' + clear_stitched_boundary_ids = false + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + + [middle_top_sideset] + input = combined_middle + type = ParsedGenerateSideset + combinatorial_geometry = 'y > ${fparse pitch/2-rundoff}' + normal = '0 1 0' + new_sideset_name = 'middle_top' + [] + [middle_bottom_sideset] + input = middle_top_sideset + type = ParsedGenerateSideset + combinatorial_geometry = 'y < ${fparse -pitch/2+rundoff}' + normal = '0 -1 0' + new_sideset_name = 'middle_bottom' + [] + # ------------------------------------------ + # Top layer + # ------------------------------------------ + [top_left_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${x_min}' + xmax = '${fparse -pitch/2}' + ymin = '${fparse pitch/2}' + ymax = '${y_max}' + nx = '${fparse refinement*2}' + ny = '${fparse refinement*2+1}' + [] + [top_middle_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse -pitch/2}' + xmax = '${fparse pitch/2}' + ymin = '${fparse pitch/2}' + ymax = '${y_max}' + nx = '${fparse refinement*4+2}' + ny = '${fparse refinement*2+1}' + [] + [top_right_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse pitch/2}' + xmax = '${x_max}' + ymin = '${fparse pitch/2}' + ymax = '${y_max}' + nx = '${fparse refinement*30}' + ny = '${fparse refinement*2+1}' + [] + [combined_top] + type = StitchedMeshGenerator + inputs = 'top_middle_block top_left_block top_right_block' + stitch_boundaries_pairs = 'left right; right left' + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [top_bottom_sideset] + input = combined_top + type = ParsedGenerateSideset + combinatorial_geometry = 'y < ${fparse pitch/2+rundoff}' + normal = '0 -1 0' + new_sideset_name = 'top_bottom' + [] + [combined_middle_top] + type = 
StitchedMeshGenerator + inputs = 'top_bottom_sideset middle_bottom_sideset' + stitch_boundaries_pairs = 'top_bottom middle_top' + clear_stitched_boundary_ids = false + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [create_fused_top_sideset] + input = combined_middle_top + type = ParsedGenerateSideset + combinatorial_geometry = 'y > ${fparse y_max-rundoff}' + normal = '0 1 0' + new_sideset_name = 'top_boundary' + [] + # ------------------------------------------ + # Bottom layer + # ------------------------------------------ + [bottom_left_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${x_min}' + xmax = '${fparse -pitch/2}' + ymin = '${y_min}' + ymax = '${fparse -pitch/2}' + nx = '${fparse refinement*2}' + ny = '${fparse refinement*2}' + [] + [bottom_middle_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse -pitch/2}' + xmax = '${fparse pitch/2}' + ymin = '${y_min}' + ymax = '${fparse -pitch/2}' + nx = '${fparse refinement*4+2}' + ny = '${fparse refinement*2}' + [] + [bottom_right_block] + type = GeneratedMeshGenerator + dim = 2 + xmin = '${fparse pitch/2}' + xmax = '${x_max}' + ymin = '${y_min}' + ymax = '${fparse -pitch/2}' + nx = '${fparse refinement*30}' + ny = '${fparse refinement*2}' + [] + [combined_bottom] + type = StitchedMeshGenerator + inputs = 'bottom_middle_block bottom_left_block bottom_right_block' + stitch_boundaries_pairs = 'left right; right left' + prevent_boundary_ids_overlap = true + merge_boundaries_with_same_name = true + [] + [bottom_top_sideset] + input = combined_bottom + type = ParsedGenerateSideset + combinatorial_geometry = 'y > ${fparse -pitch/2-rundoff}' + normal = '0 1 0' + new_sideset_name = 'bottom_top' + [] + [combined_final] + type = StitchedMeshGenerator + inputs = 'create_fused_top_sideset bottom_top_sideset' + stitch_boundaries_pairs = 'middle_bottom bottom_top' + clear_stitched_boundary_ids = false + prevent_boundary_ids_overlap = true + 
merge_boundaries_with_same_name = true + [] + [create_fused_bottom_sideset] + input = combined_final + type = ParsedGenerateSideset + combinatorial_geometry = 'y < ${fparse y_min+rundoff}' + normal = '0 -1 0' + new_sideset_name = 'bottom_boundary' + [] + # ------------------------------------------ + # Left and right boundaries + # ------------------------------------------ + [create_fused_left_sideset] + input = create_fused_bottom_sideset + type = ParsedGenerateSideset + combinatorial_geometry = 'x < ${fparse x_min+rundoff}' + normal = '-1 0 0' + new_sideset_name = 'left_boundary' + [] + [create_fused_right_sideset] + input = create_fused_left_sideset + type = ParsedGenerateSideset + combinatorial_geometry = 'x > ${fparse x_max-rundoff}' + normal = '1 0 0' + new_sideset_name = 'right_boundary' + [] + [sideset_removal] + input = create_fused_right_sideset + type = BoundaryDeletionGenerator + boundary_names = 'bottom top left right middle_bottom middle_top bottom_top top_bottom' + [] + [new_boundaries_1] + type = SideSetsFromBoundingBoxGenerator + input = sideset_removal + bottom_left = '-0.008682409 0.049240388 0' + top_right = '0.008682409 0.5 0' + boundary_new = 'top_gap' + included_boundaries = 'circle' + replace = true + [] + [new_boundaries_2] + type = SideSetsFromBoundingBoxGenerator + input = new_boundaries_1 + bottom_left = '-0.008682409 -0.5 0' + top_right = '0.008682409 -0.049240388 0' + boundary_new = 'bottom_gap' + included_boundaries = 'circle' + replace = true + [] +[] diff --git a/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_sub.i b/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_sub.i deleted file mode 100644 index eb7b672d202a..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/libtorch_drl_control_sub.i +++ /dev/null @@ -1,177 +0,0 @@ -air_density = 1.184 # kg/m3 -air_cp = 1000 # J/(kg K) -air_effective_k = 0.5 # W/(m K) - -[Mesh] - [mesh] - type = GeneratedMeshGenerator - 
dim = 2 - xmin = 0.0 - xmax = 7.0 - ymin = 0.0 - ymax = 5.0 - nx = 10 - ny = 10 - [] -[] - -[Variables] - [T] - initial_condition = 297 - [] -[] - -[Kernels] - [time_derivative] - type = CoefTimeDerivative - variable = T - Coefficient = '${fparse air_density*air_cp}' - [] - [heat_conduction] - type = MatDiffusion - variable = T - diffusivity = 'k' - [] -[] - -[BCs] - [top_flux] - type = NeumannBC - value = 0.0 - boundary = 'top' - variable = T - [] - [dirichlet] - type = FunctionDirichletBC - function = temp_env - variable = T - boundary = 'left right' - [] -[] - -[Functions] - [temp_env] - type = ParsedFunction - value = '15.0*sin(t/86400.0*pi) + 273.0' - [] - [design_function] - type = ParsedFunction - value = '297' - [] - [reward_function] - type = ScaledAbsDifferenceDRLRewardFunction - design_function = design_function - observed_value = center_temp_tend - c1 = 1 - c2 = 10 - [] -[] - -[Materials] - [constant] - type = GenericConstantMaterial - prop_names = 'k' - prop_values = ${air_effective_k} - [] -[] - -[Postprocessors] - [center_temp] - type = PointValue - variable = T - point = '3.5 2.5 0.0' - execute_on = 'INITIAL TIMESTEP_BEGIN' - [] - [center_temp_tend] - type = PointValue - variable = T - point = '3.5 2.5 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [env_temp] - type = FunctionValuePostprocessor - function = temp_env - execute_on = 'INITIAL TIMESTEP_BEGIN' - [] - [reward] - type = FunctionValuePostprocessor - function = reward_function - execute_on = 'INITIAL TIMESTEP_END' - indirect_dependencies = 'center_temp_tend' - [] - [top_flux] - type = LibtorchControlValuePostprocessor - control_name = src_control - [] - [log_prob_top_flux] - type = LibtorchDRLLogProbabilityPostprocessor - control_name = src_control - [] -[] - -[Reporters] - [T_reporter] - type = AccumulateReporter - reporters = 'center_temp_tend/value reward/value top_flux/value log_prob_top_flux/value' - [] -[] - -[Controls] - inactive = 'src_control_final' - [src_control] - type = 
LibtorchDRLControl - parameters = "BCs/top_flux/value" - responses = 'center_temp_tend' - - # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 1 - response_scaling_factors = '0.03' - response_shift_factors = '290' - action_standard_deviations = '0.02' - action_scaling_factors = 20 - - execute_on = 'TIMESTEP_BEGIN' - [] - [src_control_final] - type = LibtorchNeuralNetControl - - filename = 'mynet_control.net' - num_neurons_per_layer = '16 6' - activation_function = 'relu' - - parameters = "BCs/top_flux/value" - responses = 'center_temp_tend' - - # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 1 - response_scaling_factors = '0.03' - response_shift_factors = '290' - action_standard_deviations = '0.01' - action_scaling_factors = 20 - - execute_on = 'TIMESTEP_BEGIN' - [] -[] - -[Executioner] - type = Transient - solve_type = 'NEWTON' - - petsc_options_iname = '-pc_type -pc_factor_shift_type' - petsc_options_value = 'lu NONZERO' - line_search = 'none' - - nl_rel_tol = 1e-7 - - start_time = 0.0 - end_time = 86400 - dt = ${fparse 86400/4} -[] - -[Outputs] - # console = false - [c] - type = JSON - execute_on = FINAL - [] -[] diff --git a/modules/combined/examples/stochastic/vortex_control/trainer.i b/modules/combined/examples/stochastic/vortex_control/trainer.i index a7310c0a0456..e36e16db65ca 100644 --- a/modules/combined/examples/stochastic/vortex_control/trainer.i +++ b/modules/combined/examples/stochastic/vortex_control/trainer.i @@ -49,16 +49,16 @@ log_probability = 'storage/r_transfer:results:log_prob_Q:value' reward = 'storage/r_transfer:results:reward:value' - num_epochs = 50 - update_frequency = 2 + num_epochs = 25 + update_frequency = 1 decay_factor = 0.99 lambda_factor = 0.97 loss_print_frequency = 1 critic_learning_rate = 0.001 - num_critic_neurons_per_layer = '128 128' - critic_activation_functions = 'relu relu' + num_critic_neurons_per_layer = '512 512' + critic_activation_functions = 'tanh tanh' control_learning_rate = 
0.001 num_control_neurons_per_layer = '512 512' @@ -76,8 +76,6 @@ # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' - action_standard_deviations = '0.1' - standardize_advantage = true read_from_file = false @@ -88,8 +86,10 @@ min_control_value = ${fparse -0.108} max_control_value = ${fparse 0.108} - batch_size = 160 + batch_size = 400 timestep_window = 50 + + entropy_coeff = 0.01 [] [] diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i index 97cf7ee691b6..3a1f03dd8229 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i @@ -10,8 +10,8 @@ air_effective_k = 0.5 # W/(m K) xmax = 7.0 ymin = 0.0 ymax = 5.0 - nx = 10 - ny = 10 + nx = 20 + ny = 20 [] [] @@ -127,30 +127,11 @@ air_effective_k = 0.5 # W/(m K) input_timesteps = 1 response_scaling_factors = '0.03' response_shift_factors = '290' - action_standard_deviations = '3e-2' action_scaling_factors = 20 + stochastic = true execute_on = 'TIMESTEP_BEGIN' [] - # [src_control_final] - # type = LibtorchNeuralNetControl - - # filename = 'mynet_control.net' - # num_neurons_per_layer = '16 6' - # activation_function = 'relu' - - # parameters = "BCs/top_flux/value" - # responses = 'center_temp_tend' - - # # keep consistent with LibtorchDRLControlTrainer - # input_timesteps = 1 - # response_scaling_factors = '0.03' - # response_shift_factors = '290' - # action_standard_deviations = '5e-5' - # action_scaling_factors = 20 - - # execute_on = 'TIMESTEP_BEGIN' - # [] [] [Executioner] @@ -165,13 +146,9 @@ air_effective_k = 0.5 # W/(m K) start_time = 0.0 end_time = 86400 - dt = ${fparse 86400/40} + dt = ${fparse 86400/80} [] [Outputs] console = false - [c] - type = JSON - execute_on = FINAL - [] 
[] diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index 4eaadebd228a..dd401a99bc45 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -13,7 +13,7 @@ type = SamplerFullSolveMultiApp sampler = dummy input_files = 'libtorch_drl_control_sub.i' - # mode = batch-reset + mode = batch-reset [] [] @@ -42,27 +42,38 @@ log_probability = 'storage/r_transfer:T_reporter:log_prob_top_flux:value' reward = 'storage/r_transfer:T_reporter:reward:value' - num_epochs = 120 - update_frequency = 2 + num_epochs = 50 + update_frequency = 1 decay_factor = 0.8 + lambda_factor = 1.0 loss_print_frequency = 10 - critic_learning_rate = 0.005 + critic_learning_rate = 0.001 num_critic_neurons_per_layer = '32 16' + critic_activation_functions = 'relu relu' - control_learning_rate = 0.005 + control_learning_rate = 0.001 num_control_neurons_per_layer = '32 16' + control_activation_functions = 'relu relu' + # keep consistent with LibtorchNeuralNetControl input_timesteps = 1 response_scaling_factors = '0.03' response_shift_factors = '290' - action_standard_deviations = '3e-2' standardize_advantage = true + batch_size = 80 + read_from_file = false + + entropy_coeff = 0.0 + + # min_control_value = ${fparse -0.1} + # max_control_value = ${fparse 0.1} + [] [] @@ -70,6 +81,7 @@ [storage] type = StochasticReporter parallel_type = ROOT + outputs = none [] [reward] type = DRLRewardReporter @@ -79,13 +91,12 @@ [Executioner] type = Transient - num_steps = 4 + num_steps = 200 [] [Outputs] file_base = output/train_out - # json = true - csv = true + json = true time_step_interval = 1 execute_on = TIMESTEP_END [] diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h 
b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 85e72a541fd8..24a68a530d19 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -231,6 +231,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase torch::Tensor _log_probability_tensor; Real _highest_reward; + Real _entropy_coeff; std::unique_ptr _actor_optimizer; std::unique_ptr _critic_optimizer; diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index fbb79ce620e1..891540c2c9ae 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -36,9 +36,6 @@ LibtorchDRLControl::validParams() params.addParam("stochastic", true, "Blabla"); - params.addParam>( - "action_standard_deviations", {}, "Standard deviation value used while sampling the actions."); - params.addParam>("min_control_value", {}, "The minimum values of the control signal."); params.addParam>("max_control_value", {}, "The maximum calue of the control signal."); @@ -82,10 +79,9 @@ LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & paramet const std::vector & minimum_values = getParam>("min_control_value"); const std::vector & maximum_values = getParam>("max_control_value"); - const std::vector & action_std = getParam>("action_standard_deviations"); auto nn = std::make_shared( - filename, num_inputs, num_outputs, num_neurons_per_layer, action_std, activation_functions, minimum_values, maximum_values); + filename, num_inputs, num_outputs, num_neurons_per_layer, activation_functions, minimum_values, maximum_values); try { @@ -213,7 +209,7 @@ LibtorchDRLControl::execute() // for (const auto i : index_range(_current_control_signals)) // _current_control_signals[i] = 
std::min(std::max(_current_control_signals[i], _minimum_actions[i]), _maximum_actions[i]); - if (!_stochastic) + if (_stochastic) { torch::Tensor log_probability = _actor_nn->logProbability(action); diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index b8b211f29c82..1c20e778fd95 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -89,9 +89,6 @@ LibtorchDRLControlTrainer::validParams() params.addParam( "seed", 11, "Random number generator seed for stochastic optimizers."); - params.addRequiredParam>( - "action_standard_deviations", "Standard deviation value used while sampling the actions."); - params.addParam( "clip_parameter", 0.2, "Clip parameter used while clamping the advantage value."); params.addRangeCheckedParam( @@ -131,6 +128,8 @@ LibtorchDRLControlTrainer::validParams() params.addParam>("min_control_value", {}, "The minimum values of the control signal."); params.addParam>("max_control_value", {}, "The maximum calue of the control signal."); + params.addParam("entropy_coeff", 0.01, "ASDASD"); + params.addParam("timestep_window", 1, "Data acquisition timesteps (every nth)"); return params; @@ -167,7 +166,6 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _clip_param(getParam("clip_parameter")), _decay_factor(getParam("decay_factor")), _lambda_factor(getParam("lambda_factor")), - _action_std(getParam>("action_standard_deviations")), _filename_base(isParamValid("filename_base") ? 
getParam("filename_base") : ""), _read_from_file(getParam("read_from_file")), _shift_outputs(getParam("shift_outputs")), @@ -177,6 +175,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _min_values(getParam>("min_control_value")), _max_values(getParam>("max_control_value")), _highest_reward(-1e8), + _entropy_coeff(getParam("entropy_coeff")), _update_counter(_update_frequency), _timestep_window(getParam("timestep_window")) { @@ -206,7 +205,6 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _num_inputs, _num_outputs, _num_control_neurons_per_layer, - _action_std, getParam>("control_activation_functions"), _min_values, _max_values); @@ -403,7 +401,10 @@ LibtorchDRLControlTrainer::execute() computeEpisodeRewardStatistics(); if(_average_episode_reward > _highest_reward) + { torch::save(_control_nn, _control_nn->name()+"_best"); + _highest_reward = _average_episode_reward; + } normalizeResponseData(_state_data, _input_timesteps); normalizeResponseData(_next_state_data, _input_timesteps); @@ -630,7 +631,7 @@ LibtorchDRLControlTrainer::trainController() auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage_batch; // Compute loss values for the critic and the control neural net - auto actor_loss = -(torch::min(surr1, surr2) + 0.01*_control_nn->entropy()).mean(); + auto actor_loss = -(torch::min(surr1, surr2) + _entropy_coeff*_control_nn->entropy()).mean(); auto critic_loss = torch::mse_loss(value, return_batch); // Update the weights in the neural nets @@ -674,8 +675,11 @@ LibtorchDRLControlTrainer::trainController() batch_begin = batch_end; } // std::cout << _control_nn->stdTensor() << std::endl; - std::cout << _control_nn->alphaTensor().mean() << std::endl; - std::cout << _control_nn->betaTensor().mean() << std::endl; + if (_min_values.size()) + { + std::cout << _control_nn->alphaTensor().mean() << std::endl; + std::cout << _control_nn->betaTensor().mean() << std::endl; + } 
_console << "Best model so far: " << _highest_reward << std::endl; } } From b0a9231cc077cd6c5df2b957c693e2dfb9d15613 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 21 Apr 2026 11:50:50 -0600 Subject: [PATCH 19/51] Adopt to new changes. --- .../libtorch/utils/LibtorchActorNeuralNet.h | 4 +- .../controls/LibtorchNeuralNetControl.C | 2 +- .../libtorch/utils/LibtorchActorNeuralNet.C | 4 +- .../libtorch/controls/LibtorchDRLControl.h | 10 - .../SamplerNeuralNetControlTransfer.h | 2 +- .../libtorch/controls/LibtorchDRLControl.C | 197 ++++++------------ .../trainers/LibtorchDRLControlTrainer.C | 14 +- .../SamplerNeuralNetControlTransfer.C | 2 +- 8 files changed, 76 insertions(+), 159 deletions(-) diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h index c349207c94c1..7a6c87e31761 100644 --- a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -7,7 +7,7 @@ //* Licensed under LGPL 2.1, please see LICENSE for details //* https://www.gnu.org/licenses/lgpl-2.1.html -#ifdef LIBTORCH_ENABLED +#ifdef MOOSE_LIBTORCH_ENABLED #pragma once @@ -51,7 +51,7 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet * this cannot be const since it creates a graph in the background * @param x Input tensor for the evaluation */ - virtual torch::Tensor forward(torch::Tensor & x) override; + virtual torch::Tensor forward(const torch::Tensor & x) override; virtual torch::Tensor evaluate(torch::Tensor & input, bool sampled); diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 090aec0a8218..6476fdd38f59 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -112,7 +112,7 @@ LibtorchNeuralNetControl::loadControlNeuralNetFromFile(const InputParameters & p { 
const auto & filename = getParam("filename"); if (getParam("torch_script_format")) - _nn = std::make_shared(filename); + _nn = std::make_shared(filename); else { unsigned int num_inputs = _response_names.size() * _input_timesteps; diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C index 5a4a6df6a29a..d1bfd39f1e6f 100644 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -7,7 +7,7 @@ //* Licensed under LGPL 2.1, please see LICENSE for details //* https://www.gnu.org/licenses/lgpl-2.1.html -#ifdef LIBTORCH_ENABLED +#ifdef MOOSE_LIBTORCH_ENABLED #include "LibtorchActorNeuralNet.h" #include "MooseError.h" @@ -179,7 +179,7 @@ LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) } torch::Tensor -LibtorchActorNeuralNet::forward(torch::Tensor & x) +LibtorchActorNeuralNet::forward(const torch::Tensor & x) { torch::Tensor output(x); if (_data_type != output.scalar_type()) diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index 0b3e4573533d..76350343690f 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -44,16 +44,6 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl virtual void loadControlNeuralNetFromFile(const InputParameters & parameters) override; protected: - /** - * Function which computes the logarithmic probability of given actions. 
- * @param action The tensor containing the perturbed control signals (also known as the action of - * the controller) - * @param output_tensor The expected value of the signals predicted by the neural net - * @return The logarithmic probability of the action with respect to the neural net prediction - */ - torch::Tensor computeLogProbability(const torch::Tensor & action, - const torch::Tensor & output_tensor); - /// The log probability of control signals from the last evaluation of the controller std::vector _current_control_signal_log_probabilities; diff --git a/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h b/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h index 0fe3ab97052b..3e0527028af7 100644 --- a/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h +++ b/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h @@ -7,7 +7,7 @@ //* Licensed under LGPL 2.1, please see LICENSE for details //* https://www.gnu.org/licenses/lgpl-2.1.html -#ifdef LIBTORCH_ENABLED +#ifdef MOOSE_LIBTORCH_ENABLED #pragma once diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 891540c2c9ae..e10d8e1cf90a 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -10,12 +10,7 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include "LibtorchDRLControl.h" -<<<<<<< HEAD #include "TorchScriptModule.h" -#include "LibtorchArtificialNeuralNet.h" -======= -#include "LibtorchTorchScriptNeuralNet.h" ->>>>>>> a7b46c70e5 (Add actor network) #include "Transient.h" #include "LibtorchUtils.h" @@ -38,6 +33,11 @@ LibtorchDRLControl::validParams() params.addParam>("min_control_value", {}, "The minimum values of the control signal."); params.addParam>("max_control_value", {}, "The 
maximum calue of the control signal."); + params.addParam>( + "action_standard_deviations", + {}, + "Deprecated compatibility parameter. Actor policies now learn their own action " + "distribution widths."); return params; } @@ -65,7 +65,10 @@ LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & paramet { const auto & filename = getParam("filename"); if (getParam("torch_script_format")) - _nn = std::make_shared(filename); + { + _actor_nn.reset(); + _nn = std::make_shared(filename); + } else { unsigned int num_inputs = _response_names.size() * _input_timesteps; @@ -86,7 +89,8 @@ LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & paramet try { torch::load(nn, filename); - _actor_nn =std::make_shared(*nn); + _actor_nn = std::make_shared(*nn); + _nn = _actor_nn; } catch (const c10::Error & e) { @@ -103,155 +107,72 @@ LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & paramet void LibtorchDRLControl::execute() { - if (_actor_nn) - { - unsigned int n_controls = _control_names.size(); - unsigned int num_old_timesteps = _input_timesteps - 1; - - // Fill a vector with the current values of the responses - updateCurrentResponse(); - -<<<<<<< HEAD - // If this is the first time this control is called and we need to use older values, fill up the - // needed old values using the initial values - if (_old_responses.empty()) - _old_responses.assign(num_old_timesteps, _current_response); - - if (_call_counter % _num_steps_in_period == 0) - { - // Organize the old an current solution into a tensor so we can evaluate the neural net - torch::Tensor input_tensor = prepareInputTensor(); - - // Evaluate the neural network to get the expected control value - torch::Tensor output_tensor = _nn->forward(input_tensor); - - // std::cout << "Input " << input_tensor << std::endl; - // std::cout << "Output " << output_tensor << std::endl; + if (!_actor_nn && !_nn) + return; - // Sample control value (action) from Gaussian 
distribution - torch::Tensor action = at::normal(output_tensor, _std); + if (_current_execute_flag != EXEC_TIMESTEP_BEGIN) + return; - // Compute log probability - torch::Tensor log_probability = computeLogProbability(action, output_tensor); + const unsigned int n_controls = _control_names.size(); + const unsigned int num_old_timesteps = _input_timesteps - 1; - _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; - - for (const auto i : index_range(_current_control_signals)) - _current_control_signals[i] = std::min(std::max(_current_control_signals[i], _minimum_actions[i]), _maximum_actions[i]); - - _current_control_signal_log_probabilities = {log_probability.data_ptr(), - log_probability.data_ptr() + - log_probability.size(1)}; - } + // Fill a vector with the current values of the responses. + updateCurrentResponse(); - // Convert data - _previous_control_signal = _current_smoothed_signal; + // Seed the response history with the initial response when the control first runs. 
+ if (_old_responses.empty()) + _old_responses.assign(num_old_timesteps, _current_response); + if (_call_counter % _num_steps_in_period == 0) + { + torch::Tensor input_tensor = prepareInputTensor(); + torch::Tensor action; - for (const auto i : index_range(_current_smoothed_signal)) - _current_smoothed_signal[i] = _previous_control_signal[i] + _smoother*(_current_control_signals[i] - _previous_control_signal[i]); - - - // std::cout << "Setting control signal to: " << Moose::stringify(_current_control_signals) << std::endl; - // std::cout << "Setting log probability to: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; - - for (unsigned int control_i = 0; control_i < n_controls; ++control_i) + if (_actor_nn) { + action = _actor_nn->evaluate(input_tensor, _stochastic); - // We scale the controllable value for physically meaningful control action - setControllableValueByName(_control_names[control_i], - _current_smoothed_signal[control_i] * - _action_scaling_factors[control_i]); + if (_stochastic) + { + torch::Tensor log_probability = _actor_nn->logProbability(action); + _current_control_signal_log_probabilities = {log_probability.data_ptr(), + log_probability.data_ptr() + + log_probability.size(1)}; + } + else + _current_control_signal_log_probabilities.assign(n_controls, 0.0); } - - // We add the curent solution to the old solutions and move everything in there one step - // backward - if (_old_responses.size()) + else { - std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); - _old_responses[0] = _current_response; + action = _nn->forward(input_tensor); + _current_control_signal_log_probabilities.assign(n_controls, 0.0); } - _call_counter++; -======= - if (_current_execute_flag == EXEC_TIMESTEP_BEGIN) - { - // If this is the first time this control is called and we need to use older values, fill up the - // needed old values using the initial values - if (!_initialized) - { - _old_responses.clear(); - 
for (unsigned int step_i = 0; step_i < num_old_timesteps; ++step_i) - _old_responses.push_back(_current_response); - _initialized = true; - } - - if (_call_counter % _num_steps_in_period == 0) - { - // Organize the old an current solution into a tensor so we can evaluate the neural net - torch::Tensor input_tensor = prepareInputTensor(); - - // std::cout << "Std" << _actor_nn->stdTensor() << std::endl; - // std::cout << "Input" << input_tensor << std::endl; - // Evaluate the neural network to get the expected control value - torch::Tensor action = _actor_nn->evaluate(input_tensor, _stochastic); - - // std::cout << "in za control " << action << std::endl; - // Compute log probability - - _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; - - if (_call_counter == 0) - _current_smoothed_signal = _current_control_signals; - - // std::cout << "Computing control signal to: " << Moose::stringify(_current_control_signals) << std::endl; - - - // for (const auto i : index_range(_current_control_signals)) - // _current_control_signals[i] = std::min(std::max(_current_control_signals[i], _minimum_actions[i]), _maximum_actions[i]); - - if (_stochastic) - { - torch::Tensor log_probability = _actor_nn->logProbability(action); - - _current_control_signal_log_probabilities = {log_probability.data_ptr(), - log_probability.data_ptr() + - log_probability.size(1)}; - // std::cout << "Logprob: " << Moose::stringify(_current_control_signal_log_probabilities) << std::endl; - } - } - - // Convert data - _previous_control_signal = _current_smoothed_signal; + _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; + if (_call_counter == 0) + _current_smoothed_signal = _current_control_signals; + } - for (const auto i : index_range(_current_smoothed_signal)) - _current_smoothed_signal[i] = _previous_control_signal[i] + _smoother*(_current_control_signals[i] - _previous_control_signal[i]); - - - // std::cout << "Setting control signal 
to: " << Moose::stringify(_current_smoothed_signal) << std::endl; - + _previous_control_signal = _current_smoothed_signal; - for (unsigned int control_i = 0; control_i < n_controls; ++control_i) - { + for (const auto i : index_range(_current_smoothed_signal)) + _current_smoothed_signal[i] = + _previous_control_signal[i] + + _smoother * (_current_control_signals[i] - _previous_control_signal[i]); - // We scale the controllable value for physically meaningful control action - setControllableValueByName(_control_names[control_i], - _current_smoothed_signal[control_i] * - _action_scaling_factors[control_i]); - } + for (unsigned int control_i = 0; control_i < n_controls; ++control_i) + setControllableValueByName(_control_names[control_i], + _current_smoothed_signal[control_i] * + _action_scaling_factors[control_i]); - // We add the curent solution to the old solutions and move everything in there one step - // backward - if (_old_responses.size()) - { - std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); - _old_responses[0] = _current_response; - } - _call_counter++; - } ->>>>>>> a7b46c70e5 (Add actor network) + if (_old_responses.size()) + { + std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); + _old_responses[0] = _current_response; } + + _call_counter++; } void diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 1c20e778fd95..6e91f44807f2 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -127,6 +127,11 @@ LibtorchDRLControlTrainer::validParams() params.addParam("batch_size", 100, "Batch size"); params.addParam>("min_control_value", {}, "The minimum values of the control signal."); params.addParam>("max_control_value", {}, "The maximum calue of the control 
signal."); + params.addParam>( + "action_standard_deviations", + {}, + "Deprecated compatibility parameter. Actor policies now learn their own action " + "distribution widths."); params.addParam("entropy_coeff", 0.01, "ASDASD"); @@ -166,6 +171,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _clip_param(getParam("clip_parameter")), _decay_factor(getParam("decay_factor")), _lambda_factor(getParam("lambda_factor")), + _action_std(getParam>("action_standard_deviations")), _filename_base(isParamValid("filename_base") ? getParam("filename_base") : ""), _read_from_file(getParam("read_from_file")), _shift_outputs(getParam("shift_outputs")), @@ -663,16 +669,16 @@ LibtorchDRLControlTrainer::trainController() // critic_params[param_i].value().data_ptr() + critic_params[param_i].value().numel())) << std::endl; // } - // print loss per epoch + // print loss per epoch if (_loss_print_frequency) if (epoch % _loss_print_frequency == 0 && batch_begin == 0) { _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN - << actor_loss.item() << COLOR_DEFAULT << " | Critic Loss: " << COLOR_GREEN - << critic_loss.item() << COLOR_DEFAULT << std::endl; + << actor_loss.item() << COLOR_DEFAULT << " | Critic Loss: " + << COLOR_GREEN << critic_loss.item() << COLOR_DEFAULT << std::endl; } - batch_begin = batch_end; + batch_begin = batch_end; } // std::cout << _control_nn->stdTensor() << std::endl; if (_min_values.size()) diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C index 5dc33a99724f..fb11c80479bc 100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C @@ -7,7 +7,7 @@ //* Licensed under LGPL 2.1, please see LICENSE for details //* https://www.gnu.org/licenses/lgpl-2.1.html -#ifdef 
LIBTORCH_ENABLED +#ifdef MOOSE_LIBTORCH_ENABLED #include "SamplerNeuralNetControlTransfer.h" #include "LibtorchNeuralNetControl.h" From 77baa1ba21a7ab07830481d9f668f54d55ea9e7e Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 21 Apr 2026 13:15:55 -0600 Subject: [PATCH 20/51] Apply formatting, add beta logprob fix. --- .../libtorch/utils/LibtorchActorNeuralNet.C | 125 ++++++++++-------- .../utils/LibtorchArtificialNeuralNet.C | 34 ++++- unit/src/LibtorchActorNeuralNetTest.C | 74 +++++++++++ 3 files changed, 168 insertions(+), 65 deletions(-) create mode 100644 unit/src/LibtorchActorNeuralNetTest.C diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C index d1bfd39f1e6f..7d06a30d97c0 100644 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -13,6 +13,8 @@ #include "MooseError.h" #include "LibtorchUtils.h" +#include + namespace Moose { @@ -27,31 +29,35 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( const torch::DeviceType device_type, const torch::ScalarType data_type, const bool build_on_construct) - : - LibtorchArtificialNeuralNet(name, num_inputs, num_outputs, num_neurons_per_layer, - activation_function, minimum_values, maximum_values, - device_type, data_type, - false) + : LibtorchArtificialNeuralNet(name, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_function, + minimum_values, + maximum_values, + device_type, + data_type, + false) { if (build_on_construct) constructNeuralNetwork(); } -LibtorchActorNeuralNet::LibtorchActorNeuralNet( - const Moose::LibtorchActorNeuralNet & nn, - const bool build_on_construct) - : LibtorchArtificialNeuralNet(dynamic_cast(nn), false) +LibtorchActorNeuralNet::LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, + const bool build_on_construct) + : LibtorchArtificialNeuralNet(dynamic_cast(nn), false) { - // We construct the NN architecture - if 
(build_on_construct) - { - constructNeuralNetwork(); - // We fill it up with the current parameter values - const auto & from_params = nn.named_parameters(); - auto to_params = this->named_parameters(); - for (unsigned int param_i : make_range(from_params.size())) - to_params[param_i].value().data() = from_params[param_i].value().data().clone(); - } + // We construct the NN architecture + if (build_on_construct) + { + constructNeuralNetwork(); + // We fill it up with the current parameter values + const auto & from_params = nn.named_parameters(); + auto to_params = this->named_parameters(); + for (unsigned int param_i : make_range(from_params.size())) + to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + } } void @@ -59,12 +65,13 @@ LibtorchActorNeuralNet::initializeNeuralNetwork() { for (unsigned int i = 0; i < numHiddenLayers(); ++i) { - const auto & activation = _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; + const auto & activation = + _activation_function.size() > 1 ? 
_activation_function[i] : _activation_function[0]; const Real gain = determineGain(activation); auto sizes = _weights[i]->weight.sizes(); auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); - torch::nn::init::orthogonal_(_weights[i]->weight, gain/max_dim_size); + torch::nn::init::orthogonal_(_weights[i]->weight, gain / max_dim_size); torch::nn::init::zeros_(_weights[i]->bias); } @@ -72,15 +79,15 @@ LibtorchActorNeuralNet::initializeNeuralNetwork() { auto sizes = _alpha_module[0]->weight.sizes(); auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); - torch::nn::init::orthogonal_(_alpha_module[0]->weight, 1.0/max_dim_size); - torch::nn::init::orthogonal_(_beta_module[0]->weight, 1.0/max_dim_size); + torch::nn::init::orthogonal_(_alpha_module[0]->weight, 1.0 / max_dim_size); + torch::nn::init::orthogonal_(_beta_module[0]->weight, 1.0 / max_dim_size); } else { auto sizes = _mean_module[0]->weight.sizes(); auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); - torch::nn::init::orthogonal_(_mean_module[0]->weight, 1.0/max_dim_size); - torch::nn::init::orthogonal_(_log_std_module[0]->weight, 1.0/max_dim_size); + torch::nn::init::orthogonal_(_mean_module[0]->weight, 1.0 / max_dim_size); + torch::nn::init::orthogonal_(_log_std_module[0]->weight, 1.0 / max_dim_size); } } @@ -100,19 +107,23 @@ LibtorchActorNeuralNet::constructNeuralNetwork() inp_neurons = _num_neurons_per_layer[i]; } - auto num_inps = _num_neurons_per_layer[numHiddenLayers()-1]; + auto num_inps = _num_neurons_per_layer[numHiddenLayers() - 1]; if (_minimum_values.size()) { - _alpha_module.push_back(register_module("alpha", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); - _beta_module.push_back(register_module("beta", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); + _alpha_module.push_back(register_module( + "alpha", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); 
+ _beta_module.push_back(register_module( + "beta", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); _alpha_module[0]->to(_device_type, _data_type); _beta_module[0]->to(_device_type, _data_type); return; } - _mean_module.push_back(register_module("mean", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); - _log_std_module.push_back(register_module("std", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); + _mean_module.push_back(register_module( + "mean", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); + _log_std_module.push_back(register_module( + "std", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); _mean_module[0]->to(_device_type, _data_type); _log_std_module[0]->to(_device_type, _data_type); } @@ -122,12 +133,13 @@ LibtorchActorNeuralNet::entropy() { if (_minimum_values.size()) { - return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) - - (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) - + (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor); + const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); + return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) - + (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) + + (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale); } - return 0.5*std::log(2*M_PI)+_log_std_tensor+0.5; + return 0.5 * std::log(2 * M_PI) + _log_std_tensor + 0.5; } void @@ -143,11 +155,12 @@ LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) // std::cout << "setting beta tensor to " << _beta_tensor << std::endl; _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8); - _mean = _alpha_tensor/_alpha_beta_tensor; + _mean = _alpha_tensor / _alpha_beta_tensor; // std::cout << "setting mean to " << _mean << std::endl; - _log_norm = 
at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor); + _log_norm = + at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor); return; } @@ -163,7 +176,8 @@ LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) // # Clip log stddev for numerical stability // log_eps = log(util.epsilon) # epsilon < 1.0, hence negative - // log_stddev = tf.clip_by_value(t=log_stddev, clip_value_min=log_eps, clip_value_max=-log_eps) + // log_stddev = tf.clip_by_value(t=log_stddev, clip_value_min=log_eps, + // clip_value_max=-log_eps) // # Standard deviation // stddev = tf.exp(x=log_stddev) @@ -175,7 +189,6 @@ LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12)); _std_tensor = torch::exp(_log_std_tensor); - } torch::Tensor @@ -230,7 +243,7 @@ LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) return sample(); if (_minimum_values.size()) - return _min_tensor + (_max_tensor - _min_tensor)*_mean; + return _min_tensor + (_max_tensor - _min_tensor) * _mean; return _mean; } @@ -247,7 +260,7 @@ LibtorchActorNeuralNet::sample() // std::cout << "sampled " << sampled << std::endl; - return _min_tensor + (_max_tensor - _min_tensor)*sampled; + return _min_tensor + (_max_tensor - _min_tensor) * sampled; } return at::normal(_mean, _std_tensor); @@ -259,30 +272,26 @@ LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) // Logarithmic probability of taken action, given the current distribution. 
if (_minimum_values.size()) { - // std::cout << "input action " << action << std::endl; - // std::cout << "mintensor " << _min_tensor << std::endl; - // std::cout << "bewfore clamp " << (action - _min_tensor) / (_max_tensor - _min_tensor) < 1.0); + if (out_of_bounds.any().item()) + log_prob = torch::where(out_of_bounds, + torch::full_like(log_prob, -std::numeric_limits::infinity()), + log_prob); + + return log_prob; } torch::Tensor var = _std_tensor * _std_tensor; return -((action - _mean) * (action - _mean)) / (2.0 * var) - _log_std_tensor - - 0.5*std::log(2.0 * M_PI); + 0.5 * std::log(2.0 * M_PI); } } - #endif diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index 3692e50dc38f..1618496e7996 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -45,8 +45,21 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( mooseError("The number of activation functions should be either one or the same as the number " "of hidden layers"); - if (_minimum_values.size()) + const bool has_minimum_values = !_minimum_values.empty(); + const bool has_maximum_values = !_maximum_values.empty(); + if (has_minimum_values != has_maximum_values) + mooseError("Bounded neural network outputs require both minimum_values and maximum_values."); + + if (has_minimum_values) { + if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs) + mooseError("The number of minimum_values and maximum_values entries must match the number " + "of outputs."); + + for (const auto i : make_range(_minimum_values.size())) + if (!(_maximum_values[i] > _minimum_values[i])) + mooseError("maximum_values entries must be strictly greater than minimum_values entries."); + auto min_value = _minimum_values; LibtorchUtils::vectorToTensor(min_value, _min_tensor); _min_tensor.to(_data_type).to(_device_type); @@ -60,8 +73,7 @@ 
LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( } LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( - const Moose::LibtorchArtificialNeuralNet & nn, - const bool build_on_construct) + const Moose::LibtorchArtificialNeuralNet & nn, const bool build_on_construct) : torch::nn::Module(), _name(nn.name()), _num_inputs(nn.numInputs()), @@ -102,7 +114,7 @@ LibtorchArtificialNeuralNet::determineGain(const std::string & activation) if (activation == "relu") return std::sqrt(2); if (activation == "tanh") - return 5.0/3.0; + return 5.0 / 3.0; return 1.0; } @@ -112,7 +124,8 @@ LibtorchArtificialNeuralNet::initializeNeuralNetwork() { for (unsigned int i = 0; i < numHiddenLayers(); ++i) { - const auto & activation = _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; + const auto & activation = + _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; const Real gain = determineGain(activation); torch::nn::init::orthogonal_(_weights[i]->weight, gain); torch::nn::init::zeros_(_weights[i]->bias); @@ -320,8 +333,15 @@ dataLoad( dataLoad(stream, data_type, context); const torch::ScalarType datt(static_cast(data_type)); - nn = std::make_shared( - name, num_inputs, num_outputs, num_neurons_per_layer, activation_functions, min_values, max_values, divt, datt); + nn = std::make_shared(name, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_functions, + min_values, + max_values, + divt, + datt); torch::load(nn, name); } diff --git a/unit/src/LibtorchActorNeuralNetTest.C b/unit/src/LibtorchActorNeuralNetTest.C new file mode 100644 index 000000000000..07f7bf12f05c --- /dev/null +++ b/unit/src/LibtorchActorNeuralNetTest.C @@ -0,0 +1,74 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see 
LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "gtest/gtest.h" +#include "LibtorchActorNeuralNet.h" + +#include + +namespace +{ + +class TestableLibtorchActorNeuralNet : public Moose::LibtorchActorNeuralNet +{ +public: + using Moose::LibtorchActorNeuralNet::_alpha_module; + using Moose::LibtorchActorNeuralNet::_beta_module; + using Moose::LibtorchActorNeuralNet::_weights; + using Moose::LibtorchActorNeuralNet::LibtorchActorNeuralNet; +}; + +Real +inverseSoftplusPlusOne(const Real target) +{ + return std::log(std::exp(target - 1.0) - 1.0); +} + +} // namespace + +TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability) +{ + constexpr Real min_value = -2.0; + constexpr Real max_value = 4.0; + constexpr Real alpha_target = 2.3; + constexpr Real beta_target = 3.7; + constexpr Real action_value = 1.2; + + TestableLibtorchActorNeuralNet network( + "test_beta", 1, 1, {1}, {"linear"}, {min_value}, {max_value}); + + ASSERT_EQ(network._weights.size(), 1); + ASSERT_EQ(network._alpha_module.size(), 1); + ASSERT_EQ(network._beta_module.size(), 1); + + network._weights[0]->weight.data().fill_(0.0); + network._weights[0]->bias.data().fill_(1.0); + network._alpha_module[0]->weight.data().fill_(inverseSoftplusPlusOne(alpha_target)); + network._beta_module[0]->weight.data().fill_(inverseSoftplusPlusOne(beta_target)); + + auto input = torch::zeros({1, 1}, at::kDouble); + network.evaluate(input, false); + + const Real alpha = network.alphaTensor().item(); + const Real beta = network.betaTensor().item(); + const Real normalized = (action_value - min_value) / (max_value - min_value); + const Real log_norm = std::lgamma(alpha) + std::lgamma(beta) - std::lgamma(alpha + beta); + const Real expected = (alpha - 1.0) * std::log(normalized) + + (beta - 1.0) * std::log1p(-normalized) - log_norm - + std::log(max_value - min_value); + + auto action = torch::tensor({{action_value}}, at::kDouble); + const Real actual = 
//* This file is part of the MOOSE framework
//* https://www.mooseframework.org
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#ifdef MOOSE_LIBTORCH_ENABLED

#pragma once

#include <torch/torch.h>

#include "MooseTypes.h"

#include <string>
#include <vector>

namespace Moose
{

/**
 * Reusable continuous-action distribution head for actor policies.
 *
 * Unbounded actions use a Gaussian parameterization. If both minimum and maximum values are
 * provided, the head switches to a bounded Beta parameterization.
 */
class LibtorchActionDistributionHead : public torch::nn::Module
{
public:
  /**
   * Construct the distribution head.
   * @param name Name of the head
   * @param num_inputs Number of input features (width of the preceding hidden layer)
   * @param num_outputs Number of action components
   * @param minimum_values Lower action bounds; empty selects the unbounded (Gaussian) form
   * @param maximum_values Upper action bounds; must be empty exactly when minimum_values is
   * @param device_type Device on which the parameters live
   * @param scalar_type Floating-point type of the parameters
   * @param build_on_construct If true, create the linear submodules immediately
   */
  LibtorchActionDistributionHead(const std::string & name,
                                 unsigned int num_inputs,
                                 unsigned int num_outputs,
                                 const std::vector<Real> & minimum_values = {},
                                 const std::vector<Real> & maximum_values = {},
                                 torch::DeviceType device_type = torch::kCPU,
                                 torch::ScalarType scalar_type = torch::kDouble,
                                 bool build_on_construct = true);

  /// Copy constructor; when \p build_on_construct is true the submodules are
  /// created and the parameters of \p head are cloned into them
  LibtorchActionDistributionHead(const LibtorchActionDistributionHead & head,
                                 bool build_on_construct = true);

  /// Create and register the two bias-free linear output modules
  /// (mean/std when unbounded, alpha/beta when bounded)
  void constructHead();

  /// Orthogonally (re)initialize the weights of the linear output modules
  void initialize();

  /// Recompute the distribution parameters from the feature tensor \p input
  void reset(const torch::Tensor & input);

  /// Draw a stochastic action from the current distribution
  torch::Tensor sample() const;

  /// Deterministic (mean) action of the current distribution
  torch::Tensor deterministicAction() const;

  /// Log-probability of \p action under the current distribution
  torch::Tensor logProbability(const torch::Tensor & action) const;

  /// Entropy of the current distribution
  torch::Tensor entropy() const;

  /// Whether the bounded (Beta) parameterization is active
  bool isBounded() const { return !_minimum_values.empty(); }

  ///@{
  /// Linear module producing the primary parameter (mean or alpha)
  torch::nn::Linear & primaryModule() { return _primary_parameter_module; }
  const torch::nn::Linear & primaryModule() const { return _primary_parameter_module; }
  ///@}

  ///@{
  /// Linear module producing the secondary parameter (std or beta)
  torch::nn::Linear & secondaryModule() { return _secondary_parameter_module; }
  const torch::nn::Linear & secondaryModule() const { return _secondary_parameter_module; }
  ///@}

  /// Latest standard deviation tensor (meaningful for the unbounded form only)
  const torch::Tensor & stdTensor() const { return _std_tensor; }
  /// Latest alpha tensor (meaningful for the bounded form only)
  const torch::Tensor & alphaTensor() const { return _alpha_tensor; }
  /// Latest beta tensor (meaningful for the bounded form only)
  const torch::Tensor & betaTensor() const { return _beta_tensor; }

private:
  /// Name of the head
  const std::string _name;
  /// Number of input features
  const unsigned int _num_inputs;
  /// Number of action components
  const unsigned int _num_outputs;
  /// Lower action bounds (empty for unbounded actions)
  const std::vector<Real> _minimum_values;
  /// Upper action bounds (empty for unbounded actions)
  const std::vector<Real> _maximum_values;
  /// Device on which the parameters are stored
  const torch::DeviceType _device_type;
  /// Scalar type of the parameters
  const torch::ScalarType _data_type;

  /// Produces mean (unbounded) or alpha (bounded)
  torch::nn::Linear _primary_parameter_module{nullptr};
  /// Produces log-std (unbounded) or beta (bounded)
  torch::nn::Linear _secondary_parameter_module{nullptr};

  // Cached tensors: action bounds plus the parameters of the most recently
  // reset() distribution.
  torch::Tensor _min_tensor;
  torch::Tensor _max_tensor;
  torch::Tensor _alpha_tensor;
  torch::Tensor _beta_tensor;
  torch::Tensor _alpha_beta_tensor;
  torch::Tensor _log_norm;
  torch::Tensor _mean_tensor;
  torch::Tensor _std_tensor;
  torch::Tensor _log_std_tensor;
  torch::Tensor _mean;
};

} // namespace Moose

#endif
//* This file is part of the MOOSE framework
//* https://www.mooseframework.org
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#ifdef MOOSE_LIBTORCH_ENABLED

#include "LibtorchActionDistributionHead.h"

#include "LibtorchUtils.h"
#include "MooseError.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include "libmesh/utility.h"

namespace Moose
{

LibtorchActionDistributionHead::LibtorchActionDistributionHead(
    const std::string & name,
    const unsigned int num_inputs,
    const unsigned int num_outputs,
    const std::vector<Real> & minimum_values,
    const std::vector<Real> & maximum_values,
    const torch::DeviceType device_type,
    const torch::ScalarType data_type,
    const bool build_on_construct)
  : _name(name),
    _num_inputs(num_inputs),
    _num_outputs(num_outputs),
    _minimum_values(minimum_values),
    _maximum_values(maximum_values),
    _device_type(device_type),
    _data_type(data_type)
{
  // Bounds are all-or-nothing: supplying only one of the two is an input error
  const bool has_minimum_values = !_minimum_values.empty();
  const bool has_maximum_values = !_maximum_values.empty();
  if (has_minimum_values != has_maximum_values)
    mooseError("Bounded action heads require both minimum_values and maximum_values.");

  if (has_minimum_values)
  {
    if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs)
      mooseError("The number of minimum_values and maximum_values entries must match the number "
                 "of action outputs.");

    for (const auto i : make_range(_minimum_values.size()))
      if (!(_maximum_values[i] > _minimum_values[i]))
        mooseError("maximum_values entries must be strictly greater than minimum_values entries.");

    // Cache the bounds as tensors on the requested device/type. vectorToTensor
    // takes a non-const vector, hence the local copies.
    auto min_value = _minimum_values;
    LibtorchUtils::vectorToTensor(min_value, _min_tensor);
    _min_tensor = _min_tensor.to(_data_type).to(_device_type);
    auto max_value = _maximum_values;
    LibtorchUtils::vectorToTensor(max_value, _max_tensor);
    _max_tensor = _max_tensor.to(_data_type).to(_device_type);
  }

  if (build_on_construct)
    constructHead();
}

LibtorchActionDistributionHead::LibtorchActionDistributionHead(
    const LibtorchActionDistributionHead & head, const bool build_on_construct)
  : torch::nn::Module(),
    _name(head._name),
    _num_inputs(head._num_inputs),
    _num_outputs(head._num_outputs),
    _minimum_values(head._minimum_values),
    _maximum_values(head._maximum_values),
    _device_type(head._device_type),
    _data_type(head._data_type)
{
  if (_minimum_values.size())
  {
    auto min_value = _minimum_values;
    LibtorchUtils::vectorToTensor(min_value, _min_tensor);
    _min_tensor = _min_tensor.to(_data_type).to(_device_type);
    auto max_value = _maximum_values;
    LibtorchUtils::vectorToTensor(max_value, _max_tensor);
    _max_tensor = _max_tensor.to(_data_type).to(_device_type);
  }

  if (build_on_construct)
  {
    constructHead();
    // Deep-copy the parameters of the source head into the freshly built modules
    const auto & from_params = head.named_parameters();
    auto to_params = this->named_parameters();
    for (const auto param_i : make_range(from_params.size()))
      to_params[param_i].value().data() = from_params[param_i].value().data().clone();
  }
}

void
LibtorchActionDistributionHead::constructHead()
{
  // Module names mirror the parameterization so serialized checkpoints are self-describing
  const auto primary_name = isBounded() ? "alpha" : "mean";
  const auto secondary_name = isBounded() ? "beta" : "std";

  _primary_parameter_module = register_module(
      primary_name,
      torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(false)));
  _secondary_parameter_module = register_module(
      secondary_name,
      torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(false)));

  _primary_parameter_module->to(_device_type, _data_type);
  _secondary_parameter_module->to(_device_type, _data_type);
}

void
LibtorchActionDistributionHead::initialize()
{
  // Orthogonal init scaled by the largest weight-matrix dimension
  const auto primary_sizes = _primary_parameter_module->weight.sizes();
  const auto primary_max_dim_size = *std::max_element(primary_sizes.begin(), primary_sizes.end());
  torch::nn::init::orthogonal_(_primary_parameter_module->weight, 1.0 / primary_max_dim_size);

  const auto secondary_sizes = _secondary_parameter_module->weight.sizes();
  const auto secondary_max_dim_size =
      *std::max_element(secondary_sizes.begin(), secondary_sizes.end());
  torch::nn::init::orthogonal_(_secondary_parameter_module->weight, 1.0 / secondary_max_dim_size);
}

void
LibtorchActionDistributionHead::reset(const torch::Tensor & input)
{
  // Move the features onto the head's device/type if they differ
  auto features = input;
  if (_data_type != features.scalar_type())
    features = features.to(_data_type);
  if (_device_type != features.device().type())
    features = features.to(_device_type);

  if (isBounded())
  {
    // softplus(x) + 1 keeps alpha, beta > 1 so the Beta density stays unimodal
    const auto alpha = _primary_parameter_module->forward(features);
    _alpha_tensor = torch::log(torch::exp(alpha) + 1.0) + 1.0;
    const auto beta = _secondary_parameter_module->forward(features);
    _beta_tensor = torch::log(torch::exp(beta) + 1.0) + 1.0;

    _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8);
    _mean = _alpha_tensor / _alpha_beta_tensor;
    // log B(alpha, beta), the Beta normalization constant
    _log_norm =
        at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor);
    return;
  }

  _mean = _primary_parameter_module->forward(features);
  // Clip log-std for numerical stability before exponentiating
  _log_std_tensor = _secondary_parameter_module->forward(features);
  _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12));
  _std_tensor = torch::exp(_log_std_tensor);
}

torch::Tensor
LibtorchActionDistributionHead::sample() const
{
  if (isBounded())
  {
    // Beta(a,b) sample via two Gamma draws: Ga/(Ga+Gb), then rescale to [min, max]
    const auto alpha_sample = at::_standard_gamma(_alpha_tensor);
    const auto beta_sample = at::_standard_gamma(_beta_tensor);
    const auto sampled = alpha_sample / (alpha_sample + beta_sample);
    return _min_tensor + (_max_tensor - _min_tensor) * sampled;
  }

  return at::normal(_mean, _std_tensor);
}

torch::Tensor
LibtorchActionDistributionHead::deterministicAction() const
{
  if (isBounded())
    return _min_tensor + (_max_tensor - _min_tensor) * _mean;

  return _mean;
}

torch::Tensor
LibtorchActionDistributionHead::logProbability(const torch::Tensor & action) const
{
  if (isBounded())
  {
    // Beta log-density of the action mapped back to [0, 1], with the
    // change-of-variables term -log(scale) for the affine rescaling
    const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8);
    const auto normalized = (action - _min_tensor) / scale;
    const auto clipped = torch::clamp(normalized, 1e-8, 1.0 - 1e-8);
    auto log_prob = (_alpha_tensor - 1.0) * torch::log(clipped) +
                    (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale);

    // Actions outside the bounds have zero density
    const auto out_of_bounds = (normalized < 0.0) | (normalized > 1.0);
    if (out_of_bounds.any().item<bool>())
      log_prob = torch::where(out_of_bounds,
                              torch::full_like(log_prob, -std::numeric_limits<Real>::infinity()),
                              log_prob);

    return log_prob;
  }

  // Gaussian log-density
  constexpr Real pi = 3.14159265358979323846;
  const torch::Tensor var = _std_tensor * _std_tensor;
  return -((action - _mean) * (action - _mean)) / (2.0 * var) - _log_std_tensor -
         0.5 * std::log(2.0 * pi);
}

torch::Tensor
LibtorchActionDistributionHead::entropy() const
{
  if (isBounded())
  {
    // Beta entropy plus log(scale) for the affine rescaling to [min, max]
    const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8);
    return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) -
           (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) +
           (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale);
  }

  // Gaussian entropy: 0.5*log(2*pi*e) + log(std)
  constexpr Real pi = 3.14159265358979323846;
  return 0.5 * std::log(2.0 * pi) + _log_std_tensor + 0.5;
}

} // namespace Moose

#endif
//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#ifdef MOOSE_LIBTORCH_ENABLED

#pragma once

#include "MooseTypes.h"

#include <cstddef>
#include <vector>

/**
 * Shared observation normalization and history stacking logic for libtorch-based controls and
 * trainers.
 */
class LibtorchObservationHistory
{
public:
  /**
   * @param input_timesteps Number of timesteps stacked into one observation (current + history)
   * @param shift_factors Per-feature shifts applied before scaling; empty disables normalization
   * @param scaling_factors Per-feature multipliers; empty defaults to 1.0 per shift factor
   */
  LibtorchObservationHistory(unsigned int input_timesteps,
                             const std::vector<Real> & shift_factors = {},
                             const std::vector<Real> & scaling_factors = {});

  /// Number of timesteps stacked into one observation
  unsigned int inputTimesteps() const { return _input_timesteps; }

  /// Return a normalized copy of \p response ((x - shift) * scale per feature)
  std::vector<Real> normalize(const std::vector<Real> & response) const;

  /// Normalize \p response in place; no-op when no factors are configured
  void normalizeInPlace(std::vector<Real> & response) const;

  /// Normalize every value of [feature][time]-indexed \p response_trajectories in place
  void normalizeTrajectoryInPlace(std::vector<std::vector<Real>> & response_trajectories) const;

  /// Fill \p old_responses with (input_timesteps - 1) copies of the current observation
  void initializeHistory(const std::vector<Real> & normalized_response,
                         std::vector<std::vector<Real>> & old_responses) const;

  /// Shift \p old_responses back one step and store the current observation at the front
  void advanceHistory(const std::vector<Real> & normalized_response,
                      std::vector<std::vector<Real>> & old_responses) const;

  /// Concatenate the current observation with its history, newest first
  std::vector<Real>
  stackCurrentObservation(const std::vector<Real> & normalized_response,
                          const std::vector<std::vector<Real>> & old_responses) const;

  /// Build the stacked observation at \p time_index from [feature][time] trajectories,
  /// repeating the first timestep when not enough history exists yet
  std::vector<Real> stackTrajectoryObservation(
      const std::vector<std::vector<Real>> & normalized_response_trajectories,
      unsigned int time_index) const;

private:
  /// Error out when \p feature_count disagrees with the configured normalization factors
  void validateFeatureCount(std::size_t feature_count) const;
  /// Error out when the trajectories are ragged or mismatch the configured factors
  void validateTrajectoryShape(
      const std::vector<std::vector<Real>> & normalized_response_trajectories) const;

  /// Number of timesteps stacked into one observation
  const unsigned int _input_timesteps;
  /// Per-feature shifts (may be empty)
  const std::vector<Real> _shift_factors;
  /// Per-feature scales (matches _shift_factors in size when non-empty)
  const std::vector<Real> _scaling_factors;
};

#endif

//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#ifdef MOOSE_LIBTORCH_ENABLED

#include "LibtorchObservationHistory.h"

#include "MooseError.h"

#include <algorithm>
#include "libmesh/utility.h"

LibtorchObservationHistory::LibtorchObservationHistory(
    const unsigned int input_timesteps,
    const std::vector<Real> & shift_factors,
    const std::vector<Real> & scaling_factors)
  : _input_timesteps(input_timesteps),
    _shift_factors(shift_factors),
    // Omitted scaling factors default to 1.0 for every configured shift factor
    _scaling_factors(scaling_factors.empty() ? std::vector<Real>(shift_factors.size(), 1.0)
                                             : scaling_factors)
{
  if (!_shift_factors.empty() && _shift_factors.size() != _scaling_factors.size())
    mooseError("Observation shift and scaling factors must have the same size.");
}

void
LibtorchObservationHistory::validateFeatureCount(const std::size_t feature_count) const
{
  if (!_shift_factors.empty() && feature_count != _shift_factors.size())
    mooseError("Observation feature count does not match the configured normalization factors.");
}

void
LibtorchObservationHistory::validateTrajectoryShape(
    const std::vector<std::vector<Real>> & normalized_response_trajectories) const
{
  if (normalized_response_trajectories.empty())
    return;

  // Outer index is the feature, inner index is time
  validateFeatureCount(normalized_response_trajectories.size());

  const auto trajectory_size = normalized_response_trajectories.front().size();
  for (const auto & trajectory : normalized_response_trajectories)
    if (trajectory.size() != trajectory_size)
      mooseError("Observation trajectories must all have the same number of timesteps.");
}

std::vector<Real>
LibtorchObservationHistory::normalize(const std::vector<Real> & response) const
{
  auto normalized = response;
  normalizeInPlace(normalized);
  return normalized;
}

void
LibtorchObservationHistory::normalizeInPlace(std::vector<Real> & response) const
{
  validateFeatureCount(response.size());

  if (_shift_factors.empty())
    return;

  for (const auto i : make_range(response.size()))
    response[i] = (response[i] - _shift_factors[i]) * _scaling_factors[i];
}

void
LibtorchObservationHistory::normalizeTrajectoryInPlace(
    std::vector<std::vector<Real>> & response_trajectories) const
{
  validateTrajectoryShape(response_trajectories);

  if (_shift_factors.empty())
    return;

  for (const auto feature_i : make_range(response_trajectories.size()))
    for (auto & value : response_trajectories[feature_i])
      value = (value - _shift_factors[feature_i]) * _scaling_factors[feature_i];
}

void
LibtorchObservationHistory::initializeHistory(
    const std::vector<Real> & normalized_response,
    std::vector<std::vector<Real>> & old_responses) const
{
  // The current observation occupies one slot, so the history keeps
  // (input_timesteps - 1) entries; guard against input_timesteps == 0
  old_responses.assign(_input_timesteps > 0 ? _input_timesteps - 1 : 0, normalized_response);
}

void
LibtorchObservationHistory::advanceHistory(const std::vector<Real> & normalized_response,
                                           std::vector<std::vector<Real>> & old_responses) const
{
  if (old_responses.empty())
    return;

  // Rotate everything one step back and place the newest observation first
  std::rotate(old_responses.rbegin(), old_responses.rbegin() + 1, old_responses.rend());
  old_responses[0] = normalized_response;
}

std::vector<Real>
LibtorchObservationHistory::stackCurrentObservation(
    const std::vector<Real> & normalized_response,
    const std::vector<std::vector<Real>> & old_responses) const
{
  validateFeatureCount(normalized_response.size());

  std::vector<Real> stacked;
  stacked.reserve(normalized_response.size() * _input_timesteps);

  stacked.insert(stacked.end(), normalized_response.begin(), normalized_response.end());

  for (const auto history_i : make_range(_input_timesteps > 0 ? _input_timesteps - 1 : 0))
  {
    // Pad with the current observation when the history is still short
    const auto & history_entry =
        history_i < old_responses.size() ? old_responses[history_i] : normalized_response;
    if (history_entry.size() != normalized_response.size())
      mooseError("Observation history entries must have the same feature size as the current "
                 "observation.");
    stacked.insert(stacked.end(), history_entry.begin(), history_entry.end());
  }

  return stacked;
}

std::vector<Real>
LibtorchObservationHistory::stackTrajectoryObservation(
    const std::vector<std::vector<Real>> & normalized_response_trajectories,
    const unsigned int time_index) const
{
  validateTrajectoryShape(normalized_response_trajectories);

  if (normalized_response_trajectories.empty())
    return {};

  const auto trajectory_size = normalized_response_trajectories.front().size();
  if (time_index >= trajectory_size)
    mooseError("Requested observation time index is out of range.");

  std::vector<Real> stacked;
  stacked.reserve(normalized_response_trajectories.size() * _input_timesteps);

  // Newest first; clamp lagged indices at 0 so early timesteps repeat the start
  for (const auto lag : make_range(_input_timesteps))
  {
    const auto source_index = time_index > lag ? time_index - lag : 0;
    for (const auto feature_i : make_range(normalized_response_trajectories.size()))
      stacked.push_back(normalized_response_trajectories[feature_i][source_index]);
  }

  return stacked;
}

#endif
--- .../libtorch/utils/LibtorchActorNeuralNet.h | 18 +-- .../libtorch/utils/LibtorchActorNeuralNet.C | 145 +++--------------- 2 files changed, 23 insertions(+), 140 deletions(-) diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h index 7a6c87e31761..149550181ae4 100644 --- a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -13,6 +13,7 @@ #include #include +#include "LibtorchActionDistributionHead.h" #include "LibtorchArtificialNeuralNet.h" namespace Moose @@ -66,6 +67,9 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet const torch::Tensor & betaTensor() const {return _beta_tensor;} + const LibtorchActionDistributionHead & actionDistributionHead() const { return *_action_head; } + LibtorchActionDistributionHead & actionDistributionHead() { return *_action_head; } + void resetDistributionParams(torch::Tensor input); torch::Tensor logProbability(const torch::Tensor & other); @@ -75,22 +79,10 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet virtual void initializeNeuralNetwork() override; protected: - std::vector _log_std_module; - std::vector _mean_module; - - std::vector _alpha_module; - std::vector _beta_module; - torch::Tensor _alpha_tensor; torch::Tensor _beta_tensor; - torch::Tensor _alpha_beta_tensor; - torch::Tensor _log_norm; - - torch::Tensor _mean_tensor; torch::Tensor _std_tensor; - torch::Tensor _log_std_tensor; - - torch::Tensor _mean; + std::shared_ptr _action_head; }; void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C index 7d06a30d97c0..7a34bbc93581 100644 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -11,9 +11,6 @@ #include 
"LibtorchActorNeuralNet.h" #include "MooseError.h" -#include "LibtorchUtils.h" - -#include namespace Moose { @@ -75,20 +72,7 @@ LibtorchActorNeuralNet::initializeNeuralNetwork() torch::nn::init::zeros_(_weights[i]->bias); } - if (_minimum_values.size()) - { - auto sizes = _alpha_module[0]->weight.sizes(); - auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); - torch::nn::init::orthogonal_(_alpha_module[0]->weight, 1.0 / max_dim_size); - torch::nn::init::orthogonal_(_beta_module[0]->weight, 1.0 / max_dim_size); - } - else - { - auto sizes = _mean_module[0]->weight.sizes(); - auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); - torch::nn::init::orthogonal_(_mean_module[0]->weight, 1.0 / max_dim_size); - torch::nn::init::orthogonal_(_log_std_module[0]->weight, 1.0 / max_dim_size); - } + _action_head->initialize(); } void @@ -107,88 +91,30 @@ LibtorchActorNeuralNet::constructNeuralNetwork() inp_neurons = _num_neurons_per_layer[i]; } - auto num_inps = _num_neurons_per_layer[numHiddenLayers() - 1]; - if (_minimum_values.size()) - { - _alpha_module.push_back(register_module( - "alpha", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); - _beta_module.push_back(register_module( - "beta", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); - _alpha_module[0]->to(_device_type, _data_type); - _beta_module[0]->to(_device_type, _data_type); - - return; - } - - _mean_module.push_back(register_module( - "mean", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); - _log_std_module.push_back(register_module( - "std", torch::nn::Linear(torch::nn::LinearOptions(num_inps, _num_outputs).bias(false)))); - _mean_module[0]->to(_device_type, _data_type); - _log_std_module[0]->to(_device_type, _data_type); + _action_head = std::make_shared( + "action_head", + inp_neurons, + _num_outputs, + _minimum_values, + _maximum_values, + _device_type, + _data_type); + 
register_module("action_head", _action_head); } torch::Tensor LibtorchActorNeuralNet::entropy() { - if (_minimum_values.size()) - { - const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); - return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) - - (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) + - (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale); - } - - return 0.5 * std::log(2 * M_PI) + _log_std_tensor + 0.5; + return _action_head->entropy(); } void LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) { - if (_minimum_values.size()) - { - auto alpha = _alpha_module[0]->forward(input); - _alpha_tensor = torch::log(torch::exp(alpha) + 1.0) + 1.0; - // std::cout << "setting alpha tensor to " << _alpha_tensor << std::endl; - auto beta = _beta_module[0]->forward(input); - _beta_tensor = torch::log(torch::exp(beta) + 1.0) + 1.0; - // std::cout << "setting beta tensor to " << _beta_tensor << std::endl; - - _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8); - _mean = _alpha_tensor / _alpha_beta_tensor; - - // std::cout << "setting mean to " << _mean << std::endl; - - _log_norm = - at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor); - - return; - } - - // # Flat mean and log standard deviation - // mean = self.mean.apply(x=x) - // log_stddev = self.log_stddev.apply(x=x) - - // # Reshape mean and log stddev to action shape - // shape = (-1,) + self.shape - // mean = tf.reshape(tensor=mean, shape=shape) - // log_stddev = tf.reshape(tensor=log_stddev, shape=shape) - - // # Clip log stddev for numerical stability - // log_eps = log(util.epsilon) # epsilon < 1.0, hence negative - // log_stddev = tf.clip_by_value(t=log_stddev, clip_value_min=log_eps, - // clip_value_max=-log_eps) - - // # Standard deviation - // stddev = tf.exp(x=log_stddev) - - // return mean, stddev, log_stddev - - _mean = _mean_module[0]->forward(input); - 
_log_std_tensor = _log_std_module[0]->forward(input); - - _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12)); - _std_tensor = torch::exp(_log_std_tensor); + _action_head->reset(input); + _alpha_tensor = _action_head->alphaTensor(); + _beta_tensor = _action_head->betaTensor(); + _std_tensor = _action_head->stdTensor(); } torch::Tensor @@ -242,54 +168,19 @@ LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) if (sampled) return sample(); - if (_minimum_values.size()) - return _min_tensor + (_max_tensor - _min_tensor) * _mean; - - return _mean; + return _action_head->deterministicAction(); } torch::Tensor LibtorchActorNeuralNet::sample() { - if (_minimum_values.size()) - { - auto alpha_sample = at::_standard_gamma(_alpha_tensor); - auto beta_sample = at::_standard_gamma(_beta_tensor); - - auto sampled = alpha_sample / (alpha_sample + beta_sample); - - // std::cout << "sampled " << sampled << std::endl; - - return _min_tensor + (_max_tensor - _min_tensor) * sampled; - } - - return at::normal(_mean, _std_tensor); + return _action_head->sample(); } torch::Tensor LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) { - // Logarithmic probability of taken action, given the current distribution. 
- if (_minimum_values.size()) - { - const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); - const auto normalized = (action - _min_tensor) / scale; - const auto clipped = torch::clamp(normalized, 1e-8, 1.0 - 1e-8); - auto log_prob = (_alpha_tensor - 1.0) * torch::log(clipped) + - (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale); - - const auto out_of_bounds = (normalized < 0.0) | (normalized > 1.0); - if (out_of_bounds.any().item()) - log_prob = torch::where(out_of_bounds, - torch::full_like(log_prob, -std::numeric_limits::infinity()), - log_prob); - - return log_prob; - } - - torch::Tensor var = _std_tensor * _std_tensor; - return -((action - _mean) * (action - _mean)) / (2.0 * var) - _log_std_tensor - - 0.5 * std::log(2.0 * M_PI); + return _action_head->logProbability(action); } } From baf2a565f530a6f80e32172b637d6d4fd4c033d0 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 21 Apr 2026 14:37:44 -0600 Subject: [PATCH 24/51] Simplify the control neural net in the framework. 
--- .../controls/LibtorchNeuralNetControl.h | 4 +++ .../controls/LibtorchNeuralNetControl.C | 35 ++++++++----------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index f1226e6615a7..af4b20dae469 100644 --- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -12,6 +12,7 @@ #pragma once #include "LibtorchArtificialNeuralNet.h" +#include "LibtorchObservationHistory.h" #include "Control.h" /** @@ -105,6 +106,9 @@ class LibtorchNeuralNetControl : public Control /// Multipliers for the actions const std::vector _action_scaling_factors; + /// Shared observation normalization and history stacking helper + const LibtorchObservationHistory _observation_history; + /// Pointer to the neural net object which is supposed to be used to control /// the parameter values. The controller owns this object, but it can be read /// from file or copied by a transfer. diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 6476fdd38f59..2f4bd5ac3c25 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -75,7 +75,9 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param : std::vector(_response_names.size(), 1.0)), _action_scaling_factors(isParamValid("action_scaling_factors") ? 
getParam>("action_scaling_factors") - : std::vector(_control_names.size(), 1.0)) + : std::vector(_control_names.size(), 1.0)), + _observation_history( + _input_timesteps, _response_shift_factors, _response_scaling_factors) { // We first check if the input parameters make sense and throw errors if different parameter // combinations are not allowed @@ -103,8 +105,9 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param _response_values.push_back(&getPostprocessorValueByName(_response_names[resp_i])); // If the user wants to read the neural net from file, we do it. We can read it from a - // torchscript file, or we can create a shell and read back the parameters - this->loadControlNeuralNetFromFile(parameters); + // torchscript file, or we can create a shell and read back the parameters. + if (parameters.isParamSetByUser("filename")) + this->loadControlNeuralNetFromFile(parameters); } void @@ -151,14 +154,13 @@ LibtorchNeuralNetControl::execute() if (_nn) { const unsigned int n_controls = _control_names.size(); - const unsigned int num_old_timesteps = _input_timesteps - 1; // Fetch current reporter values and populate _current_response updateCurrentResponse(); // If this is the first timestep, we fill up the old values with the initial value if (_old_responses.empty()) - _old_responses.assign(num_old_timesteps, _current_response); + _observation_history.initializeHistory(_current_response, _old_responses); // Organize the old an current solution into a tensor so we can evaluate the neural net torch::Tensor input_tensor = prepareInputTensor(); @@ -178,10 +180,7 @@ LibtorchNeuralNetControl::execute() // We add the curent solution to the old solutions and move everything in there one step // backward if (_old_responses.size()) - { - std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); - _old_responses[0] = _current_response; - } + _observation_history.advanceHistory(_current_response, _old_responses); } } 
@@ -215,11 +214,12 @@ void LibtorchNeuralNetControl::updateCurrentResponse() { // Gather the current response values from the reporters - _current_response.clear(); - + std::vector raw_response; + raw_response.reserve(_response_names.size()); for (const auto & resp_i : index_range(_response_names)) - _current_response.push_back((*_response_values[resp_i] - _response_shift_factors[resp_i]) * - _response_scaling_factors[resp_i]); + raw_response.push_back(*_response_values[resp_i]); + + _current_response = _observation_history.normalize(raw_response); } void @@ -231,14 +231,7 @@ LibtorchNeuralNetControl::loadControlNeuralNet(const Moose::LibtorchArtificialNe torch::Tensor LibtorchNeuralNetControl::prepareInputTensor() { - const unsigned int num_old_timesteps = _input_timesteps - 1; - - // We convert the standard vectors to libtorch tensors - std::vector raw_input(_current_response); - - for (const auto & step_i : make_range(num_old_timesteps)) - raw_input.insert(raw_input.end(), _old_responses[step_i].begin(), _old_responses[step_i].end()); - + auto raw_input = _observation_history.stackCurrentObservation(_current_response, _old_responses); torch::Tensor input_tensor; LibtorchUtils::vectorToTensor(raw_input, input_tensor); From 68da0549314248d7b05fd95b957039274e620533 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 21 Apr 2026 14:44:26 -0600 Subject: [PATCH 25/51] Add loss object, minibatch selector, and buffer for organized data collection. 
--- .../utils/LibtorchRLMiniBatchSampler.h | 50 ++++++ .../libtorch/utils/LibtorchRLPPOLoss.h | 44 +++++ .../utils/LibtorchRLTrajectoryBuffer.h | 72 ++++++++ .../libtorch/utils/LibtorchRLValueEstimator.h | 47 +++++ .../utils/LibtorchRLMiniBatchSampler.C | 89 ++++++++++ .../src/libtorch/utils/LibtorchRLPPOLoss.C | 60 +++++++ .../utils/LibtorchRLTrajectoryBuffer.C | 166 ++++++++++++++++++ .../libtorch/utils/LibtorchRLValueEstimator.C | 105 +++++++++++ 8 files changed, 633 insertions(+) create mode 100644 modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h create mode 100644 modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h create mode 100644 modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h create mode 100644 modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h create mode 100644 modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C create mode 100644 modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C create mode 100644 modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C create mode 100644 modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h new file mode 100644 index 000000000000..0c543f508aa7 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h @@ -0,0 +1,50 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include "LibtorchRLTrajectoryBuffer.h" + +#include + +#include 
+#include + +struct LibtorchRLMiniBatch +{ + torch::Tensor observations; + torch::Tensor actions; + torch::Tensor old_log_probabilities; + torch::Tensor value_targets; + torch::Tensor advantages; + + std::int64_t size() const { return observations.defined() ? observations.size(0) : 0; } +}; + +/** + * Samples shuffled mini-batches from a flattened on-policy trajectory batch. + */ +class LibtorchRLMiniBatchSampler +{ +public: + std::vector sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, + unsigned int batch_size, + bool standardize_advantage) const; + +private: + static void validateBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch); + + static LibtorchRLMiniBatch makeMiniBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, + const torch::Tensor & indices, + bool standardize_advantage); +}; + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h new file mode 100644 index 000000000000..a0e21c72fa24 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h @@ -0,0 +1,44 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include "LibtorchActorNeuralNet.h" +#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchRLMiniBatchSampler.h" + +struct LibtorchRLPPOLossOutput +{ + torch::Tensor actor_loss; + torch::Tensor critic_loss; + torch::Tensor entropy; +}; + +/** + * PPO clipped surrogate loss on top of the reusable RL buffer/value-estimation core. 
+ */ +class LibtorchRLPPOLoss +{ +public: + LibtorchRLPPOLoss(Real clip_parameter, Real entropy_coeff); + + LibtorchRLPPOLossOutput compute(Moose::LibtorchActorNeuralNet & policy_network, + Moose::LibtorchArtificialNeuralNet & value_network, + const LibtorchRLMiniBatch & batch) const; + +private: + static torch::Tensor reduceActionDimension(const torch::Tensor & tensor); + + const Real _clip_parameter; + const Real _entropy_coeff; +}; + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h new file mode 100644 index 000000000000..aef781adbc1c --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h @@ -0,0 +1,72 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include "MooseTypes.h" + +#include + +#include +#include + +/** + * On-policy trajectory storage for fixed-horizon RL training. + */ +class LibtorchRLTrajectoryBuffer +{ +public: + struct Trajectory + { + std::vector> observations; + std::vector> next_observations; + std::vector> actions; + std::vector> log_probabilities; + std::vector rewards; + std::vector value_targets; + std::vector advantages; + }; + + struct TensorBatch + { + torch::Tensor observations; + torch::Tensor next_observations; + torch::Tensor actions; + torch::Tensor log_probabilities; + torch::Tensor rewards; + torch::Tensor value_targets; + torch::Tensor advantages; + + std::int64_t size() const { return observations.defined() ? 
observations.size(0) : 0; } + }; + + void addTrajectory(Trajectory trajectory); + + void clear(); + + bool empty() const { return _trajectories.empty(); } + + std::size_t numTrajectories() const { return _trajectories.size(); } + + std::size_t numTransitions() const; + + std::vector & trajectories() { return _trajectories; } + const std::vector & trajectories() const { return _trajectories; } + + TensorBatch flatten() const; + +private: + static void validateTrajectory(const Trajectory & trajectory); + + std::vector _trajectories; +}; + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h new file mode 100644 index 000000000000..d2bcb85f6fc2 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h @@ -0,0 +1,47 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchRLTrajectoryBuffer.h" + +#include + +/** + * Computes GAE advantages and value targets for an on-policy trajectory buffer. 
+ */ +class LibtorchRLValueEstimator +{ +public: + struct Targets + { + std::vector advantages; + std::vector value_targets; + }; + + LibtorchRLValueEstimator(Real discount_factor, Real lambda_factor); + + void computeValueTargets(LibtorchRLTrajectoryBuffer & buffer, + Moose::LibtorchArtificialNeuralNet & value_network) const; + + Targets estimate(const LibtorchRLTrajectoryBuffer::Trajectory & trajectory, + Moose::LibtorchArtificialNeuralNet & value_network) const; + +private: + std::vector evaluate(const std::vector> & observations, + Moose::LibtorchArtificialNeuralNet & value_network) const; + + const Real _discount_factor; + const Real _lambda_factor; +}; + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C new file mode 100644 index 000000000000..bebec8f139ac --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C @@ -0,0 +1,89 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRLMiniBatchSampler.h" + +#include "MooseError.h" + +#include + +std::vector +LibtorchRLMiniBatchSampler::sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, + const unsigned int batch_size, + const bool standardize_advantage) const +{ + std::vector mini_batches; + + if (!batch.size()) + return mini_batches; + + validateBatch(batch); + + const auto effective_batch_size = std::max(1, batch_size); + auto permutation = torch::randperm(batch.size(), torch::TensorOptions().dtype(torch::kLong)); + + for (std::int64_t batch_begin = 0; batch_begin < batch.size(); + batch_begin += effective_batch_size) + { + 
const auto batch_end = std::min( + batch.size(), batch_begin + static_cast(effective_batch_size)); + const auto indices = permutation.narrow(0, batch_begin, batch_end - batch_begin); + mini_batches.push_back(makeMiniBatch(batch, indices, standardize_advantage)); + } + + return mini_batches; +} + +void +LibtorchRLMiniBatchSampler::validateBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch) +{ + if (!batch.actions.defined() || !batch.log_probabilities.defined() || + !batch.value_targets.defined() || !batch.advantages.defined()) + mooseError("RL tensor batches must define observations, actions, log probabilities, value " + "targets, and advantages before mini-batch sampling."); + + const auto batch_size = batch.size(); + const auto validate_rows = [batch_size](const torch::Tensor & tensor, const char * name) + { + if (!tensor.defined() || tensor.size(0) != batch_size) + mooseError( + "RL tensor batch field ", name, " must have the same number of rows as observations."); + }; + + validate_rows(batch.actions, "actions"); + validate_rows(batch.log_probabilities, "log_probabilities"); + validate_rows(batch.value_targets, "value_targets"); + validate_rows(batch.advantages, "advantages"); +} + +LibtorchRLMiniBatch +LibtorchRLMiniBatchSampler::makeMiniBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, + const torch::Tensor & indices, + const bool standardize_advantage) +{ + LibtorchRLMiniBatch mini_batch; + + mini_batch.observations = batch.observations.index({indices}); + mini_batch.actions = batch.actions.index({indices}); + mini_batch.old_log_probabilities = batch.log_probabilities.index({indices}); + mini_batch.value_targets = batch.value_targets.index({indices}); + mini_batch.advantages = batch.advantages.index({indices}); + + if (standardize_advantage) + { + const auto std = mini_batch.advantages.std(false); + mini_batch.advantages = (mini_batch.advantages - mini_batch.advantages.mean()) / (std + 1e-10); + } + + return mini_batch; +} + +#endif diff 
--git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C new file mode 100644 index 000000000000..51d36135aaaa --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C @@ -0,0 +1,60 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRLPPOLoss.h" + +LibtorchRLPPOLoss::LibtorchRLPPOLoss(const Real clip_parameter, const Real entropy_coeff) + : _clip_parameter(clip_parameter), _entropy_coeff(entropy_coeff) +{ +} + +LibtorchRLPPOLossOutput +LibtorchRLPPOLoss::compute(Moose::LibtorchActorNeuralNet & policy_network, + Moose::LibtorchArtificialNeuralNet & value_network, + const LibtorchRLMiniBatch & batch) const +{ + auto observations = batch.observations; + policy_network.evaluate(observations, false); + + const auto current_log_probability = + reduceActionDimension(policy_network.logProbability(batch.actions)); + const auto previous_log_probability = reduceActionDimension(batch.old_log_probabilities); + const auto entropy = reduceActionDimension(policy_network.entropy()); + + const auto ratio = (current_log_probability - previous_log_probability).exp(); + const auto surr1 = ratio * batch.advantages; + const auto surr2 = + torch::clamp(ratio, 1.0 - _clip_parameter, 1.0 + _clip_parameter) * batch.advantages; + + LibtorchRLPPOLossOutput output; + output.actor_loss = -(torch::min(surr1, surr2) + _entropy_coeff * entropy).mean(); + output.critic_loss = + torch::mse_loss(value_network.forward(batch.observations), batch.value_targets); + output.entropy = entropy.mean(); + return output; +} + +torch::Tensor 
+LibtorchRLPPOLoss::reduceActionDimension(const torch::Tensor & tensor) +{ + if (!tensor.defined()) + return tensor; + + if (tensor.dim() == 1) + return tensor.unsqueeze(1); + + if (tensor.size(-1) == 1) + return tensor; + + return tensor.sum(-1, true); +} + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C new file mode 100644 index 000000000000..d6ee0a60da81 --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C @@ -0,0 +1,166 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRLTrajectoryBuffer.h" + +#include "MooseError.h" + +#include "libmesh/utility.h" + +namespace +{ + +torch::Tensor +bufferMatrixToTensor(const std::vector> & rows) +{ + if (rows.empty()) + return {}; + + const auto num_columns = rows.front().size(); + auto tensor = torch::zeros({static_cast(rows.size()), static_cast(num_columns)}, + torch::TensorOptions().dtype(torch::kDouble)); + auto accessor = tensor.accessor(); + + for (const auto row_i : make_range(rows.size())) + { + if (rows[row_i].size() != num_columns) + mooseError("All rows must have the same number of entries when flattening an RL batch."); + + for (const auto column_i : make_range(num_columns)) + accessor[row_i][column_i] = rows[row_i][column_i]; + } + + return tensor; +} + +torch::Tensor +bufferVectorToColumnTensor(const std::vector & values) +{ + if (values.empty()) + return {}; + + auto tensor = torch::zeros({static_cast(values.size()), 1}, + torch::TensorOptions().dtype(torch::kDouble)); + auto accessor = tensor.accessor(); + + for (const auto 
value_i : make_range(values.size())) + accessor[value_i][0] = values[value_i]; + + return tensor; +} + +} // namespace + +void +LibtorchRLTrajectoryBuffer::addTrajectory(Trajectory trajectory) +{ + validateTrajectory(trajectory); + _trajectories.push_back(std::move(trajectory)); +} + +void +LibtorchRLTrajectoryBuffer::clear() +{ + _trajectories.clear(); +} + +std::size_t +LibtorchRLTrajectoryBuffer::numTransitions() const +{ + std::size_t transitions = 0; + for (const auto & trajectory : _trajectories) + transitions += trajectory.rewards.size(); + return transitions; +} + +LibtorchRLTrajectoryBuffer::TensorBatch +LibtorchRLTrajectoryBuffer::flatten() const +{ + TensorBatch batch; + + if (_trajectories.empty()) + return batch; + + std::vector> observations; + std::vector> next_observations; + std::vector> actions; + std::vector> log_probabilities; + std::vector rewards; + std::vector value_targets; + std::vector advantages; + + observations.reserve(numTransitions()); + next_observations.reserve(numTransitions()); + actions.reserve(numTransitions()); + log_probabilities.reserve(numTransitions()); + rewards.reserve(numTransitions()); + value_targets.reserve(numTransitions()); + advantages.reserve(numTransitions()); + + for (const auto & trajectory : _trajectories) + { + if (!trajectory.value_targets.empty() && + trajectory.value_targets.size() != trajectory.rewards.size()) + mooseError("Value targets must match the reward length of the trajectory."); + if (!trajectory.advantages.empty() && trajectory.advantages.size() != trajectory.rewards.size()) + mooseError("Advantages must match the reward length of the trajectory."); + + observations.insert( + observations.end(), trajectory.observations.begin(), trajectory.observations.end()); + next_observations.insert(next_observations.end(), + trajectory.next_observations.begin(), + trajectory.next_observations.end()); + actions.insert(actions.end(), trajectory.actions.begin(), trajectory.actions.end()); + 
log_probabilities.insert(log_probabilities.end(), + trajectory.log_probabilities.begin(), + trajectory.log_probabilities.end()); + rewards.insert(rewards.end(), trajectory.rewards.begin(), trajectory.rewards.end()); + value_targets.insert( + value_targets.end(), trajectory.value_targets.begin(), trajectory.value_targets.end()); + advantages.insert(advantages.end(), trajectory.advantages.begin(), trajectory.advantages.end()); + } + + batch.observations = bufferMatrixToTensor(observations); + batch.next_observations = bufferMatrixToTensor(next_observations); + batch.actions = bufferMatrixToTensor(actions); + batch.log_probabilities = bufferMatrixToTensor(log_probabilities); + batch.rewards = bufferVectorToColumnTensor(rewards); + batch.value_targets = bufferVectorToColumnTensor(value_targets); + batch.advantages = bufferVectorToColumnTensor(advantages); + + return batch; +} + +void +LibtorchRLTrajectoryBuffer::validateTrajectory(const Trajectory & trajectory) +{ + const auto num_steps = trajectory.rewards.size(); + + if (trajectory.observations.size() != num_steps) + mooseError("RL trajectory observations must match the reward sequence length."); + + if (trajectory.next_observations.size() != num_steps) + mooseError("RL trajectory next observations must match the reward sequence length."); + + if (trajectory.actions.size() != num_steps) + mooseError("RL trajectory actions must match the reward sequence length."); + + if (trajectory.log_probabilities.size() != num_steps) + mooseError("RL trajectory log probabilities must match the reward sequence length."); + + if (!trajectory.value_targets.empty() && trajectory.value_targets.size() != num_steps) + mooseError("RL trajectory value targets must match the reward sequence length."); + + if (!trajectory.advantages.empty() && trajectory.advantages.size() != num_steps) + mooseError("RL trajectory advantages must match the reward sequence length."); +} + +#endif diff --git 
a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C new file mode 100644 index 000000000000..a20484202ac2 --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C @@ -0,0 +1,105 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRLValueEstimator.h" + +#include "LibtorchUtils.h" + +#include "libmesh/utility.h" + +namespace +{ + +torch::Tensor +valueEstimatorMatrixToTensor(const std::vector> & rows) +{ + if (rows.empty()) + return {}; + + const auto num_columns = rows.front().size(); + auto tensor = torch::zeros( + {static_cast(rows.size()), static_cast(num_columns)}, + torch::TensorOptions().dtype(torch::kDouble)); + auto accessor = tensor.accessor(); + + for (const auto row_i : make_range(rows.size())) + for (const auto column_i : make_range(num_columns)) + accessor[row_i][column_i] = rows[row_i][column_i]; + + return tensor; +} + +} // namespace + +LibtorchRLValueEstimator::LibtorchRLValueEstimator(const Real discount_factor, + const Real lambda_factor) + : _discount_factor(discount_factor), _lambda_factor(lambda_factor) +{ +} + +void +LibtorchRLValueEstimator::computeValueTargets(LibtorchRLTrajectoryBuffer & buffer, + Moose::LibtorchArtificialNeuralNet & value_network) const +{ + for (auto & trajectory : buffer.trajectories()) + { + const auto targets = estimate(trajectory, value_network); + trajectory.advantages = targets.advantages; + trajectory.value_targets = targets.value_targets; + } +} + +LibtorchRLValueEstimator::Targets +LibtorchRLValueEstimator::estimate(const LibtorchRLTrajectoryBuffer::Trajectory & 
trajectory, + Moose::LibtorchArtificialNeuralNet & value_network) const +{ + Targets targets; + + const auto values = evaluate(trajectory.observations, value_network); + const auto next_values = evaluate(trajectory.next_observations, value_network); + + const auto num_steps = trajectory.rewards.size(); + targets.advantages.resize(num_steps, 0.0); + targets.value_targets.resize(num_steps, 0.0); + + Real gae = 0.0; + for (const auto reverse_step : make_range(num_steps)) + { + const auto step = num_steps - reverse_step - 1; + const auto delta = + trajectory.rewards[step] + _discount_factor * next_values[step] - values[step]; + gae = delta + _discount_factor * _lambda_factor * gae; + targets.advantages[step] = gae; + targets.value_targets[step] = gae + values[step]; + } + + return targets; +} + +std::vector +LibtorchRLValueEstimator::evaluate(const std::vector> & observations, + Moose::LibtorchArtificialNeuralNet & value_network) const +{ + if (observations.empty()) + return {}; + + torch::NoGradGuard no_grad; + + auto tensor = valueEstimatorMatrixToTensor(observations); + auto value_tensor = value_network.forward(tensor); + auto flattened_value_tensor = value_tensor.reshape({-1}); + + std::vector values; + LibtorchUtils::tensorToVector(flattened_value_tensor, values); + return values; +} + +#endif From 5dcb70b00ad0544953e95b59da51171c4c4fb298 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 21 Apr 2026 14:44:58 -0600 Subject: [PATCH 26/51] Simplify the drl control object. 
--- .../libtorch/controls/LibtorchDRLControl.C | 105 ++++++++++++++---- 1 file changed, 86 insertions(+), 19 deletions(-) diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index e10d8e1cf90a..aa5a21eb5953 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -14,8 +14,59 @@ #include "Transient.h" #include "LibtorchUtils.h" +#include + registerMooseObject("StochasticToolsApp", LibtorchDRLControl); +namespace +{ + +bool +readArchiveTensor(torch::serialize::InputArchive & archive, + const std::string & key, + torch::Tensor & tensor) +{ + try + { + archive.read(key, tensor); + return true; + } + catch (const c10::Error &) + { + return false; + } +} + +void +loadActorParametersWithLegacyFallback(Moose::LibtorchActorNeuralNet & actor, + const std::string & filename) +{ + torch::serialize::InputArchive archive; + archive.load_from(filename); + + for (auto & parameter : actor.named_parameters()) + { + torch::Tensor stored_tensor; + bool loaded = readArchiveTensor(archive, parameter.key(), stored_tensor); + + if (!loaded && parameter.key().rfind("action_head.", 0) == 0) + loaded = readArchiveTensor( + archive, parameter.key().substr(std::string("action_head.").size()), stored_tensor); + + if (!loaded) + mooseError("The requested pytorch parameter file could not be loaded. This can either be " + "the result of the file not existing or a misalignment in the generated " + "container and the data in the file. 
Make sure the dimensions of the generated " + "neural net are the same as the dimensions of the parameters in the input file!\n" + "Missing serialized parameter: ", + parameter.key()); + + parameter.value().data().copy_(stored_tensor); + } +} + +} // namespace + InputParameters LibtorchDRLControl::validParams() { @@ -26,13 +77,25 @@ LibtorchDRLControl::validParams() params.addParam("seed", "Seed for the random number generator."); - params.addParam("num_stems_in_period", 1, "Blabla"); - params.addParam("smoother", 1.0, "Blabla"); - - params.addParam("stochastic", true, "Blabla"); + params.addParam( + "num_steps_in_period", + 1, + "Preferred spelling for the number of timesteps to reuse the most recent sampled " + "action before evaluating the policy again."); + params.addParam( + "num_stems_in_period", 1, "Deprecated compatibility spelling for num_steps_in_period."); + params.addParam( + "smoother", 1.0, "Relaxation factor applied when smoothing control updates."); + + params.addParam( + "stochastic", + true, + "If true, sample from the policy distribution; otherwise use the deterministic action."); - params.addParam>("min_control_value", {}, "The minimum values of the control signal."); - params.addParam>("max_control_value", {}, "The maximum calue of the control signal."); + params.addParam>( + "min_control_value", {}, "The minimum values of the control signal."); + params.addParam>( + "max_control_value", {}, "The maximum values of the control signal."); params.addParam>( "action_standard_deviations", {}, @@ -48,7 +111,9 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) _previous_control_signal(std::vector(_control_names.size(), 0.0)), _current_smoothed_signal(std::vector(_control_names.size(), 0.0)), _call_counter(0), - _num_steps_in_period(getParam("num_stems_in_period")), + _num_steps_in_period(parameters.isParamSetByUser("num_steps_in_period") + ? 
getParam("num_steps_in_period") + : getParam("num_stems_in_period")), _smoother(getParam("smoother")), _stochastic(getParam("stochastic")) { @@ -56,7 +121,7 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) if (isParamValid("seed")) torch::manual_seed(getParam("seed")); - if (isParamValid("filename")) + if (parameters.isParamSetByUser("filename")) loadControlNeuralNetFromFile(parameters); } @@ -83,24 +148,26 @@ LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & paramet const std::vector & minimum_values = getParam>("min_control_value"); const std::vector & maximum_values = getParam>("max_control_value"); - auto nn = std::make_shared( - filename, num_inputs, num_outputs, num_neurons_per_layer, activation_functions, minimum_values, maximum_values); + auto nn = std::make_shared(filename, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_functions, + minimum_values, + maximum_values); try { torch::load(nn, filename); - _actor_nn = std::make_shared(*nn); - _nn = _actor_nn; } - catch (const c10::Error & e) + catch (const c10::Error &) { - mooseError( - "The requested pytorch parameter file could not be loaded. This can either be the" - "result of the file not existing or a misalignment in the generated container and" - "the data in the file. Make sure the dimensions of the generated neural net are the" - "same as the dimensions of the parameters in the input file!\n", - e.msg()); + loadActorParametersWithLegacyFallback(*nn, filename); + _console << "Loaded requested legacy .pt file." << std::endl; } + + _actor_nn = std::make_shared(*nn); + _nn = _actor_nn; } } From 10b2974d1f978802c168fa9c195949acfd84539f Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 21 Apr 2026 14:45:31 -0600 Subject: [PATCH 27/51] Simplify the trainer. 
--- .../surrogates/LibtorchDRLControlTrainer.h | 136 +--- .../trainers/LibtorchDRLControlTrainer.C | 762 ++++-------------- 2 files changed, 187 insertions(+), 711 deletions(-) diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 24a68a530d19..76a3becb5fd4 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -13,17 +13,19 @@ #include #include "LibtorchActorNeuralNet.h" +#include "LibtorchObservationHistory.h" +#include "LibtorchRLMiniBatchSampler.h" +#include "LibtorchRLPPOLoss.h" +#include "LibtorchRLTrajectoryBuffer.h" +#include "LibtorchRLValueEstimator.h" #include "libmesh/utility.h" #include "SurrogateTrainer.h" /** - * This trainer is responsible for training neural networks that efficiently control - * different processes. It utilizes the Proximal Policy Optimization algorithms. For more - * information on the algorithm, see the following resources: Schulman, John, et al. "Proximal - * policy optimization algorithms." arXiv preprint arXiv:1707.06347 (2017). - * https://medium.com/analytics-vidhya/coding-ppo-from-scratch-with-pytorch-part-1-4-613dfc1b14c8 - * https://stable-baselines.readthedocs.io/en/master/modules/ppo2.html + * Fixed-horizon actor-critic trainer that collects trajectories from MOOSE reporters and runs a + * PPO update on top of reusable RL-core components (observation history, trajectory buffer, + * mini-batch sampler, value estimator, and PPO loss). 
*/ class LibtorchDRLControlTrainer : public SurrogateTrainerBase { @@ -46,7 +48,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase std::vector sampleStdEpsiodeRewards() { return _sample_std_episode_reward; } /// The condensed training function - void trainController(); + void trainController(const LibtorchRLTrajectoryBuffer::TensorBatch & batch); const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const { return *_control_nn; } @@ -54,47 +56,6 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Compute the average eposiodic reward void computeEpisodeRewardStatistics(); - /** - * Function to convert input/output data from std::vector to torch::tensor - * @param vector_data The input data in vector-vectors format - * @param tensor_data The tensor where we would like to save the results - * @param detach If the gradient info needs to be detached from the tensor - */ - void convertDataToTensor(std::vector>> & vector_data, - torch::Tensor & tensor_data, - const bool detach = false); - - /** - * Function to convert input/output data from std::vector to torch::tensor - * @param vector_data The input data in vector-vectors format - * @param tensor_data The tensor where we would like to save the results - * @param detach If the gradient info needs to be detached from the tensor - */ - void convertDataToTensor(std::vector> & vector_data, - torch::Tensor & tensor_data, - const bool detach = false); - - /** - * Function which evaluates the critic to get the value (discounter reward) - * @param input The observation values (responses) - * @return The estimated value - */ - torch::Tensor evaluateValue(torch::Tensor & input); - - /** - * Function which evaluates the control net and then computes the logarithmic probability of the - * action - * @param input The observation values (responses) - * @param output The actions corresponding to the observations - * @return The estimated value for the logarithmic probability - */ - torch::Tensor 
evaluateAction(torch::Tensor & input, torch::Tensor & output); - - /// Compute the return value by discounting the rewards and summing them - void computeReturn(std::vector> & data, - const std::vector> & reward, - const Real decay_factor); - /// Reset data after updating the neural network void resetData(); @@ -140,21 +101,6 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Number of outputs for the control neural network unsigned int _num_outputs; - ///@{ - std::vector>> _state_data; - std::vector>> _next_state_data; - std::vector>> _action_data; - std::vector>> _log_probability_data; - ///@} - - ///@{ - /// The reward and return data. The return is calculated using the _reward_data - std::vector> _reward_data; - std::vector> _return_data; - std::vector> _delta_data; - std::vector> _gae_data; - ///@} - /// Number of epochs for the training of the emulator const unsigned int _num_epochs; @@ -180,9 +126,6 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase const Real _decay_factor; const Real _lambda_factor; - /// Standard deviation for the actions - const std::vector _action_std; - /// Name of the pytorch output file. 
This is used for loading and storing /// already existing data const std::string _filename_base; @@ -203,7 +146,6 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase std::vector _sample_average_episode_reward; std::vector _sample_std_episode_reward; - std::vector _sample_lengths; /// Switch to enable the standardization of the advantages const bool _standardize_advantage; @@ -221,15 +163,6 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Pointer to the critic neural net object std::shared_ptr _critic_nn; - /// Torch::tensor version of the input and action data - torch::Tensor _state_tensor; - torch::Tensor _next_state_tensor; - torch::Tensor _action_tensor; - torch::Tensor _gae_tensor; - torch::Tensor _return_tensor; - torch::Tensor _delta_tensor; - torch::Tensor _log_probability_tensor; - Real _highest_reward; Real _entropy_coeff; @@ -237,51 +170,28 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase std::unique_ptr _critic_optimizer; private: - /** - * Extract the response values from the postprocessors of the controlled system. - * This assumes that they are stored in an AccumulateReporter - * @param data The data where we would like to store the response values - * @param reporter_names The names of the reporters which need to be extracted - * @param num_timesteps The number of timesteps we want to use for training - */ - void getResponseDataFromReporter(std::vector>> & data, - std::vector>> & next_data, - const std::vector> *> & reporter_links, - const unsigned int num_timesteps); - /** - * Extract the signal (actions, logarithmic probabilities) values from the postprocessors - * of the controlled system. 
This assumes that they are stored in an AccumulateReporter - * @param data The data where we would like to store the output values - * @param reporter_names The names of the reporters which need to be extracted - */ - void getSignalDataFromReporter(std::vector>> & data, - const std::vector> *> & reporter_links); - - void computeCumulativeRewardEstimate(std::vector> & data, - std::vector>> & state, - std::vector>> & next_state, - std::vector> & reward); - - void normalizeResponseData(std::vector>> & data, - const unsigned int num_timesteps); - - /** - * Extract the reward values from the postprocessors of the controlled system - * This assumes that they are stored in an AccumulateReporter. - * @param data The data where we would like to store the reward values - * @param reporter_names The name of the reporter which need to be extracted - */ - void getRewardDataFromReporter(std::vector> & data, - const std::vector> * const reporter_link); - /// Getting reporter pointers with given names void getReporterPointers(const std::vector & reporter_names, std::vector> *> & pointer_storage); + void collectTrajectoriesFromReporters(); + + unsigned int computeNumTransitions(std::size_t raw_sequence_size) const; + + std::vector extractDownsampledSequence(const std::vector & sample, + unsigned int offset, + unsigned int num_entries) const; + /// Counter for number of transient simulations that have been run before updating the controller unsigned int _update_counter; unsigned int _timestep_window; + + const LibtorchObservationHistory _observation_history; + LibtorchRLTrajectoryBuffer _trajectory_buffer; + const LibtorchRLMiniBatchSampler _sampler; + const LibtorchRLValueEstimator _value_estimator; + const LibtorchRLPPOLoss _ppo_loss; }; #endif diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 6e91f44807f2..858b0beaa8df 100644 --- 
a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -9,11 +9,10 @@ #ifdef MOOSE_LIBTORCH_ENABLED -#include "LibtorchDataset.h" -#include "LibtorchUtils.h" #include "LibtorchDRLControlTrainer.h" -#include "Sampler.h" -#include "Function.h" + +#include +#include registerMooseObject("StochasticToolsApp", LibtorchDRLControlTrainer); @@ -23,7 +22,7 @@ LibtorchDRLControlTrainer::validParams() InputParameters params = SurrogateTrainerBase::validParams(); params.addClassDescription( - "Trains a neural network controller using the Proximal Policy Optimization (PPO) algorithm."); + "Trains a neural network controller using fixed-horizon PPO on top of the libtorch RL core."); params.addRequiredParam>( "response", "Reporter values containing the response values from the model."); @@ -52,8 +51,8 @@ LibtorchDRLControlTrainer::validParams() "data from the previous timesteps will be used as inputs in the training."); params.addParam("skip_num_rows", 1, - "Number of rows to ignore from training. We usually skip the 1st " - "row from the reporter since it contains only initial values."); + "Unused compatibility parameter reserved for future reporter-row " + "offset handling."); params.addRequiredParam("num_epochs", "Number of epochs for the training."); @@ -104,11 +103,7 @@ LibtorchDRLControlTrainer::validParams() "Decay factor for calculating the return. This accounts for decreased " "reward values from the later steps."); - params.addRangeCheckedParam( - "lambda_factor", - 1.0, - "0.0<=lambda_factor<=1.0", - "GAE lambda."); + params.addRangeCheckedParam("lambda_factor", 1.0, "0.0<=lambda_factor<=1.0", "GAE lambda."); params.addParam( "read_from_file", false, "Switch to read the neural network parameters from a file."); @@ -125,17 +120,21 @@ LibtorchDRLControlTrainer::validParams() "The frequency which is used to print the loss values. 
If 0, the " "loss values are not printed."); params.addParam("batch_size", 100, "Batch size"); - params.addParam>("min_control_value", {}, "The minimum values of the control signal."); - params.addParam>("max_control_value", {}, "The maximum calue of the control signal."); + params.addParam>( + "min_control_value", {}, "The minimum values of the control signal."); + params.addParam>( + "max_control_value", {}, "The maximum values of the control signal."); params.addParam>( "action_standard_deviations", {}, "Deprecated compatibility parameter. Actor policies now learn their own action " "distribution widths."); - params.addParam("entropy_coeff", 0.01, "ASDASD"); + params.addParam( + "entropy_coeff", 0.01, "Entropy bonus coefficient used in the PPO actor loss."); - params.addParam("timestep_window", 1, "Data acquisition timesteps (every nth)"); + params.addParam( + "timestep_window", 1, "Use every nth reporter timestep when assembling trajectories."); return params; } @@ -144,11 +143,11 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par : SurrogateTrainerBase(parameters), _state_names(getParam>("response")), _state_shift_factors(isParamValid("response_shift_factors") - ? getParam>("response_shift_factors") - : std::vector(_state_names.size(), 0.0)), + ? getParam>("response_shift_factors") + : std::vector(_state_names.size(), 0.0)), _state_scaling_factors(isParamValid("response_scaling_factors") - ? getParam>("response_scaling_factors") - : std::vector(_state_names.size(), 1.0)), + ? 
getParam>("response_scaling_factors") + : std::vector(_state_names.size(), 1.0)), _action_names(getParam>("control")), _log_probability_names(getParam>("log_probability")), _reward_name(getParam("reward")), @@ -156,10 +155,6 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _input_timesteps(getParam("input_timesteps")), _num_inputs(_input_timesteps * _state_names.size()), _num_outputs(_action_names.size()), - _state_data(std::vector>>(_num_inputs)), - _next_state_data(std::vector>>(_num_inputs)), - _action_data(std::vector>>(_num_outputs)), - _log_probability_data(std::vector>>(_num_outputs)), _num_epochs(getParam("num_epochs")), _num_critic_neurons_per_layer( getParam>("num_critic_neurons_per_layer")), @@ -171,7 +166,6 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _clip_param(getParam("clip_parameter")), _decay_factor(getParam("decay_factor")), _lambda_factor(getParam("lambda_factor")), - _action_std(getParam>("action_standard_deviations")), _filename_base(isParamValid("filename_base") ? 
getParam("filename_base") : ""), _read_from_file(getParam("read_from_file")), _shift_outputs(getParam("shift_outputs")), @@ -183,7 +177,10 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _highest_reward(-1e8), _entropy_coeff(getParam("entropy_coeff")), _update_counter(_update_frequency), - _timestep_window(getParam("timestep_window")) + _timestep_window(getParam("timestep_window")), + _observation_history(_input_timesteps, _state_shift_factors, _state_scaling_factors), + _value_estimator(_decay_factor, _lambda_factor), + _ppo_loss(_clip_param, _entropy_coeff) { if (_state_names.size() != _state_shift_factors.size()) paramError("response_shift_factors", @@ -194,6 +191,11 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par "response_scaling_factors", "The number of normalization coefficients is not the same as the number of responses!"); + if (_action_names.size() != _log_probability_names.size()) + paramError("log_probability", + "The number of log-probability reporters must match the number of control " + "reporters."); + // We establish the links with the chosen reporters getReporterPointers(_state_names, _state_value_pointers); getReporterPointers(_action_names, _action_value_pointers); @@ -240,10 +242,10 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _num_critic_neurons_per_layer, getParam>("critic_activation_functions")); - _actor_optimizer = std::make_unique(_control_nn->parameters(), - torch::optim::AdamOptions(_control_learning_rate)); - _critic_optimizer = std::make_unique(_critic_nn->parameters(), - torch::optim::AdamOptions(_critic_learning_rate)); + _actor_optimizer = std::make_unique( + _control_nn->parameters(), torch::optim::AdamOptions(_control_learning_rate)); + _critic_optimizer = std::make_unique( + _critic_nn->parameters(), torch::optim::AdamOptions(_critic_learning_rate)); // We read parameters for the critic neural net if it is requested if 
(_read_from_file) @@ -262,452 +264,140 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par else if (filename_valid) torch::save(_critic_nn, _critic_nn->name()); - // Define the optimizers for the training - // torch::optim::Adam actor_optimizer(_control_nn->parameters(), - // torch::optim::AdamOptions(_control_learning_rate)); - - // torch::optim::Adam critic_optimizer(_critic_nn->parameters(), - // torch::optim::AdamOptions(_critic_learning_rate)); - - // auto obs = torch::zeros({4,10}, torch::TensorOptions().dtype(torch::kFloat64)); - // for (int i : make_range(10)) - // for (int j : make_range(4)) - // obs.index_put_({j, i}, j+0.1*(i+1)); - - // auto action = torch::zeros({4,1}, torch::TensorOptions().dtype(torch::kFloat64)); - // for (int j : make_range(4)) - // action.index_put_({j, 0}, 0.01+j*0.005); - - // auto log_prob = torch::zeros({4,1}, torch::TensorOptions().dtype(torch::kFloat64)); - // for (int j : make_range(4)) - // log_prob.index_put_({j, 0}, 2.3-j*0.2); - - // auto reward = torch::zeros({4,1}, torch::TensorOptions().dtype(torch::kFloat64)); - // for (int j : make_range(4)) - // reward.index_put_({j, 0}, -2.9-j*0.1); - - // auto ret = torch::zeros({4,1}, torch::TensorOptions().dtype(torch::kFloat64)); - // Real v = 0.0; - // for (int j : make_range(4)) - // { - // v = reward.index({3-j, 0}).item()+0.95*v; - // ret.index_put_({3-j, 0}, v); - // } - - // std::cout << "states" << std::endl; - // std::cout << obs << std::endl; - // std::cout << "actions" << std::endl; - // std::cout << action << std::endl; - // std::cout << "logprobs" << std::endl; - // std::cout << log_prob << std::endl; - // std::cout << "reward" << std::endl; - // std::cout << reward << std::endl; - // std::cout << "return" << std::endl; - // std::cout << ret << std::endl; - - // auto value = evaluateValue(obs).detach(); - - // std::cout << "evaluate V" << std::endl; - // std::cout << value << std::endl; - - // auto advantage = ret - value; - - // 
std::cout << "advantage" << std::endl; - // std::cout << advantage << std::endl; - - // // Get the approximate return from the neural net again (this one does have an associated - // // gradient) - // value = evaluateValue(obs); - - // auto new_action = _control_nn->evaluate(obs, true); - - // std::cout << "new action" << std::endl; - // std::cout << new_action << std::endl; - - // // std::cout << "new action " << new_action << std::endl; - // // Get the approximate logarithmic action probability using the control neural net - // auto curr_log_probability = _control_nn->logProbability(action); - - // // std::cout << "log probability " << curr_log_probability << std::endl; - - // // Prepare the ratio by using the e^(logx-logy)=x/y expression - // auto ratio = (curr_log_probability - log_prob).exp(); - - // std::cout << "ratio" << std::endl; - // std::cout << ratio << std::endl; - - // // Use clamping for limiting - // auto surr1 = ratio * advantage; - // auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage; - - // // Compute loss values for the critic and the control neural net - // auto actor_loss = -(torch::min(surr1, surr2) + 0.01*_control_nn->entropy()).mean(); - // auto critic_loss = torch::mse_loss(value, ret); - - // std::cout << "actor loss" << std::endl; - // std::cout << actor_loss << std::endl; - - // std::cout << "critic loss" << std::endl; - // std::cout << critic_loss << std::endl; - - // // Update the weights in the neural nets - // actor_optimizer.zero_grad(); - // actor_loss.backward(); - // actor_optimizer.step(); - - // critic_optimizer.zero_grad(); - // critic_loss.backward(); - // critic_optimizer.step(); - _control_nn->initializeNeuralNetwork(); - - // std::cout << "Control NN" << std::endl; - // const auto & control_params = _control_nn->named_parameters(); - // for (const auto & param_i : make_range(control_params.size())) - // { - // // We cast the parameters into a 1D vector - // std::cout << 
Moose::stringify(std::vector( - // control_params[param_i].value().data_ptr(), - // control_params[param_i].value().data_ptr() +control_params[param_i].value().numel())) << std::endl; - // } - _critic_nn->initializeNeuralNetwork(); - - // std::cout << "Critic NN" << std::endl; - // const auto & critic_params = _critic_nn->named_parameters(); - // for (const auto & param_i : make_range(critic_params.size())) - // { - // // We cast the parameters into a 1D vector - // std::cout << Moose::stringify(std::vector( - // critic_params[param_i].value().data_ptr(), - // critic_params[param_i].value().data_ptr() + critic_params[param_i].value().numel())) << std::endl; - // } - - // mooseError("Bazinga"); } void LibtorchDRLControlTrainer::execute() { - // Extract data from the reporters - getResponseDataFromReporter(_state_data, _next_state_data, _state_value_pointers, _input_timesteps); - getSignalDataFromReporter(_action_data, _action_value_pointers); - getSignalDataFromReporter(_log_probability_data, _log_probability_value_pointers); - getRewardDataFromReporter(_reward_data, _reward_value_pointer); + collectTrajectoriesFromReporters(); _update_counter--; - // Only update the NNs when - if (_update_counter == 0) - { - // Calculate return from the reward (discounting the reward) - computeReturn(_return_data, _reward_data, _decay_factor); - - // We compute the average reward first - computeEpisodeRewardStatistics(); + if (_update_counter != 0 || _trajectory_buffer.empty()) + return; - if(_average_episode_reward > _highest_reward) - { - torch::save(_control_nn, _control_nn->name()+"_best"); - _highest_reward = _average_episode_reward; - } + computeEpisodeRewardStatistics(); - normalizeResponseData(_state_data, _input_timesteps); - normalizeResponseData(_next_state_data, _input_timesteps); - - computeCumulativeRewardEstimate(_delta_data, _state_data, _next_state_data, _reward_data); - - computeReturn(_gae_data, _delta_data, _decay_factor*_lambda_factor); - - // Transform 
input/output/return data to torch::Tensor - convertDataToTensor(_state_data, _state_tensor); - convertDataToTensor(_next_state_data, _next_state_tensor); - convertDataToTensor(_action_data, _action_tensor); - convertDataToTensor(_log_probability_data, _log_probability_tensor); - - // Discard (detach) the gradient info for return data - convertDataToTensor(_return_data, _return_tensor, true); - convertDataToTensor(_gae_data, _gae_tensor, true); + if (_average_episode_reward > _highest_reward) + { + torch::save(_control_nn, _control_nn->name() + "_best"); + _highest_reward = _average_episode_reward; + } - // We train the controller using the emulator to get a good control strategy - trainController(); + _value_estimator.computeValueTargets(_trajectory_buffer, *_critic_nn); + const auto batch = _trajectory_buffer.flatten(); - // We clean the training data after controller update and reset the counter - resetData(); - } + trainController(batch); + resetData(); } void LibtorchDRLControlTrainer::computeEpisodeRewardStatistics() { - if (_reward_data.size()) + if (_trajectory_buffer.empty()) { _average_episode_reward = 0.0; _std_episode_reward = 0.0; - unsigned int combined_sizes = 0; - _sample_average_episode_reward.clear(); _sample_std_episode_reward.clear(); + return; + } - for (const auto & sample : _reward_data) - { - const unsigned int sample_size = sample.size(); - - Real sum = std::accumulate(sample.begin(), sample.end(), 0.0); - Real mean = sum / sample_size; - _sample_average_episode_reward.push_back(mean); - - - Real variance = std::transform_reduce(sample.begin(), sample.end(), - 0.0, - std::plus<>(), - [mean](double value) { - return (value - mean) * (value - mean); - } - ); - _sample_std_episode_reward.push_back(std::sqrt(variance / sample_size)); - - _average_episode_reward += sum; - _std_episode_reward += variance; - combined_sizes += sample_size; - } - _average_episode_reward = _average_episode_reward/combined_sizes; - _std_episode_reward = 
std::sqrt(_std_episode_reward/combined_sizes); + _average_episode_reward = 0.0; + _std_episode_reward = 0.0; + unsigned int combined_sizes = 0; + _sample_average_episode_reward.clear(); + _sample_std_episode_reward.clear(); + for (const auto & trajectory : _trajectory_buffer.trajectories()) + { + const auto & sample = trajectory.rewards; + const unsigned int sample_size = sample.size(); + if (!sample_size) + continue; + + const Real sum = std::accumulate(sample.begin(), sample.end(), 0.0); + const Real mean = sum / sample_size; + _sample_average_episode_reward.push_back(mean); + + const Real variance = + std::transform_reduce(sample.begin(), + sample.end(), + 0.0, + std::plus<>(), + [mean](const Real value) { return (value - mean) * (value - mean); }); + _sample_std_episode_reward.push_back(std::sqrt(variance / sample_size)); + + _average_episode_reward += sum; + _std_episode_reward += variance; + combined_sizes += sample_size; } - else + + if (!combined_sizes) { _average_episode_reward = 0.0; _std_episode_reward = 0.0; + return; } -} -void -LibtorchDRLControlTrainer::computeReturn(std::vector> & data, - const std::vector> & reward, - const Real decay_factor) -{ - // Discount the reward to get the return value, we need this to be able to anticipate - // rewards based on the current behavior. We go backwards in samples and backwards in - // accumulation. 
- for (const auto sample_i : index_range(reward)) - { - std::vector sample_return; - Real discounted_reward(0.0); - const auto sample_size = reward[sample_i].size(); - for (const auto time_i : make_range(sample_size)) - { - discounted_reward = reward[sample_i][sample_size - time_i - 1] + discounted_reward * decay_factor; - - // We are inserting to the front of the vector and push the rest back, this will - // ensure that the first element of the vector is the discounter reward for the whole transient - sample_return.insert(sample_return.begin(), discounted_reward); - } - - // Save and accumulate the return values - data.push_back(std::move(sample_return)); - } + _average_episode_reward /= combined_sizes; + _std_episode_reward = std::sqrt(_std_episode_reward / combined_sizes); } void -LibtorchDRLControlTrainer::computeCumulativeRewardEstimate(std::vector> & data, - std::vector>> & state, - std::vector>> & next_state, - std::vector> & reward) +LibtorchDRLControlTrainer::trainController(const LibtorchRLTrajectoryBuffer::TensorBatch & batch) { - for (const auto sample_i : index_range(reward)) - { - torch::Tensor observations; - torch::Tensor next_observations; - torch::Tensor reward_tensor; - - LibtorchUtils::vectorToTensor(reward[sample_i], reward_tensor, true); - - for (const auto feature_i : index_range(state)) - { - torch::Tensor input_row; - torch::Tensor next_input_row; - LibtorchUtils::vectorToTensor(state[feature_i][sample_i], input_row, true); - LibtorchUtils::vectorToTensor(next_state[feature_i][sample_i], next_input_row, true); - - if (feature_i == 0) - { - observations = input_row; - next_observations = next_input_row; - } - else - { - observations = torch::cat({observations, input_row}, 1); - next_observations = torch::cat({next_observations, next_input_row}, 1); - } - } - - // std::cout << "going to GAE" << std::endl; - // std::cout << observations << std::endl; - // std::cout << next_observations << std::endl; + if (!batch.size()) + return; - - auto 
value = evaluateValue(observations).detach(); - auto value_next = evaluateValue(next_observations).detach(); - - // std::cout << "values" << std::endl; - // std::cout << value << std::endl; - // std::cout << value_next << std::endl; - - auto delta = reward_tensor + _decay_factor*value_next - value; - - // std::cout << "delta" << std::endl; - // std::cout << delta << std::endl; - - std::vector delta_vector; - LibtorchUtils::tensorToVector(delta, delta_vector); - - data.push_back(std::move(delta_vector)); - } -} - -void -LibtorchDRLControlTrainer::trainController() -{ // We only train on the rank 0 partition. Libtorch should still be able to // fetch the local threads which are available. if (processor_id() == 0) { - // std::cout << "Training" << std::endl; - // std::cout << "Input tensor" << std::endl << _state_tensor << std::endl; - // std::cout << "Input tensor" << std::endl << _next_state_tensor << std::endl; - // std::cout << "Signal tensor" << std::endl << _action_tensor << std::endl; - // std::cout << "Logprob tensor" << std::endl << _log_probability_tensor << std::endl; - // std::cout << "reward" << std::endl << Moose::stringify(_reward_data) << std::endl; - // std::cout << "Return tensor" << std::endl << _return_tensor << std::endl; - // std::cout << "GAE" << std::endl << _gae_tensor << std::endl; - - // Define the optimizers for the training - // torch::optim::Adam actor_optimizer(_control_nn->parameters(), - // torch::optim::AdamOptions(_control_learning_rate)); - - // torch::optim::Adam critic_optimizer(_critic_nn->parameters(), - // torch::optim::AdamOptions(_critic_learning_rate)); - - // Compute the approximate value (return) from the critic neural net and use it to compute an - // advantage - - // Transform the dataset se that the loader has an easier time - auto input_size = _state_tensor.sizes()[0]; - auto batch_size = getParam("batch_size"); - // auto data_loader = torch::data::make_data_loader(std::move(transformed_data_set), batch_size); - for 
(unsigned int epoch = 0; epoch < _num_epochs; ++epoch) { - auto permutation = torch::randperm(input_size); - unsigned int batch_begin = 0; - unsigned int batch_end = 0; - while (batch_end < input_size) + const auto mini_batches = + _sampler.sample(batch, getParam("batch_size"), _standardize_advantage); + bool printed_losses = false; + for (const auto & mini_batch : mini_batches) { - batch_end = batch_begin + batch_size > input_size ? input_size : batch_begin + batch_size; - unsigned int offset = batch_end - batch_begin; - auto batch_permutation = permutation.narrow(0, batch_begin, offset); - auto obs_batch = _state_tensor.index({batch_permutation}); - auto action_batch = _action_tensor.index({batch_permutation}); - auto log_prob_batch = _log_probability_tensor.index({batch_permutation}); - auto return_batch = _return_tensor.index({batch_permutation}); - auto advantage_batch = _gae_tensor.index({batch_permutation}); - - if (_standardize_advantage) - advantage_batch = (advantage_batch - advantage_batch.mean()) / (advantage_batch.std() + 1e-10); - - // Get the approximate return from the neural net again (this one does have an associated - // gradient) - auto value = evaluateValue(obs_batch); - - auto new_action = _control_nn->evaluate(obs_batch, false); - - // std::cout << "new action " << new_action << std::endl; - // Get the approximate logarithmic action probability using the control neural net - auto curr_log_probability = _control_nn->logProbability(action_batch); - - // std::cout << "log probability " << curr_log_probability << std::endl; - - // Prepare the ratio by using the e^(logx-logy)=x/y expression - auto ratio = (curr_log_probability - log_prob_batch).exp(); - - // Use clamping for limiting - auto surr1 = ratio * advantage_batch; - auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage_batch; - - // Compute loss values for the critic and the control neural net - auto actor_loss = -(torch::min(surr1, surr2) + 
_entropy_coeff*_control_nn->entropy()).mean(); - auto critic_loss = torch::mse_loss(value, return_batch); + const auto losses = _ppo_loss.compute(*_control_nn, *_critic_nn, mini_batch); - // Update the weights in the neural nets _actor_optimizer->zero_grad(); - actor_loss.backward(); + losses.actor_loss.backward(); _actor_optimizer->step(); _critic_optimizer->zero_grad(); - critic_loss.backward(); + losses.critic_loss.backward(); _critic_optimizer->step(); - // std::cout << "Control NN" << std::endl; - // const auto & control_params = _control_nn->named_parameters(); - // for (const auto & param_i : make_range(control_params.size())) - // { - // // We cast the parameters into a 1D vector - // std::cout << Moose::stringify(std::vector( - // control_params[param_i].value().data_ptr(), - // control_params[param_i].value().data_ptr() +control_params[param_i].value().numel())) << std::endl; - // } - - // std::cout << "Critic NN" << std::endl; - // const auto & critic_params = _critic_nn->named_parameters(); - // for (const auto & param_i : make_range(critic_params.size())) - // { - // // We cast the parameters into a 1D vector - // std::cout << Moose::stringify(std::vector( - // critic_params[param_i].value().data_ptr(), - // critic_params[param_i].value().data_ptr() + critic_params[param_i].value().numel())) << std::endl; - // } - - // print loss per epoch - if (_loss_print_frequency) - if (epoch % _loss_print_frequency == 0 && batch_begin == 0) + if (_loss_print_frequency && epoch % _loss_print_frequency == 0 && !printed_losses) { _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN - << actor_loss.item() << COLOR_DEFAULT << " | Critic Loss: " - << COLOR_GREEN << critic_loss.item() << COLOR_DEFAULT << std::endl; + << losses.actor_loss.item() << COLOR_DEFAULT + << " | Critic Loss: " << COLOR_GREEN << losses.critic_loss.item() + << COLOR_DEFAULT << std::endl; + printed_losses = true; } - - batch_begin = batch_end; - } - // std::cout << 
_control_nn->stdTensor() << std::endl; - if (_min_values.size()) - { - std::cout << _control_nn->alphaTensor().mean() << std::endl; - std::cout << _control_nn->betaTensor().mean() << std::endl; } - _console << "Best model so far: " << _highest_reward << std::endl; } + + _console << "Best model so far: " << _highest_reward << std::endl; } // It is time to send the trained data to every other processor so that the neural networks // are the same on all ranks. TODO: Make sure this can be done on a GPU as well. for (auto & param : _control_nn->named_parameters()) { - MPI_Bcast(param.value().data_ptr(), - param.value().numel(), - MPI_DOUBLE, - 0, - _communicator.get()); + MPI_Bcast(param.value().data_ptr(), param.value().numel(), MPI_DOUBLE, 0, _communicator.get()); } for (auto & param : _critic_nn->named_parameters()) { - MPI_Bcast(param.value().data_ptr(), - param.value().numel(), - MPI_DOUBLE, - 0, - _communicator.get()); + MPI_Bcast(param.value().data_ptr(), param.value().numel(), MPI_DOUBLE, 0, _communicator.get()); } // Save the controller neural net so our controller can read it, we also save the critic if we @@ -716,224 +406,100 @@ LibtorchDRLControlTrainer::trainController() torch::save(_critic_nn, _critic_nn->name()); } -void -LibtorchDRLControlTrainer::convertDataToTensor(std::vector>> & vector_data, - torch::Tensor & tensor_data, - const bool detach) -{ - for (const auto feature_i : index_range(vector_data)) - { - if (vector_data[feature_i].size()) - { - torch::Tensor concatenated_feature; - convertDataToTensor(vector_data[feature_i], concatenated_feature, detach); - - if (feature_i == 0) - tensor_data = concatenated_feature; - else - tensor_data = torch::cat({tensor_data, concatenated_feature}, 1); - } - } - - if (detach) - tensor_data.detach(); -} - -void -LibtorchDRLControlTrainer::convertDataToTensor(std::vector> & vector_data, - torch::Tensor & tensor_data, - const bool detach) -{ - if (vector_data.size()) - { - for (const auto vector_i : 
index_range(vector_data)) - { - torch::Tensor input_row; - LibtorchUtils::vectorToTensor(vector_data[vector_i], input_row, detach); - - if (vector_i == 0) - tensor_data = input_row; - else - tensor_data = torch::cat({tensor_data, input_row}, 0); - } - - if (detach) - tensor_data.detach(); - } -} - -torch::Tensor -LibtorchDRLControlTrainer::evaluateValue(torch::Tensor & input) -{ - return _critic_nn->forward(input); -} - void LibtorchDRLControlTrainer::resetData() { - for (auto & data : _state_data) - data.clear(); - for (auto & data : _next_state_data) - data.clear(); - for (auto & data : _action_data) - data.clear(); - for (auto & data : _log_probability_data) - data.clear(); - - _reward_data.clear(); - _return_data.clear(); - _gae_data.clear(); - _delta_data.clear(); - - + _trajectory_buffer.clear(); _update_counter = _update_frequency; } void -LibtorchDRLControlTrainer::getResponseDataFromReporter( - std::vector>> & data_current, - std::vector>> & data_next, - const std::vector> *> & reporter_links, - const unsigned int num_timesteps) +LibtorchDRLControlTrainer::collectTrajectoriesFromReporters() { - for (const auto & state_i : index_range(reporter_links)) + for (const auto sample_i : index_range(*_reward_value_pointer)) { - // Fetch the vector of time series for a given reporter - const std::vector> & reporter_data = *reporter_links[state_i]; - - // Made it to the inner loop which is just the different samples - for (const auto & start_i : make_range(num_timesteps)) + const auto & reward_sample = (*_reward_value_pointer)[sample_i]; + const auto num_transitions = computeNumTransitions(reward_sample.size()); + if (!num_transitions) + continue; + + std::vector> normalized_responses(_state_names.size()); + for (const auto state_i : index_range(_state_value_pointers)) + normalized_responses[state_i] = extractDownsampledSequence( + (*_state_value_pointers[state_i])[sample_i], 0, num_transitions + 1); + + 
_observation_history.normalizeTrajectoryInPlace(normalized_responses); + + LibtorchRLTrajectoryBuffer::Trajectory trajectory; + trajectory.observations.reserve(num_transitions); + trajectory.next_observations.reserve(num_transitions); + trajectory.actions.assign(num_transitions, std::vector()); + trajectory.log_probabilities.assign(num_transitions, std::vector()); + + for (auto & action_row : trajectory.actions) + action_row.reserve(_action_names.size()); + for (auto & log_probability_row : trajectory.log_probabilities) + log_probability_row.reserve(_log_probability_names.size()); + + for (const auto step_i : make_range(num_transitions)) { - const auto input_i = start_i*reporter_links.size() + state_i; - for (const auto & sample : reporter_data) - { - const unsigned int sample_vector_size = sample.size() - _shift_outputs; - const unsigned int num_entries_kept = sample_vector_size / _timestep_window; - std::vector split_sample(num_entries_kept, 0.0); - std::vector next_split_sample(num_entries_kept, 0.0); - - unsigned int current_real_i = 0; - unsigned int next_current_real_i = 0; - for (unsigned int time_i = 0; time_i < sample_vector_size; ++time_i) - { - if (!(time_i % _timestep_window)) - { - if (time_i < start_i) - split_sample[current_real_i] = sample[0]; - else - { - const auto shifted_i = time_i - start_i; - split_sample[current_real_i] = sample[shifted_i]; - } - current_real_i++; - } - - if (!(time_i % _timestep_window) && (time_i + _timestep_window < sample_vector_size + _shift_outputs)) - { - const auto shifted_i = time_i + _timestep_window - start_i; - next_split_sample[next_current_real_i] = sample[shifted_i]; - next_current_real_i++; - } - } - - data_current[input_i].push_back(std::move(split_sample)); - data_next[input_i].push_back(std::move(next_split_sample)); - } + trajectory.observations.push_back( + _observation_history.stackTrajectoryObservation(normalized_responses, step_i)); + trajectory.next_observations.push_back( + 
_observation_history.stackTrajectoryObservation(normalized_responses, step_i + 1)); } - } - // std::cout << " finished " << std::endl; -} -void LibtorchDRLControlTrainer::normalizeResponseData(std::vector>> & data, - const unsigned int num_timesteps) -{ - // std::cout << " Normalizing " << Moose::stringify(data) << std::endl; - // We have multiple reporters, each has a time series for each sample - const auto num_reporters = data.size() / num_timesteps; - for (const auto & rep_i : make_range(num_reporters)) - { - // We shift and scale the inputs to get better training efficiency - for (const auto & start_step : make_range(num_timesteps)) + for (const auto action_i : index_range(_action_value_pointers)) { - unsigned int real_i = num_reporters * start_step + rep_i; + const auto action_sequence = extractDownsampledSequence( + (*_action_value_pointers[action_i])[sample_i], _shift_outputs, num_transitions); + const auto log_probability_sequence = extractDownsampledSequence( + (*_log_probability_value_pointers[action_i])[sample_i], _shift_outputs, num_transitions); - for (const auto sample_i : index_range(data[real_i])) + for (const auto step_i : make_range(num_transitions)) { - std::transform( - data[real_i][sample_i].begin(), - data[real_i][sample_i].end(), - data[real_i][sample_i].begin(), - [this, &rep_i](Real value) -> Real - { return (value - _state_shift_factors[rep_i]) * _state_scaling_factors[rep_i]; }); + trajectory.actions[step_i].push_back(action_sequence[step_i]); + trajectory.log_probabilities[step_i].push_back(log_probability_sequence[step_i]); } } + + trajectory.rewards = + extractDownsampledSequence(reward_sample, _timestep_window, num_transitions); + + _trajectory_buffer.addTrajectory(std::move(trajectory)); } } -void -LibtorchDRLControlTrainer::getSignalDataFromReporter( - std::vector>> & data, - const std::vector> *> & reporter_links) +unsigned int +LibtorchDRLControlTrainer::computeNumTransitions(const std::size_t raw_sequence_size) const { - for 
(const auto & action_i : index_range(reporter_links)) - { - // Fetch the vector of time series for a given reporter - const std::vector> & reporter_data = *reporter_links[action_i]; - - for (const auto & sample : reporter_data) - { - const unsigned int sample_vector_size = sample.size() - _shift_outputs; - const unsigned int num_entries_kept = sample_vector_size / _timestep_window; - std::vector action_for_sample(num_entries_kept, 0.0); + unsigned int num_transitions = 0; + for (std::size_t raw_index = 0; + raw_index + _timestep_window < raw_sequence_size + static_cast(_shift_outputs); + raw_index += _timestep_window) + ++num_transitions; - unsigned int real_i = 0; - for (const auto time_i : make_range(sample_vector_size)) - if (!(time_i % _timestep_window)) - { - action_for_sample[real_i] = sample[time_i + _shift_outputs]; - real_i++; - } - - data[action_i].push_back(std::move(action_for_sample)); - } - } + return num_transitions; } -void -LibtorchDRLControlTrainer::getRewardDataFromReporter(std::vector> & data, - const std::vector> * const reporter_link) +std::vector +LibtorchDRLControlTrainer::extractDownsampledSequence(const std::vector & sample, + const unsigned int offset, + const unsigned int num_entries) const { - // Fetch the vector of time series for a given reporter - const std::vector> & reporter_data = *reporter_link; + std::vector values; + values.reserve(num_entries); - for (const auto & sample : reporter_data) + for (const auto entry_i : make_range(num_entries)) { - const unsigned int sample_vector_size = sample.size() - _shift_outputs; - const unsigned int num_entries_kept = sample_vector_size / _timestep_window; - - std::vector reward_for_sample(num_entries_kept, 0.0); - - unsigned int real_i = 0; - for (const auto time_i : make_range(sample_vector_size)) - if (!(time_i % _timestep_window) && (time_i + _timestep_window < sample_vector_size + _shift_outputs)) - { - reward_for_sample[real_i] = sample[time_i + _timestep_window]; - real_i++; - } - - 
data.push_back(std::move(reward_for_sample)); + const auto raw_index = offset + entry_i * _timestep_window; + if (raw_index >= sample.size()) + mooseError("Reporter data is shorter than required by the configured timestep window and " + "history stacking."); + values.push_back(sample[raw_index]); } - // Fill the corresponding container - // for (const auto sample_i : index_range(*reporter_link)) - // { - // for (const unsigned int state_i = _shift_outputs; state_i < (*reporter_link)[sample_i].size(); state_i++) - // { - // if (!((state_i - _shift_outputs) % _timestep_window)) - // data.push_back((*reporter_link)[sample_i][state_i]); - // } - // } - // data.insert(data.end(), (*reporter_link)[sample_i].begin() + _shift_outputs, (*reporter_link)[sample_i].end()); + return values; } void From 559a99d93513aeae38cdf5f4255cdc7d08666861 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 21 Apr 2026 14:45:53 -0600 Subject: [PATCH 28/51] Add unit tests for the controller. --- .../unit/src/TestLibtorchRLCore.C | 129 ++++++++++++++++++ unit/src/LibtorchActorNeuralNetTest.C | 10 +- 2 files changed, 133 insertions(+), 6 deletions(-) create mode 100644 modules/stochastic_tools/unit/src/TestLibtorchRLCore.C diff --git a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C new file mode 100644 index 000000000000..1ec9cef14108 --- /dev/null +++ b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C @@ -0,0 +1,129 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "gtest/gtest.h" + +#include "LibtorchActorNeuralNet.h" +#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchObservationHistory.h" 
+#include "LibtorchRLMiniBatchSampler.h" +#include "LibtorchRLPPOLoss.h" +#include "LibtorchRLTrajectoryBuffer.h" +#include "LibtorchRLValueEstimator.h" + +#include + +namespace +{ + +TEST(LibtorchRLCoreTest, ObservationHistoryStacksCurrentAndTrajectoryData) +{ + LibtorchObservationHistory history(3, {10.0, -2.0}, {0.5, 2.0}); + + const auto normalized = history.normalize({16.0, -1.0}); + EXPECT_DOUBLE_EQ(normalized[0], 3.0); + EXPECT_DOUBLE_EQ(normalized[1], 2.0); + + std::vector> old_responses; + history.initializeHistory({1.0, 6.0}, old_responses); + + const auto stacked_current = history.stackCurrentObservation(normalized, old_responses); + EXPECT_EQ(stacked_current, std::vector({3.0, 2.0, 1.0, 6.0, 1.0, 6.0})); + + std::vector> trajectories = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; + const auto stacked_trajectory = history.stackTrajectoryObservation(trajectories, 2); + EXPECT_EQ(stacked_trajectory, std::vector({3.0, 6.0, 2.0, 5.0, 1.0, 4.0})); +} + +TEST(LibtorchRLCoreTest, ValueEstimatorComputesGAETargets) +{ + Moose::LibtorchArtificialNeuralNet value_network("value", 1, 1, {}, {"linear"}); + auto value_params = value_network.named_parameters(); + value_params[0].value().data().fill_(1.0); + value_params[1].value().data().fill_(0.0); + + LibtorchRLTrajectoryBuffer::Trajectory trajectory; + trajectory.observations = {{1.0}, {2.0}}; + trajectory.next_observations = {{2.0}, {3.0}}; + trajectory.rewards = {0.5, 1.0}; + + LibtorchRLValueEstimator estimator(0.9, 0.95); + const auto targets = estimator.estimate(trajectory, value_network); + + ASSERT_EQ(targets.advantages.size(), 2u); + ASSERT_EQ(targets.value_targets.size(), 2u); + EXPECT_NEAR(targets.advantages[0], 2.7535, 1e-12); + EXPECT_NEAR(targets.advantages[1], 1.7, 1e-12); + EXPECT_NEAR(targets.value_targets[0], 3.7535, 1e-12); + EXPECT_NEAR(targets.value_targets[1], 3.7, 1e-12); +} + +TEST(LibtorchRLCoreTest, PPOLossUsesStoredLogProbabilityAndValueTarget) +{ + constexpr Real pi = 3.14159265358979323846; + + 
Moose::LibtorchActorNeuralNet policy_network("policy", 1, 1, {}, {"linear"}); + policy_network.actionDistributionHead().primaryModule()->weight.data().fill_(0.0); + policy_network.actionDistributionHead().secondaryModule()->weight.data().fill_(0.0); + + Moose::LibtorchArtificialNeuralNet value_network("value", 1, 1, {}, {"linear"}); + auto value_params = value_network.named_parameters(); + value_params[0].value().data().fill_(0.0); + value_params[1].value().data().fill_(1.0); + + LibtorchRLMiniBatch batch; + batch.observations = torch::zeros({1, 1}, torch::TensorOptions().dtype(torch::kDouble)); + batch.actions = torch::zeros({1, 1}, torch::TensorOptions().dtype(torch::kDouble)); + batch.old_log_probabilities = + torch::tensor({{-0.5 * std::log(2.0 * pi)}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.value_targets = torch::tensor({{1.5}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.advantages = torch::tensor({{2.0}}, torch::TensorOptions().dtype(torch::kDouble)); + + LibtorchRLPPOLoss loss(0.2, 0.01); + const auto loss_values = loss.compute(policy_network, value_network, batch); + + const Real expected_entropy = 0.5 * std::log(2.0 * pi) + 0.5; + const Real expected_actor_loss = -(2.0 + 0.01 * expected_entropy); + const Real expected_critic_loss = 0.25; + + EXPECT_NEAR(loss_values.entropy.item(), expected_entropy, 1e-12); + EXPECT_NEAR(loss_values.actor_loss.item(), expected_actor_loss, 1e-12); + EXPECT_NEAR(loss_values.critic_loss.item(), expected_critic_loss, 1e-12); +} + +TEST(LibtorchRLCoreTest, MiniBatchSamplerStandardizesAdvantagesPerBatch) +{ + LibtorchRLTrajectoryBuffer::TensorBatch batch; + batch.observations = + torch::tensor({{0.0}, {1.0}, {2.0}, {3.0}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.actions = + torch::tensor({{0.1}, {0.2}, {0.3}, {0.4}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.log_probabilities = + torch::tensor({{-1.0}, {-1.1}, {-1.2}, {-1.3}}, torch::TensorOptions().dtype(torch::kDouble)); + 
batch.value_targets = + torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.advantages = + torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble)); + + LibtorchRLMiniBatchSampler sampler; + const auto mini_batches = sampler.sample(batch, 2, true); + + ASSERT_EQ(mini_batches.size(), 2u); + for (const auto & mini_batch : mini_batches) + { + ASSERT_EQ(mini_batch.size(), 2); + EXPECT_NEAR(mini_batch.advantages.mean().item(), 0.0, 1e-12); + } +} + +} // namespace + +#endif diff --git a/unit/src/LibtorchActorNeuralNetTest.C b/unit/src/LibtorchActorNeuralNetTest.C index 07f7bf12f05c..764509c1adbf 100644 --- a/unit/src/LibtorchActorNeuralNetTest.C +++ b/unit/src/LibtorchActorNeuralNetTest.C @@ -20,8 +20,6 @@ namespace class TestableLibtorchActorNeuralNet : public Moose::LibtorchActorNeuralNet { public: - using Moose::LibtorchActorNeuralNet::_alpha_module; - using Moose::LibtorchActorNeuralNet::_beta_module; using Moose::LibtorchActorNeuralNet::_weights; using Moose::LibtorchActorNeuralNet::LibtorchActorNeuralNet; }; @@ -46,13 +44,13 @@ TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability) "test_beta", 1, 1, {1}, {"linear"}, {min_value}, {max_value}); ASSERT_EQ(network._weights.size(), 1); - ASSERT_EQ(network._alpha_module.size(), 1); - ASSERT_EQ(network._beta_module.size(), 1); network._weights[0]->weight.data().fill_(0.0); network._weights[0]->bias.data().fill_(1.0); - network._alpha_module[0]->weight.data().fill_(inverseSoftplusPlusOne(alpha_target)); - network._beta_module[0]->weight.data().fill_(inverseSoftplusPlusOne(beta_target)); + network.actionDistributionHead().primaryModule()->weight.data().fill_( + inverseSoftplusPlusOne(alpha_target)); + network.actionDistributionHead().secondaryModule()->weight.data().fill_( + inverseSoftplusPlusOne(beta_target)); auto input = torch::zeros({1, 1}, at::kDouble); network.evaluate(input, false); From 661d0536301925a9113588ebc6a1bd032944a4d7 
Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 21 Apr 2026 15:40:06 -0600 Subject: [PATCH 29/51] Move the scaling to the network itself instead of always doing it on the trainer/control object. (#32511) --- .../utils/LibtorchActionDistributionHead.h | 7 +- .../libtorch/utils/LibtorchActorNeuralNet.h | 46 ++-- .../utils/LibtorchArtificialNeuralNet.h | 45 +++- .../utils/LibtorchObservationHistory.h | 2 + .../controls/LibtorchNeuralNetControl.C | 35 ++- .../utils/LibtorchActionDistributionHead.C | 105 ++++++-- .../libtorch/utils/LibtorchActorNeuralNet.C | 252 ++++++++++++++++-- .../utils/LibtorchArtificialNeuralNet.C | 197 ++++++++++++-- .../utils/LibtorchObservationHistory.C | 18 ++ .../libtorch_drl_control_trainer.i | 1 + .../surrogates/LibtorchDRLControlTrainer.h | 3 + .../libtorch/controls/LibtorchDRLControl.C | 79 ++---- .../libtorch/trainers/LibtorchANNTrainer.C | 2 +- .../trainers/LibtorchDRLControlTrainer.C | 44 ++- .../gold/parameter_read.csv | 20 +- .../libtorch_drl_control_trainer.i | 27 +- unit/src/LibtorchActorNeuralNetTest.C | 81 ++++++ 17 files changed, 781 insertions(+), 183 deletions(-) diff --git a/framework/include/libtorch/utils/LibtorchActionDistributionHead.h b/framework/include/libtorch/utils/LibtorchActionDistributionHead.h index 254d9fbcf3c7..359c7771b14b 100644 --- a/framework/include/libtorch/utils/LibtorchActionDistributionHead.h +++ b/framework/include/libtorch/utils/LibtorchActionDistributionHead.h @@ -37,7 +37,8 @@ class LibtorchActionDistributionHead : public torch::nn::Module const std::vector & maximum_values = {}, torch::DeviceType device_type = torch::kCPU, torch::ScalarType scalar_type = torch::kDouble, - bool build_on_construct = true); + bool build_on_construct = true, + const std::vector & output_scaling_factors = {}); LibtorchActionDistributionHead(const LibtorchActionDistributionHead & head, bool build_on_construct = true); @@ -46,6 +47,8 @@ class LibtorchActionDistributionHead : public torch::nn::Module void 
initialize(); + void synchronizeScalingFactorsFromBuffer(); + void reset(const torch::Tensor & input); torch::Tensor sample() const; @@ -76,10 +79,12 @@ class LibtorchActionDistributionHead : public torch::nn::Module const std::vector _maximum_values; const torch::DeviceType _device_type; const torch::ScalarType _data_type; + std::vector _output_scaling_factors; torch::nn::Linear _primary_parameter_module{nullptr}; torch::nn::Linear _secondary_parameter_module{nullptr}; + torch::Tensor _action_scale_tensor; torch::Tensor _min_tensor; torch::Tensor _max_tensor; torch::Tensor _alpha_tensor; diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h index 149550181ae4..514d6eac20ce 100644 --- a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -38,14 +38,17 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet const std::vector & maximum_values = {}, const torch::DeviceType device_type = torch::kCPU, const torch::ScalarType scalar_type = torch::kDouble, - const bool build_on_construct = true); + const bool build_on_construct = true, + const std::vector & input_shift_factors = {}, + const std::vector & input_scaling_factors = {}, + const std::vector & output_scaling_factors = {}); /** * Copy construct an artificial neural network * @param nn The neural network which needs to be copied */ LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, - const bool build_on_construct = true); + const bool build_on_construct = true); /** * Overriding the forward substitution function for the neural network, unfortunately @@ -61,11 +64,11 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet /// Construct the neural network virtual void constructNeuralNetwork() override; - const torch::Tensor & stdTensor() const {return _std_tensor;} + const torch::Tensor & stdTensor() const { return _std_tensor; } 
- const torch::Tensor & alphaTensor() const {return _alpha_tensor;} + const torch::Tensor & alphaTensor() const { return _alpha_tensor; } - const torch::Tensor & betaTensor() const {return _beta_tensor;} + const torch::Tensor & betaTensor() const { return _beta_tensor; } const LibtorchActionDistributionHead & actionDistributionHead() const { return *_action_head; } LibtorchActionDistributionHead & actionDistributionHead() { return *_action_head; } @@ -87,29 +90,38 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); +void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename); + +bool isLegacyLibtorchActorArchive(const std::string & filename); + +void loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename, + const std::vector & action_standard_deviations); + } template <> -void dataStore( - std::ostream & stream, - std::shared_ptr & nn, - void * context); +void dataStore(std::ostream & stream, + std::shared_ptr & nn, + void * context); template <> -void dataLoad( - std::istream & stream, - std::shared_ptr & nn, - void * context); +void dataLoad(std::istream & stream, + std::shared_ptr & nn, + void * context); // This is needed because the reporter which is used to ouput the neural net parameters to JSON // requires a dataStore/dataLoad. 
However, these functions will be empty due to the fact that // we are only interested in the JSON output and we don't want to output everything template <> -void dataStore( - std::ostream & stream, Moose::LibtorchActorNeuralNet const *& nn, void * context); +void dataStore(std::ostream & stream, + Moose::LibtorchActorNeuralNet const *& nn, + void * context); template <> -void dataLoad( - std::istream & stream, Moose::LibtorchActorNeuralNet const *& nn, void * context); +void dataLoad(std::istream & stream, + Moose::LibtorchActorNeuralNet const *& nn, + void * context); #endif diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index f9e498c81c04..ec3a1a76e16e 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -13,6 +13,7 @@ #include #include +#include #include "LibtorchNeuralNetBase.h" #include "MooseError.h" #include "DataIO.h" @@ -42,14 +43,17 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu const std::vector & maximum_values = {}, const torch::DeviceType device_type = torch::kCPU, const torch::ScalarType scalar_type = torch::kDouble, - const bool build_on_construct = true); + const bool build_on_construct = true, + const std::vector & input_shift_factors = {}, + const std::vector & input_scaling_factors = {}, + const std::vector & output_scaling_factors = {}); /** * Copy construct an artificial neural network * @param nn The neural network which needs to be copied */ LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn, - const bool build_on_construct = true); + const bool build_on_construct = true); /** * Add layers to the neural network @@ -83,21 +87,42 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu torch::DeviceType deviceType() const { return _device_type; } /// Return the data type 
which is used by this neural network torch::ScalarType dataType() const { return _data_type; } + /// Return the affine input shift factors used before evaluation + const std::vector & inputShiftFactors() const { return _input_shift_factors; } + /// Return the affine input scaling factors used before evaluation + const std::vector & inputScalingFactors() const { return _input_scaling_factors; } + /// Return the output scaling factors applied after evaluation + const std::vector & outputScalingFactors() const { return _output_scaling_factors; } /// Construct the neural network virtual void constructNeuralNetwork(); + /// Update cached affine metadata vectors from the registered libtorch buffers. + void synchronizeAffineFactorsFromBuffers(); + Real determineGain(const std::string & activation); virtual void initializeNeuralNetwork(); - const std::vector & minValues() const {return _minimum_values;}; + const std::vector & minValues() const { return _minimum_values; }; - const std::vector & maxValues() const {return _maximum_values;}; + const std::vector & maxValues() const { return _maximum_values; }; /// Store the network architecture in a json file (for debugging, visualization) void store(nlohmann::json & json) const; protected: + static std::vector normalizeAffineFactors(const std::vector & factors, + unsigned int expected_size, + Real default_value, + const std::string & factor_name, + bool forbid_zero = false); + + void initializeAffineBuffers(); + + virtual torch::Tensor preprocessInput(const torch::Tensor & x) const; + + virtual torch::Tensor scaleOutput(const torch::Tensor & y) const; + /// Name of the neural network const std::string _name; /// Submodules that hold linear operations and the corresponding @@ -116,15 +141,27 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu const torch::DeviceType _device_type; /// The data type used in this neural network const torch::ScalarType _data_type; + /// Affine preprocessing applied 
to the flattened input + std::vector _input_shift_factors; + /// Multiplicative affine preprocessing applied after shifting the input + std::vector _input_scaling_factors; + /// Multiplicative scaling applied after the network output is formed + std::vector _output_scaling_factors; /// const std::vector _minimum_values; const std::vector _maximum_values; + torch::Tensor _input_shift_tensor; + torch::Tensor _input_scale_tensor; + torch::Tensor _output_scale_tensor; torch::Tensor _min_tensor; torch::Tensor _max_tensor; }; void to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * const & network); +void loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn, + const std::string & filename); + } template <> diff --git a/framework/include/libtorch/utils/LibtorchObservationHistory.h b/framework/include/libtorch/utils/LibtorchObservationHistory.h index b8c6c7c20a3c..d4ba6970bb46 100644 --- a/framework/include/libtorch/utils/LibtorchObservationHistory.h +++ b/framework/include/libtorch/utils/LibtorchObservationHistory.h @@ -40,6 +40,8 @@ class LibtorchObservationHistory void advanceHistory(const std::vector & normalized_response, std::vector> & old_responses) const; + std::vector expandFeatureFactors(const std::vector & feature_factors) const; + std::vector stackCurrentObservation( const std::vector & normalized_response, const std::vector> & old_responses) const; diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 2f4bd5ac3c25..517da31ef7fa 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -55,7 +55,8 @@ LibtorchNeuralNetControl::validParams() params.addParam>( "action_scaling_factors", - "Scale factor that multiplies the NN output to obtain a physically meaningful value."); + "Scale factors embedded into constructed neural-network outputs so checkpointed 
policies " + "carry their physical-unit action scaling."); return params; } @@ -76,8 +77,7 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param _action_scaling_factors(isParamValid("action_scaling_factors") ? getParam>("action_scaling_factors") : std::vector(_control_names.size(), 1.0)), - _observation_history( - _input_timesteps, _response_shift_factors, _response_scaling_factors) + _observation_history(_input_timesteps, _response_shift_factors, _response_scaling_factors) { // We first check if the input parameters make sense and throw errors if different parameter // combinations are not allowed @@ -106,7 +106,7 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param // If the user wants to read the neural net from file, we do it. We can read it from a // torchscript file, or we can create a shell and read back the parameters. - if (parameters.isParamSetByUser("filename")) + if (parameters.isParamSetByUser("filename") && type() == "LibtorchNeuralNetControl") this->loadControlNeuralNetFromFile(parameters); } @@ -126,12 +126,27 @@ LibtorchNeuralNetControl::loadControlNeuralNetFromFile(const InputParameters & p parameters.isParamSetByUser("activation_function") ? 
getParam>("activation_function") : std::vector({"relu"}); - auto nn = std::make_shared( - filename, num_inputs, num_outputs, num_neurons_per_layer, activation_functions); + const auto input_shift_factors = + _observation_history.expandFeatureFactors(_response_shift_factors); + const auto input_scaling_factors = + _observation_history.expandFeatureFactors(_response_scaling_factors); + auto nn = std::make_shared(filename, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_functions, + std::vector(), + std::vector(), + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors, + _action_scaling_factors); try { - torch::load(nn, filename); + Moose::loadLibtorchArtificialNeuralNetState(*nn, filename); _nn = std::make_shared(*nn); } catch (const c10::Error & e) @@ -171,10 +186,8 @@ LibtorchNeuralNetControl::execute() _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; for (unsigned int control_i = 0; control_i < n_controls; ++control_i) { - // We scale the controllable value for physically meaningful control action setControllableValueByName(_control_names[control_i], - _current_control_signals[control_i] * - _action_scaling_factors[control_i]); + _current_control_signals[control_i]); } // We add the curent solution to the old solutions and move everything in there one step @@ -219,7 +232,7 @@ LibtorchNeuralNetControl::updateCurrentResponse() for (const auto & resp_i : index_range(_response_names)) raw_response.push_back(*_response_values[resp_i]); - _current_response = _observation_history.normalize(raw_response); + _current_response = raw_response; } void diff --git a/framework/src/libtorch/utils/LibtorchActionDistributionHead.C b/framework/src/libtorch/utils/LibtorchActionDistributionHead.C index 4037473f5ba0..bcd4ff456c42 100644 --- a/framework/src/libtorch/utils/LibtorchActionDistributionHead.C +++ b/framework/src/libtorch/utils/LibtorchActionDistributionHead.C @@ -19,6 +19,27 @@ #include 
#include "libmesh/utility.h" +namespace +{ + +std::vector +normalizeActionScalingFactors(const std::vector & factors, const unsigned int expected_size) +{ + const auto normalized = factors.empty() ? std::vector(expected_size, 1.0) : factors; + + if (normalized.size() != expected_size) + mooseError("The number of output_scaling_factors entries must match the number of action " + "outputs."); + + for (const auto factor : normalized) + if (std::abs(factor) == 0.0) + mooseError("The output_scaling_factors entries must be non-zero."); + + return normalized; +} + +} // namespace + namespace Moose { @@ -30,15 +51,22 @@ LibtorchActionDistributionHead::LibtorchActionDistributionHead( const std::vector & maximum_values, const torch::DeviceType device_type, const torch::ScalarType data_type, - const bool build_on_construct) + const bool build_on_construct, + const std::vector & output_scaling_factors) : _name(name), _num_inputs(num_inputs), _num_outputs(num_outputs), _minimum_values(minimum_values), _maximum_values(maximum_values), _device_type(device_type), - _data_type(data_type) + _data_type(data_type), + _output_scaling_factors(normalizeActionScalingFactors(output_scaling_factors, num_outputs)) { + auto action_scale = _output_scaling_factors; + LibtorchUtils::vectorToTensor(action_scale, _action_scale_tensor); + _action_scale_tensor = register_buffer( + "action_scale", _action_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); + const bool has_minimum_values = !_minimum_values.empty(); const bool has_maximum_values = !_maximum_values.empty(); if (has_minimum_values != has_maximum_values) @@ -56,10 +84,10 @@ LibtorchActionDistributionHead::LibtorchActionDistributionHead( auto min_value = _minimum_values; LibtorchUtils::vectorToTensor(min_value, _min_tensor); - _min_tensor = _min_tensor.to(_data_type).to(_device_type); + _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); auto max_value = _maximum_values; 
LibtorchUtils::vectorToTensor(max_value, _max_tensor); - _max_tensor = _max_tensor.to(_data_type).to(_device_type); + _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); } if (build_on_construct) @@ -67,8 +95,7 @@ LibtorchActionDistributionHead::LibtorchActionDistributionHead( } LibtorchActionDistributionHead::LibtorchActionDistributionHead( - const LibtorchActionDistributionHead & head, - const bool build_on_construct) + const LibtorchActionDistributionHead & head, const bool build_on_construct) : torch::nn::Module(), _name(head._name), _num_inputs(head._num_inputs), @@ -76,16 +103,22 @@ LibtorchActionDistributionHead::LibtorchActionDistributionHead( _minimum_values(head._minimum_values), _maximum_values(head._maximum_values), _device_type(head._device_type), - _data_type(head._data_type) + _data_type(head._data_type), + _output_scaling_factors(head._output_scaling_factors) { + auto action_scale = _output_scaling_factors; + LibtorchUtils::vectorToTensor(action_scale, _action_scale_tensor); + _action_scale_tensor = register_buffer( + "action_scale", _action_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); + if (_minimum_values.size()) { auto min_value = _minimum_values; LibtorchUtils::vectorToTensor(min_value, _min_tensor); - _min_tensor = _min_tensor.to(_data_type).to(_device_type); + _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); auto max_value = _maximum_values; LibtorchUtils::vectorToTensor(max_value, _max_tensor); - _max_tensor = _max_tensor.to(_data_type).to(_device_type); + _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); } if (build_on_construct) @@ -95,6 +128,11 @@ LibtorchActionDistributionHead::LibtorchActionDistributionHead( auto to_params = this->named_parameters(); for (const auto param_i : make_range(from_params.size())) to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + + const auto & from_buffers = head.named_buffers(); + auto 
to_buffers = this->named_buffers(); + for (const auto buffer_i : make_range(from_buffers.size())) + to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone(); } } @@ -106,10 +144,10 @@ LibtorchActionDistributionHead::constructHead() _primary_parameter_module = register_module( primary_name, - torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(false))); + torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); _secondary_parameter_module = register_module( secondary_name, - torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(false))); + torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); _primary_parameter_module->to(_device_type, _data_type); _secondary_parameter_module->to(_device_type, _data_type); @@ -121,12 +159,21 @@ LibtorchActionDistributionHead::initialize() const auto primary_sizes = _primary_parameter_module->weight.sizes(); const auto primary_max_dim_size = *std::max_element(primary_sizes.begin(), primary_sizes.end()); torch::nn::init::orthogonal_(_primary_parameter_module->weight, 1.0 / primary_max_dim_size); + torch::nn::init::zeros_(_primary_parameter_module->bias); const auto secondary_sizes = _secondary_parameter_module->weight.sizes(); const auto secondary_max_dim_size = *std::max_element(secondary_sizes.begin(), secondary_sizes.end()); - torch::nn::init::orthogonal_( - _secondary_parameter_module->weight, 1.0 / secondary_max_dim_size); + torch::nn::init::orthogonal_(_secondary_parameter_module->weight, 1.0 / secondary_max_dim_size); + torch::nn::init::zeros_(_secondary_parameter_module->bias); +} + +void +LibtorchActionDistributionHead::synchronizeScalingFactorsFromBuffer() +{ + auto action_scale = + _action_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(action_scale, _output_scaling_factors); } void @@ -166,31 +213,41 @@ 
LibtorchActionDistributionHead::sample() const const auto alpha_sample = at::_standard_gamma(_alpha_tensor); const auto beta_sample = at::_standard_gamma(_beta_tensor); const auto sampled = alpha_sample / (alpha_sample + beta_sample); - return _min_tensor + (_max_tensor - _min_tensor) * sampled; + return (_min_tensor + (_max_tensor - _min_tensor) * sampled) * _action_scale_tensor; } - return at::normal(_mean, _std_tensor); + return at::normal(_mean, _std_tensor) * _action_scale_tensor; } torch::Tensor LibtorchActionDistributionHead::deterministicAction() const { if (isBounded()) - return _min_tensor + (_max_tensor - _min_tensor) * _mean; + return (_min_tensor + (_max_tensor - _min_tensor) * _mean) * _action_scale_tensor; - return _mean; + return _mean * _action_scale_tensor; } torch::Tensor LibtorchActionDistributionHead::logProbability(const torch::Tensor & action) const { + auto scaled_action = action; + if (_data_type != scaled_action.scalar_type()) + scaled_action = scaled_action.to(_data_type); + if (_device_type != scaled_action.device().type()) + scaled_action = scaled_action.to(_device_type); + + const auto log_action_scale = torch::log(torch::abs(_action_scale_tensor)); + const auto unscaled_action = scaled_action / _action_scale_tensor; + if (isBounded()) { const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); - const auto normalized = (action - _min_tensor) / scale; + const auto normalized = (unscaled_action - _min_tensor) / scale; const auto clipped = torch::clamp(normalized, 1e-8, 1.0 - 1e-8); auto log_prob = (_alpha_tensor - 1.0) * torch::log(clipped) + - (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale); + (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale) - + log_action_scale; const auto out_of_bounds = (normalized < 0.0) | (normalized > 1.0); if (out_of_bounds.any().item()) @@ -203,23 +260,25 @@ LibtorchActionDistributionHead::logProbability(const torch::Tensor & action) con 
constexpr Real pi = 3.14159265358979323846; const torch::Tensor var = _std_tensor * _std_tensor; - return -((action - _mean) * (action - _mean)) / (2.0 * var) - _log_std_tensor - - 0.5 * std::log(2.0 * pi); + return -((unscaled_action - _mean) * (unscaled_action - _mean)) / (2.0 * var) - _log_std_tensor - + 0.5 * std::log(2.0 * pi) - log_action_scale; } torch::Tensor LibtorchActionDistributionHead::entropy() const { + const auto log_action_scale = torch::log(torch::abs(_action_scale_tensor)); if (isBounded()) { const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) - (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) + - (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale); + (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale) + + log_action_scale; } constexpr Real pi = 3.14159265358979323846; - return 0.5 * std::log(2.0 * pi) + _log_std_tensor + 0.5; + return 0.5 * std::log(2.0 * pi) + _log_std_tensor + 0.5 + log_action_scale; } } // namespace Moose diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C index 7a34bbc93581..c127020268ec 100644 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -12,6 +12,74 @@ #include "LibtorchActorNeuralNet.h" #include "MooseError.h" +namespace +{ + +bool +readArchiveTensor(torch::serialize::InputArchive & archive, + const std::string & key, + torch::Tensor & tensor) +{ + try + { + archive.read(key, tensor); + return true; + } + catch (const c10::Error &) + { + return false; + } +} + +void +copyTensor(torch::Tensor & destination, const torch::Tensor & source) +{ + destination.data().copy_(source.to(destination.options())); +} + +bool +readActorStateTensor(torch::serialize::InputArchive & archive, + const std::string & key, + torch::Tensor & 
tensor) +{ + if (readArchiveTensor(archive, key, tensor)) + return true; + + if (key.rfind("action_head.", 0) == 0) + return readArchiveTensor(archive, key.substr(std::string("action_head.").size()), tensor); + + return false; +} + +bool +isOptionalActorBuffer(const std::string & key) +{ + return key == "input_shift" || key == "input_scale" || key == "output_scale" || + key == "action_head.action_scale"; +} + +bool +isOptionalActorParameter(const std::string & key) +{ + return key == "action_head.mean.bias" || key == "action_head.std.bias"; +} + +template +bool +findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch::Tensor & tensor) +{ + for (const auto & entry : tensors) + if (entry.name == key) + { + tensor = entry.value; + return true; + } + + return false; +} + +} // namespace + namespace Moose { @@ -25,7 +93,10 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( const std::vector & maximum_values, const torch::DeviceType device_type, const torch::ScalarType data_type, - const bool build_on_construct) + const bool build_on_construct, + const std::vector & input_shift_factors, + const std::vector & input_scaling_factors, + const std::vector & output_scaling_factors) : LibtorchArtificialNeuralNet(name, num_inputs, num_outputs, @@ -35,7 +106,10 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( maximum_values, device_type, data_type, - false) + false, + input_shift_factors, + input_scaling_factors, + output_scaling_factors) { if (build_on_construct) constructNeuralNetwork(); @@ -54,6 +128,11 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralN auto to_params = this->named_parameters(); for (unsigned int param_i : make_range(from_params.size())) to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + + const auto & from_buffers = nn.named_buffers(); + auto to_buffers = this->named_buffers(); + for (unsigned int buffer_i : make_range(from_buffers.size())) + 
to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone(); } } @@ -91,14 +170,15 @@ LibtorchActorNeuralNet::constructNeuralNetwork() inp_neurons = _num_neurons_per_layer[i]; } - _action_head = std::make_shared( - "action_head", - inp_neurons, - _num_outputs, - _minimum_values, - _maximum_values, - _device_type, - _data_type); + _action_head = std::make_shared("action_head", + inp_neurons, + _num_outputs, + _minimum_values, + _maximum_values, + _device_type, + _data_type, + true, + _output_scaling_factors); register_module("action_head", _action_head); } @@ -120,11 +200,7 @@ LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) torch::Tensor LibtorchActorNeuralNet::forward(const torch::Tensor & x) { - torch::Tensor output(x); - if (_data_type != output.scalar_type()) - output.to(_data_type); - if (_device_type != output.device().type()) - output.to(_device_type); + torch::Tensor output = preprocessInput(x); for (unsigned int i = 0; i < _weights.size(); ++i) { @@ -152,15 +228,7 @@ LibtorchActorNeuralNet::forward(const torch::Tensor & x) torch::Tensor LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) { - torch::Tensor output(x); - // std::cout << output << std::endl; - if (_data_type != output.scalar_type()) - output.to(_data_type); - if (_device_type != output.device().type()) - output.to(_device_type); - - // std::cout << "input" << output << std::endl; - output = forward(output); + torch::Tensor output = forward(x); // std::cout << "midresult" << output << std::endl; resetDistributionParams(output); @@ -183,6 +251,142 @@ LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) return _action_head->logProbability(action); } +void +loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename) +{ + torch::serialize::InputArchive archive; + archive.load_from(filename); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if 
(!readActorStateTensor(archive, parameter.key(), stored_tensor)) + { + if (isOptionalActorParameter(parameter.key())) + { + parameter.value().data().zero_(); + continue; + } + + mooseError("The requested pytorch parameter file could not be loaded. This can either be " + "the result of the file not existing or a misalignment in the generated " + "container and the data in the file. Make sure the dimensions of the generated " + "neural net are the same as the dimensions of the parameters in the input file!\n" + "Missing serialized parameter: ", + parameter.key()); + } + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!readActorStateTensor(archive, buffer.key(), stored_tensor)) + { + if (isOptionalActorBuffer(buffer.key())) + continue; + + mooseError("The requested pytorch parameter file could not be loaded. This can either be " + "the result of the file not existing or a misalignment in the generated " + "container and the data in the file. 
Make sure the dimensions of the generated " + "neural net are the same as the dimensions of the parameters in the input file!\n" + "Missing serialized buffer: ", + buffer.key()); + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + nn.actionDistributionHead().synchronizeScalingFactorsFromBuffer(); +} + +bool +isLegacyLibtorchActorArchive(const std::string & filename) +{ + try + { + const auto scripted = torch::jit::load(filename); + const auto parameters = scripted.named_parameters(); + + torch::Tensor ignored; + return findNamedTensor(parameters, "output_layer_.weight", ignored) && + !findNamedTensor(parameters, "action_head.mean.weight", ignored); + } + catch (const c10::Error &) + { + return false; + } +} + +void +loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename, + const std::vector & action_standard_deviations) +{ + if (nn.actionDistributionHead().isBounded()) + mooseError("Legacy deterministic DRL checkpoints are only supported for unbounded actors."); + + const auto legacy_std = action_standard_deviations.empty() + ? 
std::vector(nn.numOutputs(), 1e-12) + : action_standard_deviations; + + if (legacy_std.size() != nn.numOutputs()) + mooseError("The number of action_standard_deviations entries must match the number of action " + "outputs when loading a legacy deterministic DRL checkpoint."); + + for (const auto std_value : legacy_std) + if (!(std_value > 0.0)) + mooseError("Legacy action_standard_deviations entries must be strictly positive."); + + const auto scripted = torch::jit::load(filename); + const auto legacy_parameters = scripted.named_parameters(); + + for (auto & parameter : nn.named_parameters()) + { + const auto & key = parameter.key(); + torch::Tensor stored_tensor; + + if (key == "action_head.mean.weight") + { + if (!findNamedTensor(legacy_parameters, "output_layer_.weight", stored_tensor)) + mooseError("Legacy deterministic DRL checkpoint is missing output_layer_.weight."); + copyTensor(parameter.value(), stored_tensor); + continue; + } + + if (key == "action_head.mean.bias") + { + if (!findNamedTensor(legacy_parameters, "output_layer_.bias", stored_tensor)) + mooseError("Legacy deterministic DRL checkpoint is missing output_layer_.bias."); + copyTensor(parameter.value(), stored_tensor); + continue; + } + + if (key == "action_head.std.weight") + { + parameter.value().data().zero_(); + continue; + } + + if (key == "action_head.std.bias") + { + auto log_std = torch::log(torch::tensor(legacy_std, parameter.value().options())); + copyTensor(parameter.value(), log_std); + continue; + } + + if (!findNamedTensor(legacy_parameters, key, stored_tensor)) + mooseError("Legacy deterministic DRL checkpoint is missing serialized parameter: ", key); + + copyTensor(parameter.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + nn.actionDistributionHead().synchronizeScalingFactorsFromBuffer(); +} + } #endif diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index 
1618496e7996..9447ceb28d54 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -13,6 +13,41 @@ #include "MooseError.h" #include "LibtorchUtils.h" +#include + +namespace +{ + +bool +readArchiveTensor(torch::serialize::InputArchive & archive, + const std::string & key, + torch::Tensor & tensor) +{ + try + { + archive.read(key, tensor); + return true; + } + catch (const c10::Error &) + { + return false; + } +} + +void +copyTensor(torch::Tensor & destination, const torch::Tensor & source) +{ + destination.data().copy_(source.to(destination.options())); +} + +bool +isOptionalArtificialNeuralNetBuffer(const std::string & key) +{ + return key == "input_shift" || key == "input_scale" || key == "output_scale"; +} + +} // namespace + namespace Moose { @@ -26,7 +61,10 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( const std::vector & maximum_values, const torch::DeviceType device_type, const torch::ScalarType data_type, - const bool build_on_construct) + const bool build_on_construct, + const std::vector & input_shift_factors, + const std::vector & input_scaling_factors, + const std::vector & output_scaling_factors) : _name(name), _num_inputs(num_inputs), _num_outputs(num_outputs), @@ -34,10 +72,17 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( _activation_function(MultiMooseEnum("relu sigmoid elu gelu linear tanh", "relu")), _device_type(device_type), _data_type(data_type), + _input_shift_factors( + normalizeAffineFactors(input_shift_factors, num_inputs, 0.0, "input_shift_factors")), + _input_scaling_factors( + normalizeAffineFactors(input_scaling_factors, num_inputs, 1.0, "input_scaling_factors")), + _output_scaling_factors( + normalizeAffineFactors(output_scaling_factors, num_outputs, 1.0, "output_scaling_factors")), _minimum_values(minimum_values), _maximum_values(maximum_values) { _activation_function = activation_function; + initializeAffineBuffers(); // 
Check if the number of activation functions matches the number of hidden layers if ((_activation_function.size() != 1) && @@ -62,10 +107,10 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( auto min_value = _minimum_values; LibtorchUtils::vectorToTensor(min_value, _min_tensor); - _min_tensor.to(_data_type).to(_device_type); + _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); auto max_value = _maximum_values; LibtorchUtils::vectorToTensor(max_value, _max_tensor); - _max_tensor.to(_data_type).to(_device_type); + _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); } if (build_on_construct) @@ -82,9 +127,13 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( _activation_function(nn.activationFunctions()), _device_type(nn.deviceType()), _data_type(nn.dataType()), + _input_shift_factors(nn.inputShiftFactors()), + _input_scaling_factors(nn.inputScalingFactors()), + _output_scaling_factors(nn.outputScalingFactors()), _minimum_values(nn.minValues()), _maximum_values(nn.maxValues()) { + initializeAffineBuffers(); // We construct the NN architecture if (build_on_construct) @@ -95,16 +144,21 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( auto to_params = this->named_parameters(); for (unsigned int param_i : make_range(from_params.size())) to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + + const auto & from_buffers = nn.named_buffers(); + auto to_buffers = this->named_buffers(); + for (unsigned int buffer_i : make_range(from_buffers.size())) + to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone(); } if (_minimum_values.size()) { auto min_value = _minimum_values; LibtorchUtils::vectorToTensor(min_value, _min_tensor); - _min_tensor.to(_data_type).to(_device_type); + _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); auto max_value = _maximum_values; LibtorchUtils::vectorToTensor(max_value, _max_tensor); - 
_max_tensor.to(_data_type).to(_device_type); + _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); } } @@ -135,6 +189,60 @@ LibtorchArtificialNeuralNet::initializeNeuralNetwork() torch::nn::init::zeros_(_weights.back()->bias); } +std::vector +LibtorchArtificialNeuralNet::normalizeAffineFactors(const std::vector & factors, + const unsigned int expected_size, + const Real default_value, + const std::string & factor_name, + const bool forbid_zero) +{ + const auto normalized = + factors.empty() ? std::vector(expected_size, default_value) : factors; + + if (normalized.size() != expected_size) + mooseError("The number of ", factor_name, " entries must match ", expected_size, "."); + + if (forbid_zero) + for (const auto factor : normalized) + if (std::abs(factor) == 0.0) + mooseError("The ", factor_name, " entries must be non-zero."); + + return normalized; +} + +void +LibtorchArtificialNeuralNet::initializeAffineBuffers() +{ + auto input_shift = _input_shift_factors; + LibtorchUtils::vectorToTensor(input_shift, _input_shift_tensor); + _input_shift_tensor = register_buffer( + "input_shift", _input_shift_tensor.transpose(0, 1).to(_data_type).to(_device_type)); + + auto input_scale = _input_scaling_factors; + LibtorchUtils::vectorToTensor(input_scale, _input_scale_tensor); + _input_scale_tensor = register_buffer( + "input_scale", _input_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); + + auto output_scale = _output_scaling_factors; + LibtorchUtils::vectorToTensor(output_scale, _output_scale_tensor); + _output_scale_tensor = register_buffer( + "output_scale", _output_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); +} + +void +LibtorchArtificialNeuralNet::synchronizeAffineFactorsFromBuffers() +{ + auto input_shift = _input_shift_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(input_shift, _input_shift_factors); + + auto input_scale = 
_input_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(input_scale, _input_scaling_factors); + + auto output_scale = + _output_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(output_scale, _output_scaling_factors); +} + void LibtorchArtificialNeuralNet::constructNeuralNetwork() { @@ -157,14 +265,28 @@ LibtorchArtificialNeuralNet::constructNeuralNetwork() _weights.back()->to(_device_type, _data_type); } +torch::Tensor +LibtorchArtificialNeuralNet::preprocessInput(const torch::Tensor & x) const +{ + torch::Tensor input(x); + if (_data_type != input.scalar_type()) + input = input.to(_data_type); + if (_device_type != input.device().type()) + input = input.to(_device_type); + + return (input - _input_shift_tensor) * _input_scale_tensor; +} + +torch::Tensor +LibtorchArtificialNeuralNet::scaleOutput(const torch::Tensor & y) const +{ + return y * _output_scale_tensor; +} + torch::Tensor LibtorchArtificialNeuralNet::forward(const torch::Tensor & x) { - torch::Tensor output(x); - if (_data_type != output.scalar_type()) - output.to(_data_type); - if (_device_type != output.device().type()) - output.to(_device_type); + torch::Tensor output = preprocessInput(x); for (unsigned int i = 0; i < _weights.size() - 1; ++i) { @@ -187,16 +309,14 @@ LibtorchArtificialNeuralNet::forward(const torch::Tensor & x) if (_minimum_values.size()) { output = torch::sigmoid(_weights[_weights.size() - 1]->forward(output)); - torch::Tensor scale = torch::sub(_max_tensor, _min_tensor).to(_data_type); + const auto scale = _max_tensor - _min_tensor; output = torch::mul(output, scale); output = output + _min_tensor; } else - { output = _weights[_weights.size() - 1]->forward(output); - } - return output; + return scaleOutput(output); } void @@ -230,6 +350,10 @@ LibtorchArtificialNeuralNet::store(nlohmann::json & json) const named_params[param_i].value().data_ptr(), 
named_params[param_i].value().data_ptr() + named_params[param_i].value().numel()); } + + json["input_shift_factors"] = _input_shift_factors; + json["input_scaling_factors"] = _input_scaling_factors; + json["output_scaling_factors"] = _output_scaling_factors; } void @@ -239,6 +363,49 @@ to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * const network->store(json); } +void +loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn, + const std::string & filename) +{ + torch::serialize::InputArchive archive; + archive.load_from(filename); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if (!readArchiveTensor(archive, parameter.key(), stored_tensor)) + mooseError("The requested pytorch parameter file could not be loaded. This can either be " + "the result of the file not existing or a misalignment in the generated " + "container and the data in the file. Make sure the dimensions of the generated " + "neural net are the same as the dimensions of the parameters in the input file!\n" + "Missing serialized parameter: ", + parameter.key()); + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!readArchiveTensor(archive, buffer.key(), stored_tensor)) + { + if (isOptionalArtificialNeuralNetBuffer(buffer.key())) + continue; + + mooseError("The requested pytorch parameter file could not be loaded. This can either be " + "the result of the file not existing or a misalignment in the generated " + "container and the data in the file. 
Make sure the dimensions of the generated " + "neural net are the same as the dimensions of the parameters in the input file!\n" + "Missing serialized buffer: ", + buffer.key()); + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); +} + } template <> @@ -343,7 +510,7 @@ dataLoad( divt, datt); - torch::load(nn, name); + Moose::loadLibtorchArtificialNeuralNetState(*nn, name); } template <> diff --git a/framework/src/libtorch/utils/LibtorchObservationHistory.C b/framework/src/libtorch/utils/LibtorchObservationHistory.C index 352513af2f25..782e9b65208b 100644 --- a/framework/src/libtorch/utils/LibtorchObservationHistory.C +++ b/framework/src/libtorch/utils/LibtorchObservationHistory.C @@ -103,6 +103,24 @@ LibtorchObservationHistory::advanceHistory(const std::vector & normalized_ old_responses[0] = normalized_response; } +std::vector +LibtorchObservationHistory::expandFeatureFactors(const std::vector & feature_factors) const +{ + if (feature_factors.empty()) + return {}; + + std::vector expanded; + expanded.reserve(feature_factors.size() * _input_timesteps); + + for (const auto lag : make_range(_input_timesteps)) + { + libmesh_ignore(lag); + expanded.insert(expanded.end(), feature_factors.begin(), feature_factors.end()); + } + + return expanded; +} + std::vector LibtorchObservationHistory::stackCurrentObservation( const std::vector & normalized_response, diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index dd401a99bc45..42d6f52a8ea5 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -62,6 +62,7 @@ input_timesteps = 1 response_scaling_factors = '0.03' response_shift_factors = '290' + action_scaling_factors = 20 standardize_advantage = true diff 
--git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 76a3becb5fd4..230ac5009f3e 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -75,6 +75,9 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Control reporter names const std::vector _action_names; + /// Multiplicative action scaling embedded in the actor outputs + const std::vector _action_scaling_factors; + /// Pointers to the current values of the control signals /// We can have multiple control signals, multiple samples, multiple timesteps std::vector> *> _action_value_pointers; diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index aa5a21eb5953..867357f72ac9 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -14,59 +14,8 @@ #include "Transient.h" #include "LibtorchUtils.h" -#include - registerMooseObject("StochasticToolsApp", LibtorchDRLControl); -namespace -{ - -bool -readArchiveTensor(torch::serialize::InputArchive & archive, - const std::string & key, - torch::Tensor & tensor) -{ - try - { - archive.read(key, tensor); - return true; - } - catch (const c10::Error &) - { - return false; - } -} - -void -loadActorParametersWithLegacyFallback(Moose::LibtorchActorNeuralNet & actor, - const std::string & filename) -{ - torch::serialize::InputArchive archive; - archive.load_from(filename); - - for (auto & parameter : actor.named_parameters()) - { - torch::Tensor stored_tensor; - bool loaded = readArchiveTensor(archive, parameter.key(), stored_tensor); - - if (!loaded && parameter.key().rfind("action_head.", 0) == 0) - loaded = 
readArchiveTensor( - archive, parameter.key().substr(std::string("action_head.").size()), stored_tensor); - - if (!loaded) - mooseError("The requested pytorch parameter file could not be loaded. This can either be " - "the result of the file not existing or a misalignment in the generated " - "container and the data in the file. Make sure the dimensions of the generated " - "neural net are the same as the dimensions of the parameters in the input file!\n" - "Missing serialized parameter: ", - parameter.key()); - - parameter.value().data().copy_(stored_tensor); - } -} - -} // namespace - InputParameters LibtorchDRLControl::validParams() { @@ -147,6 +96,10 @@ LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & paramet const std::vector & minimum_values = getParam>("min_control_value"); const std::vector & maximum_values = getParam>("max_control_value"); + const auto input_shift_factors = + _observation_history.expandFeatureFactors(_response_shift_factors); + const auto input_scaling_factors = + _observation_history.expandFeatureFactors(_response_scaling_factors); auto nn = std::make_shared(filename, num_inputs, @@ -154,16 +107,27 @@ LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & paramet num_neurons_per_layer, activation_functions, minimum_values, - maximum_values); + maximum_values, + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors, + _action_scaling_factors); try { - torch::load(nn, filename); + if (Moose::isLegacyLibtorchActorArchive(filename)) + Moose::loadLegacyLibtorchActorNeuralNetState( + *nn, filename, getParam>("action_standard_deviations")); + else + Moose::loadLibtorchActorNeuralNetState(*nn, filename); } - catch (const c10::Error &) + catch (const c10::Error & e) { - loadActorParametersWithLegacyFallback(*nn, filename); - _console << "Loaded requested legacy .pt file." 
<< std::endl; + mooseError("The requested pytorch parameter file could not be loaded for the control neural " + "net.\n", + e.msg()); } _actor_nn = std::make_shared(*nn); @@ -230,8 +194,7 @@ LibtorchDRLControl::execute() for (unsigned int control_i = 0; control_i < n_controls; ++control_i) setControllableValueByName(_control_names[control_i], - _current_smoothed_signal[control_i] * - _action_scaling_factors[control_i]); + _current_smoothed_signal[control_i]); if (_old_responses.size()) { diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C index e9951ed7277a..c2818b3570eb 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C @@ -128,7 +128,7 @@ LibtorchANNTrainer::postTrain() if (_read_from_file) try { - torch::load(_nn, _nn_filename); + Moose::loadLibtorchArtificialNeuralNetState(*_nn, _nn_filename); _console << "Loaded requested .pt file." 
<< std::endl; } catch (const c10::Error & e) diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 858b0beaa8df..4a39a8165690 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -38,6 +38,10 @@ LibtorchDRLControlTrainer::validParams() "control", "Reporters containing the values of the controlled quantities (control signals) from the " "model simulations."); + params.addParam>( + "action_scaling_factors", + "Scale factors embedded into the trained policy outputs so transferred and checkpointed " + "controllers operate in physical units."); params.addRequiredParam>( "log_probability", "Reporters containing the log probabilities of the actions taken during the simulations."); @@ -149,6 +153,9 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par ? getParam>("response_scaling_factors") : std::vector(_state_names.size(), 1.0)), _action_names(getParam>("control")), + _action_scaling_factors(isParamValid("action_scaling_factors") + ? 
getParam>("action_scaling_factors") + : std::vector(_action_names.size(), 1.0)), _log_probability_names(getParam>("log_probability")), _reward_name(getParam("reward")), _reward_value_pointer(&getReporterValueByName>>(_reward_name)), @@ -196,6 +203,11 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par "The number of log-probability reporters must match the number of control " "reporters."); + if (_action_names.size() != _action_scaling_factors.size()) + paramError("action_scaling_factors", + "The number of action scaling factors must match the number of control " + "reporters."); + // We establish the links with the chosen reporters getReporterPointers(_state_names, _state_value_pointers); getReporterPointers(_action_names, _action_value_pointers); @@ -206,6 +218,9 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par torch::manual_seed(getParam("seed")); bool filename_valid = isParamValid("filename_base"); + const auto input_shift_factors = _observation_history.expandFeatureFactors(_state_shift_factors); + const auto input_scaling_factors = + _observation_history.expandFeatureFactors(_state_scaling_factors); // Initializing the control neural net so that the control can grab it right away _control_nn = std::make_shared( @@ -215,14 +230,26 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _num_control_neurons_per_layer, getParam>("control_activation_functions"), _min_values, - _max_values); + _max_values, + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors, + _action_scaling_factors); // We read parameters for the control neural net if it is requested if (_read_from_file) { try { - torch::load(_control_nn, _control_nn->name()); + if (Moose::isLegacyLibtorchActorArchive(_control_nn->name())) + Moose::loadLegacyLibtorchActorNeuralNetState( + *_control_nn, + _control_nn->name(), + getParam>("action_standard_deviations")); + else + 
Moose::loadLibtorchActorNeuralNetState(*_control_nn, _control_nn->name()); _console << "Loaded requested .pt file." << std::endl; } catch (const c10::Error & e) @@ -240,7 +267,14 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _num_inputs, 1, _num_critic_neurons_per_layer, - getParam>("critic_activation_functions")); + getParam>("critic_activation_functions"), + std::vector(), + std::vector(), + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors); _actor_optimizer = std::make_unique( _control_nn->parameters(), torch::optim::AdamOptions(_control_learning_rate)); @@ -252,7 +286,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par { try { - torch::load(_critic_nn, _critic_nn->name()); + Moose::loadLibtorchArtificialNeuralNetState(*_critic_nn, _critic_nn->name()); _console << "Loaded requested .pt file." << std::endl; } catch (const c10::Error & e) @@ -428,8 +462,6 @@ LibtorchDRLControlTrainer::collectTrajectoriesFromReporters() normalized_responses[state_i] = extractDownsampledSequence( (*_state_value_pointers[state_i])[sample_i], 0, num_transitions + 1); - _observation_history.normalizeTrajectoryInPlace(normalized_responses); - LibtorchRLTrajectoryBuffer::Trajectory trajectory; trajectory.observations.reserve(num_transitions); trajectory.next_observations.reserve(num_transitions); diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv index 454a22e116dc..1df81f626a79 100644 --- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv +++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv @@ -1,12 +1,12 @@ time,center_temp,center_temp_tend,env_temp,left_flux,log_prob_left_flux 0,300,300,273,0,0 
-1800,300,270.35724380953,273.98104693845,-0.13793264034647,1.1115979988596 -3600,270.35724380953,274.51295145974,274.9578928833,-0.016781115951807,-0.50967346391778 -5400,274.51295145974,270.06590276071,275.92635483024,-0.22228241099481,1.1389673700336 -7200,270.06590276071,269.76297060624,276.88228567654,-0.26991716569164,0.53565739363602 -9000,269.76297060624,272.93107942871,277.82159197955,-0.18534271617753,1.2519275533428 -10800,272.93107942871,274.5965017921,278.74025148548,-0.15706446553999,1.3472992379419 -12600,274.5965017921,276.7588668075,279.63433035329,-0.10897031971508,1.353343147668 -14400,276.7588668075,278.0802103911,280.5,-0.091713226427664,1.2665547503991 -16200,278.0802103911,278.66431523589,281.33355349529,-0.10118636668795,1.2822084987121 -18000,278.66431523589,275.59443714345,282.13142143513,-0.24790007933615,0.92588151005597 +1800,300,271.57673855843,273.98104693845,-18.334498394569,-4.6349479692145 +3600,271.57673855843,272.46057412724,274.9578928833,-18.93250347727,-4.5957497134515 +5400,272.46057412724,272.899016984,275.92635483024,-22.95326764291,-3.9922233679219 +7200,272.899016984,269.81794459597,276.88228567654,-53.578809923663,-4.7726707362022 +9000,269.81794459597,279.64597914382,277.82159197955,13.875634892825,-6.0670584579465 +10800,279.64597914382,276.42458720301,278.74025148548,-17.573135974458,-4.0196727079328 +12600,276.42458720301,280.14763817482,279.63433035329,3.9077421760795,-5.2736274640494 +14400,280.14763817482,272.95496522133,280.5,-57.241609291066,-4.7661592246168 +16200,272.95496522133,276.2597915218,281.33355349529,-38.457322485066,-3.9868025342074 +18000,276.2597915218,278.57505613194,282.13142143513,-26.956170196589,-3.929639370556 diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i index 1660e9ed3aa1..8aadb3626c2b 100644 --- 
a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i @@ -24,20 +24,21 @@ control_name = src_control [] [r_transfer] - type = MultiAppReporterTransfer + type = SamplerReporterTransfer from_multi_app = runner - to_reporters = 'results/center_temp results/env_temp results/reward results/left_flux results/log_prob_left_flux' - from_reporters = 'T_reporter/center_temp_tend:value T_reporter/env_temp:value T_reporter/reward:value T_reporter/left_flux:value T_reporter/log_prob_left_flux:value' + sampler = dummy + stochastic_reporter = storage + from_reporter = 'T_reporter/center_temp_tend:value T_reporter/env_temp:value T_reporter/reward:value T_reporter/left_flux:value T_reporter/log_prob_left_flux:value' [] [] [Trainers] [nn_trainer] type = LibtorchDRLControlTrainer - response = 'results/center_temp results/env_temp' - control = 'results/left_flux' - log_probability = 'results/log_prob_left_flux' - reward = 'results/reward' + response = 'storage/r_transfer:T_reporter:center_temp_tend:value storage/r_transfer:T_reporter:env_temp:value' + control = 'storage/r_transfer:T_reporter:left_flux:value' + log_probability = 'storage/r_transfer:T_reporter:log_prob_left_flux:value' + reward = 'storage/r_transfer:T_reporter:reward:value' num_epochs = 10 update_frequency = 2 @@ -55,19 +56,19 @@ input_timesteps = 2 response_scaling_factors = '0.03 0.03' response_shift_factors = '270 270' + action_scaling_factors = 100 action_standard_deviations = '0.1' read_from_file = false + shift_outputs = false [] [] [Reporters] - [results] - type = ConstantReporter - real_vector_names = 'center_temp env_temp reward left_flux log_prob_left_flux' - real_vector_values = '0; 0; 0; 0; 0' - outputs = 'csv_out' - execute_on = timestep_begin + [storage] + type = StochasticReporter + parallel_type = ROOT + outputs = none [] [nn_parameters] type = 
DRLControlNeuralNetParameters diff --git a/unit/src/LibtorchActorNeuralNetTest.C b/unit/src/LibtorchActorNeuralNetTest.C index 764509c1adbf..afa6d602caea 100644 --- a/unit/src/LibtorchActorNeuralNetTest.C +++ b/unit/src/LibtorchActorNeuralNetTest.C @@ -17,6 +17,13 @@ namespace { +class TestableLibtorchArtificialNeuralNet : public Moose::LibtorchArtificialNeuralNet +{ +public: + using Moose::LibtorchArtificialNeuralNet::_weights; + using Moose::LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet; +}; + class TestableLibtorchActorNeuralNet : public Moose::LibtorchActorNeuralNet { public: @@ -32,6 +39,35 @@ inverseSoftplusPlusOne(const Real target) } // namespace +TEST(LibtorchActorNeuralNetTest, artificialNetAppliesAffineInputAndOutputScaling) +{ + TestableLibtorchArtificialNeuralNet network("test_ann", + 2, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, 2.0}, + {2.0, 3.0}, + {10.0}); + + ASSERT_EQ(network._weights.size(), 1); + + network._weights[0]->weight.data().fill_(0.0); + network._weights[0]->weight.data()[0][0] = 1.0; + network._weights[0]->weight.data()[0][1] = -1.0; + network._weights[0]->bias.data().fill_(0.0); + + auto input = torch::tensor({{2.0, 6.0}}, at::kDouble); + const Real actual = network.forward(input).item(); + + EXPECT_NEAR(actual, -100.0, 1e-12); +} + TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability) { constexpr Real min_value = -2.0; @@ -69,4 +105,49 @@ TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability) EXPECT_NEAR(actual, expected, 1e-12); } +TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScaling) +{ + constexpr Real input_shift = 1.0; + constexpr Real input_scale = 2.0; + constexpr Real action_scale = 5.0; + const Real log_std = std::log(2.0); + constexpr Real physical_action = 20.0; + constexpr Real expected_deterministic_action = 15.0; + + TestableLibtorchActorNeuralNet network("test_gaussian", + 1, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + 
torch::kDouble, + true, + {input_shift}, + {input_scale}, + {action_scale}); + + network.actionDistributionHead().primaryModule()->weight.data().fill_(1.5); + network.actionDistributionHead().primaryModule()->bias.data().fill_(0.0); + network.actionDistributionHead().secondaryModule()->weight.data().fill_(log_std / 2.0); + network.actionDistributionHead().secondaryModule()->bias.data().fill_(0.0); + + auto input = torch::tensor({{2.0}}, at::kDouble); + const Real deterministic_action = network.evaluate(input, false).item(); + EXPECT_NEAR(deterministic_action, expected_deterministic_action, 1e-12); + + const Real unscaled_mean = expected_deterministic_action / action_scale; + const Real unscaled_action = physical_action / action_scale; + constexpr Real pi = 3.14159265358979323846; + const Real expected_log_probability = + -std::pow(unscaled_action - unscaled_mean, 2) / (2.0 * 4.0) - log_std - + 0.5 * std::log(2.0 * pi) - std::log(action_scale); + + auto action = torch::tensor({{physical_action}}, at::kDouble); + const Real actual_log_probability = network.logProbability(action).item(); + + EXPECT_NEAR(actual_log_probability, expected_log_probability, 1e-12); +} + #endif From 36c4eeb6199071409ceb207133415025eefb252c Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 21 Apr 2026 22:24:53 -0600 Subject: [PATCH 30/51] Fix transfer update and shift. 
--- .../src/libtorch/trainers/LibtorchDRLControlTrainer.C | 2 +- .../src/libtorch/transfers/SamplerNeuralNetControlTransfer.C | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 4a39a8165690..7ec2ce8b83fd 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -507,7 +507,7 @@ LibtorchDRLControlTrainer::computeNumTransitions(const std::size_t raw_sequence_ { unsigned int num_transitions = 0; for (std::size_t raw_index = 0; - raw_index + _timestep_window < raw_sequence_size + static_cast(_shift_outputs); + raw_index + _timestep_window < raw_sequence_size; raw_index += _timestep_window) ++num_transitions; diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C index fb11c80479bc..9c4ea858ac44 100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C @@ -113,13 +113,13 @@ SamplerNeuralNetControlTransfer::initializeToMultiapp() void SamplerNeuralNetControlTransfer::executeToMultiapp() { - if (getToMultiApp()->hasLocalApp(_global_index)) + if (getToMultiApp()->hasLocalApp(_app_index)) { // Get the control neural net from the trainer const Moose::LibtorchArtificialNeuralNet & trainer_nn = _trainer.controlNeuralNet(); // Get the control object from the other app - FEProblemBase & app_problem = _multi_app->appProblemBase(_global_index); + FEProblemBase & app_problem = _multi_app->appProblemBase(_app_index); auto & control_warehouse = app_problem.getControlWarehouse(); std::shared_ptr control_ptr = 
control_warehouse.getActiveObject(_control_name); LibtorchNeuralNetControl * control_object = From 10ef12546e87a123bd81ab053a08d3815bce822a Mon Sep 17 00:00:00 2001 From: Peter German Date: Wed, 22 Apr 2026 16:31:04 -0600 Subject: [PATCH 31/51] Add state-independent variance. Fix restart. --- .../utils/LibtorchActionDistributionHead.h | 5 +- .../libtorch/utils/LibtorchActorNeuralNet.h | 5 +- .../utils/LibtorchActionDistributionHead.C | 24 ++- .../libtorch/utils/LibtorchActorNeuralNet.C | 198 +++++++++++++----- .../surrogates/LibtorchDRLControlTrainer.h | 4 + .../libtorch/controls/LibtorchDRLControl.C | 33 +-- .../trainers/LibtorchDRLControlTrainer.C | 19 +- .../SamplerNeuralNetControlTransfer.C | 14 +- unit/src/LibtorchActorNeuralNetTest.C | 124 ++++++++++- 9 files changed, 347 insertions(+), 79 deletions(-) diff --git a/framework/include/libtorch/utils/LibtorchActionDistributionHead.h b/framework/include/libtorch/utils/LibtorchActionDistributionHead.h index 359c7771b14b..700f4173799f 100644 --- a/framework/include/libtorch/utils/LibtorchActionDistributionHead.h +++ b/framework/include/libtorch/utils/LibtorchActionDistributionHead.h @@ -38,7 +38,8 @@ class LibtorchActionDistributionHead : public torch::nn::Module torch::DeviceType device_type = torch::kCPU, torch::ScalarType scalar_type = torch::kDouble, bool build_on_construct = true, - const std::vector & output_scaling_factors = {}); + const std::vector & output_scaling_factors = {}, + bool state_independent_std = true); LibtorchActionDistributionHead(const LibtorchActionDistributionHead & head, bool build_on_construct = true); @@ -60,6 +61,7 @@ class LibtorchActionDistributionHead : public torch::nn::Module torch::Tensor entropy() const; bool isBounded() const { return !_minimum_values.empty(); } + bool stateIndependentStd() const { return _state_independent_std; } torch::nn::Linear & primaryModule() { return _primary_parameter_module; } const torch::nn::Linear & primaryModule() const { return 
_primary_parameter_module; } @@ -79,6 +81,7 @@ class LibtorchActionDistributionHead : public torch::nn::Module const std::vector _maximum_values; const torch::DeviceType _device_type; const torch::ScalarType _data_type; + const bool _state_independent_std; std::vector _output_scaling_factors; torch::nn::Linear _primary_parameter_module{nullptr}; diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h index 514d6eac20ce..61a62fc355c9 100644 --- a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -41,7 +41,8 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet const bool build_on_construct = true, const std::vector & input_shift_factors = {}, const std::vector & input_scaling_factors = {}, - const std::vector & output_scaling_factors = {}); + const std::vector & output_scaling_factors = {}, + const bool state_independent_std = true); /** * Copy construct an artificial neural network @@ -72,6 +73,7 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet const LibtorchActionDistributionHead & actionDistributionHead() const { return *_action_head; } LibtorchActionDistributionHead & actionDistributionHead() { return *_action_head; } + bool stateIndependentStd() const { return _state_independent_std; } void resetDistributionParams(torch::Tensor input); @@ -85,6 +87,7 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet torch::Tensor _alpha_tensor; torch::Tensor _beta_tensor; torch::Tensor _std_tensor; + const bool _state_independent_std; std::shared_ptr _action_head; }; diff --git a/framework/src/libtorch/utils/LibtorchActionDistributionHead.C b/framework/src/libtorch/utils/LibtorchActionDistributionHead.C index bcd4ff456c42..ea0f97270bd3 100644 --- a/framework/src/libtorch/utils/LibtorchActionDistributionHead.C +++ b/framework/src/libtorch/utils/LibtorchActionDistributionHead.C 
@@ -52,7 +52,8 @@ LibtorchActionDistributionHead::LibtorchActionDistributionHead( const torch::DeviceType device_type, const torch::ScalarType data_type, const bool build_on_construct, - const std::vector & output_scaling_factors) + const std::vector & output_scaling_factors, + const bool state_independent_std) : _name(name), _num_inputs(num_inputs), _num_outputs(num_outputs), @@ -60,6 +61,7 @@ LibtorchActionDistributionHead::LibtorchActionDistributionHead( _maximum_values(maximum_values), _device_type(device_type), _data_type(data_type), + _state_independent_std(state_independent_std), _output_scaling_factors(normalizeActionScalingFactors(output_scaling_factors, num_outputs)) { auto action_scale = _output_scaling_factors; @@ -104,6 +106,7 @@ LibtorchActionDistributionHead::LibtorchActionDistributionHead( _maximum_values(head._maximum_values), _device_type(head._device_type), _data_type(head._data_type), + _state_independent_std(head._state_independent_std), _output_scaling_factors(head._output_scaling_factors) { auto action_scale = _output_scaling_factors; @@ -161,6 +164,15 @@ LibtorchActionDistributionHead::initialize() torch::nn::init::orthogonal_(_primary_parameter_module->weight, 1.0 / primary_max_dim_size); torch::nn::init::zeros_(_primary_parameter_module->bias); + if (!isBounded() && _state_independent_std) + { + // Match the TorchRL reference more closely: learn one global log-std per action dimension + // instead of conditioning the exploration scale on the current state features. 
+ _secondary_parameter_module->weight.data().zero_(); + torch::nn::init::zeros_(_secondary_parameter_module->bias); + return; + } + const auto secondary_sizes = _secondary_parameter_module->weight.sizes(); const auto secondary_max_dim_size = *std::max_element(secondary_sizes.begin(), secondary_sizes.end()); @@ -200,7 +212,15 @@ LibtorchActionDistributionHead::reset(const torch::Tensor & input) } _mean = _primary_parameter_module->forward(features); - _log_std_tensor = _secondary_parameter_module->forward(features); + if (_state_independent_std) + { + if (_mean.dim() <= 1) + _log_std_tensor = _secondary_parameter_module->bias; + else + _log_std_tensor = _secondary_parameter_module->bias.view({1, -1}).expand(_mean.sizes()); + } + else + _log_std_tensor = _secondary_parameter_module->forward(features); _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12)); _std_tensor = torch::exp(_log_std_tensor); } diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C index c127020268ec..b011723d9565 100644 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -78,6 +78,130 @@ findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch: return false; } +template +bool +readScriptedActorStateTensor(const NamedTensorList & tensors, + const std::string & key, + torch::Tensor & tensor) +{ + if (findNamedTensor(tensors, key, tensor)) + return true; + + if (key.rfind("action_head.", 0) == 0) + return findNamedTensor(tensors, key.substr(std::string("action_head.").size()), tensor); + + return false; +} + +bool +loadActorStateFromArchive(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename, + std::string & error) +{ + try + { + torch::serialize::InputArchive archive; + archive.load_from(filename); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if 
(!readActorStateTensor(archive, parameter.key(), stored_tensor)) + { + if (isOptionalActorParameter(parameter.key())) + { + parameter.value().data().zero_(); + continue; + } + + error = "Missing serialized parameter: " + parameter.key(); + return false; + } + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!readActorStateTensor(archive, buffer.key(), stored_tensor)) + { + if (isOptionalActorBuffer(buffer.key())) + continue; + + error = "Missing serialized buffer: " + buffer.key(); + return false; + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + nn.actionDistributionHead().synchronizeScalingFactorsFromBuffer(); + return true; + } + catch (const c10::Error & e) + { + error = e.msg(); + return false; + } +} + +bool +loadActorStateFromTorchScript(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename, + std::string & error) +{ + try + { + const auto scripted = torch::jit::load(filename); + const auto scripted_parameters = scripted.named_parameters(); + const auto scripted_buffers = scripted.named_buffers(); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if (!readScriptedActorStateTensor(scripted_parameters, parameter.key(), stored_tensor)) + { + if (isOptionalActorParameter(parameter.key())) + { + parameter.value().data().zero_(); + continue; + } + + error = "Missing scripted parameter: " + parameter.key(); + return false; + } + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!readScriptedActorStateTensor(scripted_buffers, buffer.key(), stored_tensor)) + { + if (isOptionalActorBuffer(buffer.key())) + continue; + + error = "Missing scripted buffer: " + buffer.key(); + return false; + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + 
nn.actionDistributionHead().synchronizeScalingFactorsFromBuffer(); + return true; + } + catch (const c10::Error & e) + { + error = e.msg(); + return false; + } +} + } // namespace namespace Moose @@ -96,7 +220,8 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( const bool build_on_construct, const std::vector & input_shift_factors, const std::vector & input_scaling_factors, - const std::vector & output_scaling_factors) + const std::vector & output_scaling_factors, + const bool state_independent_std) : LibtorchArtificialNeuralNet(name, num_inputs, num_outputs, @@ -109,7 +234,8 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( false, input_shift_factors, input_scaling_factors, - output_scaling_factors) + output_scaling_factors), + _state_independent_std(state_independent_std) { if (build_on_construct) constructNeuralNetwork(); @@ -117,7 +243,8 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( LibtorchActorNeuralNet::LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, const bool build_on_construct) - : LibtorchArtificialNeuralNet(dynamic_cast(nn), false) + : LibtorchArtificialNeuralNet(dynamic_cast(nn), false), + _state_independent_std(nn.stateIndependentStd()) { // We construct the NN architecture if (build_on_construct) @@ -178,7 +305,8 @@ LibtorchActorNeuralNet::constructNeuralNetwork() _device_type, _data_type, true, - _output_scaling_factors); + _output_scaling_factors, + _state_independent_std); register_module("action_head", _action_head); } @@ -254,52 +382,22 @@ LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename) { - torch::serialize::InputArchive archive; - archive.load_from(filename); - - for (auto & parameter : nn.named_parameters()) - { - torch::Tensor stored_tensor; - if (!readActorStateTensor(archive, parameter.key(), stored_tensor)) - { - if (isOptionalActorParameter(parameter.key())) - { - 
parameter.value().data().zero_(); - continue; - } - - mooseError("The requested pytorch parameter file could not be loaded. This can either be " - "the result of the file not existing or a misalignment in the generated " - "container and the data in the file. Make sure the dimensions of the generated " - "neural net are the same as the dimensions of the parameters in the input file!\n" - "Missing serialized parameter: ", - parameter.key()); - } - - copyTensor(parameter.value(), stored_tensor); - } - - for (auto & buffer : nn.named_buffers()) - { - torch::Tensor stored_tensor; - if (!readActorStateTensor(archive, buffer.key(), stored_tensor)) - { - if (isOptionalActorBuffer(buffer.key())) - continue; - - mooseError("The requested pytorch parameter file could not be loaded. This can either be " - "the result of the file not existing or a misalignment in the generated " - "container and the data in the file. Make sure the dimensions of the generated " - "neural net are the same as the dimensions of the parameters in the input file!\n" - "Missing serialized buffer: ", - buffer.key()); - } - - copyTensor(buffer.value(), stored_tensor); - } - - nn.synchronizeAffineFactorsFromBuffers(); - nn.actionDistributionHead().synchronizeScalingFactorsFromBuffer(); + std::string archive_error; + if (loadActorStateFromArchive(nn, filename, archive_error)) + return; + + std::string torchscript_error; + if (loadActorStateFromTorchScript(nn, filename, torchscript_error)) + return; + + mooseError("The requested pytorch parameter file could not be loaded. This can either be " + "the result of the file not existing or a misalignment in the generated " + "container and the data in the file. 
Make sure the dimensions of the generated " + "neural net are the same as the dimensions of the parameters in the input file!\n" + "InputArchive load failed with: ", + archive_error, + "\nTorchScript load failed with: ", + torchscript_error); } bool diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 230ac5009f3e..42472cb929dc 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -51,6 +51,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase void trainController(const LibtorchRLTrajectoryBuffer::TensorBatch & batch); const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const { return *_control_nn; } + unsigned int seed() const { return _seed; } protected: /// Compute the average eposiodic reward @@ -156,6 +157,9 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// The frequency the loss should be printed const unsigned int _loss_print_frequency; + /// Base seed for stochastic optimizers and policy sampling. + const unsigned int _seed; + /// min std::vector _min_values; /// max diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 867357f72ac9..870060acbf0a 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -50,6 +50,11 @@ LibtorchDRLControl::validParams() {}, "Deprecated compatibility parameter. Actor policies now learn their own action " "distribution widths."); + params.addParam( + "state_independent_std", + true, + "If true, interpret the unbounded Gaussian actor as learning one log-std per action " + "dimension. 
If false, use a state-dependent std head."); return params; } @@ -101,19 +106,21 @@ LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & paramet const auto input_scaling_factors = _observation_history.expandFeatureFactors(_response_scaling_factors); - auto nn = std::make_shared(filename, - num_inputs, - num_outputs, - num_neurons_per_layer, - activation_functions, - minimum_values, - maximum_values, - torch::kCPU, - torch::kDouble, - true, - input_shift_factors, - input_scaling_factors, - _action_scaling_factors); + auto nn = + std::make_shared(filename, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_functions, + minimum_values, + maximum_values, + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors, + _action_scaling_factors, + getParam("state_independent_std")); try { diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 7ec2ce8b83fd..0ffcffb14287 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -133,6 +133,11 @@ LibtorchDRLControlTrainer::validParams() {}, "Deprecated compatibility parameter. Actor policies now learn their own action " "distribution widths."); + params.addParam( + "state_independent_std", + true, + "If true, learn one Gaussian log-std parameter per action dimension. 
If false, learn a " + "state-dependent std head as in the older MOOSE actor implementation."); params.addParam( "entropy_coeff", 0.01, "Entropy bonus coefficient used in the PPO actor loss."); @@ -179,6 +184,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _average_episode_reward(0.0), _standardize_advantage(getParam("standardize_advantage")), _loss_print_frequency(getParam("loss_print_frequency")), + _seed(getParam("seed")), _min_values(getParam>("min_control_value")), _max_values(getParam>("max_control_value")), _highest_reward(-1e8), @@ -215,7 +221,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par // Fixing the RNG seed to make sure every experiment is the same. // Otherwise sampling / stochastic gradient descent would be different. - torch::manual_seed(getParam("seed")); + torch::manual_seed(_seed); bool filename_valid = isParamValid("filename_base"); const auto input_shift_factors = _observation_history.expandFeatureFactors(_state_shift_factors); @@ -236,7 +242,8 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par true, input_shift_factors, input_scaling_factors, - _action_scaling_factors); + _action_scaling_factors, + getParam("state_independent_std")); // We read parameters for the control neural net if it is requested if (_read_from_file) @@ -391,6 +398,11 @@ LibtorchDRLControlTrainer::trainController(const LibtorchRLTrajectoryBuffer::Ten // fetch the local threads which are available. if (processor_id() == 0) { + // Reset the mini-batch RNG for each outer training step so optimizer shuffling remains + // independent of how rollout sampling happened to be partitioned across MPI ranks. 
+ torch::manual_seed(static_cast(_seed) + + static_cast(_fe_problem.timeStep())); + for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch) { const auto mini_batches = @@ -506,8 +518,7 @@ unsigned int LibtorchDRLControlTrainer::computeNumTransitions(const std::size_t raw_sequence_size) const { unsigned int num_transitions = 0; - for (std::size_t raw_index = 0; - raw_index + _timestep_window < raw_sequence_size; + for (std::size_t raw_index = 0; raw_index + _timestep_window < raw_sequence_size; raw_index += _timestep_window) ++num_transitions; diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C index 9c4ea858ac44..1ab783ff3f9b 100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C @@ -45,11 +45,6 @@ SamplerNeuralNetControlTransfer::SamplerNeuralNetControlTransfer( void SamplerNeuralNetControlTransfer::initialSetup() { - const auto multi_app = getToMultiApp(); - const dof_id_type n = multi_app->numGlobalApps(); - for (MooseIndex(n) i = 0; i < n; i++) - if (multi_app->hasLocalApp(i)) - torch::manual_seed(i); } void @@ -115,6 +110,15 @@ SamplerNeuralNetControlTransfer::executeToMultiapp() { if (getToMultiApp()->hasLocalApp(_app_index)) { + // Use a rank-invariant seed based on the configured trainer seed, the current main-app + // training step, and the sampler row being executed. This keeps the stochastic rollout path + // tied to the actual sample instead of the transient local app slot chosen by batch-reset. 
+ const uint64_t sample_seed = static_cast(_trainer.seed()) + + static_cast(_global_index) + + static_cast(_sampler_ptr->getNumberOfRows()) * + static_cast(_fe_problem.timeStep()); + torch::manual_seed(sample_seed); + // Get the control neural net from the trainer const Moose::LibtorchArtificialNeuralNet & trainer_nn = _trainer.controlNeuralNet(); diff --git a/unit/src/LibtorchActorNeuralNetTest.C b/unit/src/LibtorchActorNeuralNetTest.C index afa6d602caea..b46c49daadfe 100644 --- a/unit/src/LibtorchActorNeuralNetTest.C +++ b/unit/src/LibtorchActorNeuralNetTest.C @@ -11,6 +11,7 @@ #include "gtest/gtest.h" #include "LibtorchActorNeuralNet.h" +#include "MooseUnitUtils.h" #include @@ -105,7 +106,7 @@ TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability) EXPECT_NEAR(actual, expected, 1e-12); } -TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScaling) +TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScalingAndStateIndependentStd) { constexpr Real input_shift = 1.0; constexpr Real input_scale = 2.0; @@ -130,12 +131,13 @@ TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScaling) network.actionDistributionHead().primaryModule()->weight.data().fill_(1.5); network.actionDistributionHead().primaryModule()->bias.data().fill_(0.0); - network.actionDistributionHead().secondaryModule()->weight.data().fill_(log_std / 2.0); - network.actionDistributionHead().secondaryModule()->bias.data().fill_(0.0); + network.actionDistributionHead().secondaryModule()->weight.data().fill_(123.0); + network.actionDistributionHead().secondaryModule()->bias.data().fill_(log_std); auto input = torch::tensor({{2.0}}, at::kDouble); const Real deterministic_action = network.evaluate(input, false).item(); EXPECT_NEAR(deterministic_action, expected_deterministic_action, 1e-12); + EXPECT_NEAR(network.stdTensor().item(), std::exp(log_std), 1e-12); const Real unscaled_mean = expected_deterministic_action / action_scale; const Real unscaled_action = 
physical_action / action_scale; @@ -148,6 +150,122 @@ TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScaling) const Real actual_log_probability = network.logProbability(action).item(); EXPECT_NEAR(actual_log_probability, expected_log_probability, 1e-12); + + auto second_input = torch::tensor({{4.0}}, at::kDouble); + network.evaluate(second_input, false); + EXPECT_NEAR(network.stdTensor().item(), std::exp(log_std), 1e-12); +} + +TEST(LibtorchActorNeuralNetTest, gaussianActorCanUseStateDependentStdWhenRequested) +{ + TestableLibtorchActorNeuralNet network("test_state_dependent_gaussian", + 1, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0}, + {2.0}, + {1.0}, + false); + + network.actionDistributionHead().primaryModule()->weight.data().fill_(0.0); + network.actionDistributionHead().primaryModule()->bias.data().fill_(0.0); + network.actionDistributionHead().secondaryModule()->weight.data().fill_(0.5); + network.actionDistributionHead().secondaryModule()->bias.data().fill_(0.0); + + auto first_input = torch::tensor({{2.0}}, at::kDouble); + network.evaluate(first_input, false); + const Real first_std = network.stdTensor().item(); + + auto second_input = torch::tensor({{4.0}}, at::kDouble); + network.evaluate(second_input, false); + const Real second_std = network.stdTensor().item(); + + EXPECT_NEAR(first_std, std::exp(1.0), 1e-12); + EXPECT_NEAR(second_std, std::exp(3.0), 1e-12); + EXPECT_GT(second_std, first_std); +} + +TEST(LibtorchActorNeuralNetTest, loadActorStateAcceptsTorchSaveArchive) +{ + TestableLibtorchActorNeuralNet saved("saved_actor", + 2, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + saved._weights[0]->weight.data() = torch::tensor({{1.0, 2.0}, {3.0, 4.0}}, at::kDouble); + saved._weights[0]->bias.data() = torch::tensor({5.0, 6.0}, at::kDouble); + saved.actionDistributionHead().primaryModule()->weight.data() = + 
torch::tensor({{7.0, 8.0}}, at::kDouble); + saved.actionDistributionHead().primaryModule()->bias.data() = torch::tensor({9.0}, at::kDouble); + saved.actionDistributionHead().secondaryModule()->weight.data() = + torch::tensor({{-1.5, 2.5}}, at::kDouble); + saved.actionDistributionHead().secondaryModule()->bias.data() = + torch::tensor({-3.5}, at::kDouble); + + Moose::UnitUtils::TempFile archive; + torch::save(std::make_shared(saved), archive.path().string()); + + TestableLibtorchActorNeuralNet restored("restored_actor", + 2, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string()); + + const auto saved_parameters = saved.named_parameters(); + const auto restored_parameters = restored.named_parameters(); + ASSERT_EQ(saved_parameters.size(), restored_parameters.size()); + for (std::size_t i = 0; i < saved_parameters.size(); ++i) + { + EXPECT_EQ(saved_parameters[i].key(), restored_parameters[i].key()); + EXPECT_TRUE(torch::allclose(saved_parameters[i].value(), + restored_parameters[i].value(), + /*rtol=*/0.0, + /*atol=*/0.0)); + } + + const auto saved_buffers = saved.named_buffers(); + const auto restored_buffers = restored.named_buffers(); + ASSERT_EQ(saved_buffers.size(), restored_buffers.size()); + for (std::size_t i = 0; i < saved_buffers.size(); ++i) + { + EXPECT_EQ(saved_buffers[i].key(), restored_buffers[i].key()); + EXPECT_TRUE(torch::allclose(saved_buffers[i].value(), + restored_buffers[i].value(), + /*rtol=*/0.0, + /*atol=*/0.0)); + } + + auto saved_input = torch::tensor({{3.0, -1.0}}, at::kDouble); + auto restored_input = saved_input.clone(); + EXPECT_TRUE(torch::allclose(saved.evaluate(saved_input, false), + restored.evaluate(restored_input, false), + /*rtol=*/0.0, + /*atol=*/0.0)); } #endif From d0fc79a0986361f8a44aa953ccb7d55951cb1ddc Mon Sep 17 00:00:00 2001 From: Peter German Date: Wed, 22 Apr 2026 17:00:31 -0600 
Subject: [PATCH 32/51] Separate Beta and Gaussian distributions. --- .../utils/LibtorchActionDistributionHead.h | 154 +++++-- .../libtorch/utils/LibtorchActorNeuralNet.h | 20 +- .../utils/LibtorchActionDistributionHead.C | 385 +++++++++--------- .../libtorch/utils/LibtorchActorNeuralNet.C | 118 ++++-- .../unit/src/TestLibtorchRLCore.C | 4 +- unit/src/LibtorchActorNeuralNetTest.C | 43 +- 6 files changed, 440 insertions(+), 284 deletions(-) diff --git a/framework/include/libtorch/utils/LibtorchActionDistributionHead.h b/framework/include/libtorch/utils/LibtorchActionDistributionHead.h index 700f4173799f..7cb3dddb4199 100644 --- a/framework/include/libtorch/utils/LibtorchActionDistributionHead.h +++ b/framework/include/libtorch/utils/LibtorchActionDistributionHead.h @@ -22,81 +22,147 @@ namespace Moose { /** - * Reusable continuous-action distribution head for actor policies. - * - * Unbounded actions use a Gaussian parameterization. If both minimum and maximum values are - * provided, the head switches to a bounded Beta parameterization. + * Reusable continuous-action distribution interface for actor policies. 
*/ -class LibtorchActionDistributionHead : public torch::nn::Module +class LibtorchActionDistribution : public torch::nn::Module { public: - LibtorchActionDistributionHead(const std::string & name, - unsigned int num_inputs, - unsigned int num_outputs, - const std::vector & minimum_values = {}, - const std::vector & maximum_values = {}, - torch::DeviceType device_type = torch::kCPU, - torch::ScalarType scalar_type = torch::kDouble, - bool build_on_construct = true, - const std::vector & output_scaling_factors = {}, - bool state_independent_std = true); + LibtorchActionDistribution(const std::string & name, + unsigned int num_inputs, + unsigned int num_outputs, + torch::DeviceType device_type = torch::kCPU, + torch::ScalarType scalar_type = torch::kDouble, + const std::vector & output_scaling_factors = {}); + + virtual void initialize() = 0; + + virtual void reset(const torch::Tensor & input) = 0; + + virtual torch::Tensor sample() const = 0; - LibtorchActionDistributionHead(const LibtorchActionDistributionHead & head, - bool build_on_construct = true); + virtual torch::Tensor deterministicAction() const = 0; - void constructHead(); + virtual torch::Tensor logProbability(const torch::Tensor & action) const = 0; - void initialize(); + virtual torch::Tensor entropy() const = 0; + + virtual bool isBounded() const = 0; void synchronizeScalingFactorsFromBuffer(); - void reset(const torch::Tensor & input); +protected: + torch::Tensor prepareFeatures(const torch::Tensor & input) const; + torch::Tensor prepareAction(const torch::Tensor & action) const; + const torch::Tensor & actionScaleTensor() const { return _action_scale_tensor; } + + const std::string _name; + const unsigned int _num_inputs; + const unsigned int _num_outputs; + const torch::DeviceType _device_type; + const torch::ScalarType _data_type; + std::vector _output_scaling_factors; + + torch::Tensor _action_scale_tensor; +}; - torch::Tensor sample() const; +/** + * Gaussian action distribution for unbounded 
action spaces. + */ +class LibtorchGaussianActionDistribution : public LibtorchActionDistribution +{ +public: + LibtorchGaussianActionDistribution(const std::string & name, + unsigned int num_inputs, + unsigned int num_outputs, + torch::DeviceType device_type = torch::kCPU, + torch::ScalarType scalar_type = torch::kDouble, + bool build_on_construct = true, + const std::vector & output_scaling_factors = {}, + bool state_independent_std = true); - torch::Tensor deterministicAction() const; + virtual void initialize() override; - torch::Tensor logProbability(const torch::Tensor & action) const; + virtual void reset(const torch::Tensor & input) override; - torch::Tensor entropy() const; + virtual torch::Tensor sample() const override; - bool isBounded() const { return !_minimum_values.empty(); } - bool stateIndependentStd() const { return _state_independent_std; } + virtual torch::Tensor deterministicAction() const override; - torch::nn::Linear & primaryModule() { return _primary_parameter_module; } - const torch::nn::Linear & primaryModule() const { return _primary_parameter_module; } + virtual torch::Tensor logProbability(const torch::Tensor & action) const override; - torch::nn::Linear & secondaryModule() { return _secondary_parameter_module; } - const torch::nn::Linear & secondaryModule() const { return _secondary_parameter_module; } + virtual torch::Tensor entropy() const override; + virtual bool isBounded() const override { return false; } + + bool stateIndependentStd() const { return _state_independent_std; } + torch::nn::Linear & meanModule() { return _mean_module; } + const torch::nn::Linear & meanModule() const { return _mean_module; } + torch::nn::Linear & stdModule() { return _std_module; } + const torch::nn::Linear & stdModule() const { return _std_module; } const torch::Tensor & stdTensor() const { return _std_tensor; } + +private: + void constructDistribution(); + + const bool _state_independent_std; + torch::nn::Linear _mean_module{nullptr}; + 
torch::nn::Linear _std_module{nullptr}; + torch::Tensor _mean; + torch::Tensor _std_tensor; + torch::Tensor _log_std_tensor; +}; + +/** + * Beta action distribution for bounded action spaces. + */ +class LibtorchBetaActionDistribution : public LibtorchActionDistribution +{ +public: + LibtorchBetaActionDistribution(const std::string & name, + unsigned int num_inputs, + unsigned int num_outputs, + const std::vector & minimum_values, + const std::vector & maximum_values, + torch::DeviceType device_type = torch::kCPU, + torch::ScalarType scalar_type = torch::kDouble, + bool build_on_construct = true, + const std::vector & output_scaling_factors = {}); + + virtual void initialize() override; + + virtual void reset(const torch::Tensor & input) override; + + virtual torch::Tensor sample() const override; + + virtual torch::Tensor deterministicAction() const override; + + virtual torch::Tensor logProbability(const torch::Tensor & action) const override; + + virtual torch::Tensor entropy() const override; + + virtual bool isBounded() const override { return true; } + + torch::nn::Linear & alphaModule() { return _alpha_module; } + const torch::nn::Linear & alphaModule() const { return _alpha_module; } + torch::nn::Linear & betaModule() { return _beta_module; } + const torch::nn::Linear & betaModule() const { return _beta_module; } const torch::Tensor & alphaTensor() const { return _alpha_tensor; } const torch::Tensor & betaTensor() const { return _beta_tensor; } private: - const std::string _name; - const unsigned int _num_inputs; - const unsigned int _num_outputs; + void constructDistribution(); + const std::vector _minimum_values; const std::vector _maximum_values; - const torch::DeviceType _device_type; - const torch::ScalarType _data_type; - const bool _state_independent_std; - std::vector _output_scaling_factors; - torch::nn::Linear _primary_parameter_module{nullptr}; - torch::nn::Linear _secondary_parameter_module{nullptr}; - - torch::Tensor _action_scale_tensor; + 
torch::nn::Linear _alpha_module{nullptr}; + torch::nn::Linear _beta_module{nullptr}; torch::Tensor _min_tensor; torch::Tensor _max_tensor; torch::Tensor _alpha_tensor; torch::Tensor _beta_tensor; torch::Tensor _alpha_beta_tensor; torch::Tensor _log_norm; - torch::Tensor _mean_tensor; - torch::Tensor _std_tensor; - torch::Tensor _log_std_tensor; torch::Tensor _mean; }; diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h index 61a62fc355c9..615ef5a4f0c3 100644 --- a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -65,14 +65,19 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet /// Construct the neural network virtual void constructNeuralNetwork() override; - const torch::Tensor & stdTensor() const { return _std_tensor; } + const LibtorchActionDistribution & actionDistribution() const { return *_action_distribution; } + LibtorchActionDistribution & actionDistribution() { return *_action_distribution; } - const torch::Tensor & alphaTensor() const { return _alpha_tensor; } + const LibtorchGaussianActionDistribution * gaussianActionDistributionPtr() const; + LibtorchGaussianActionDistribution * gaussianActionDistributionPtr(); + const LibtorchGaussianActionDistribution & gaussianActionDistribution() const; + LibtorchGaussianActionDistribution & gaussianActionDistribution(); - const torch::Tensor & betaTensor() const { return _beta_tensor; } + const LibtorchBetaActionDistribution * betaActionDistributionPtr() const; + LibtorchBetaActionDistribution * betaActionDistributionPtr(); + const LibtorchBetaActionDistribution & betaActionDistribution() const; + LibtorchBetaActionDistribution & betaActionDistribution(); - const LibtorchActionDistributionHead & actionDistributionHead() const { return *_action_head; } - LibtorchActionDistributionHead & actionDistributionHead() { return *_action_head; } bool 
stateIndependentStd() const { return _state_independent_std; } void resetDistributionParams(torch::Tensor input); @@ -84,11 +89,8 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet virtual void initializeNeuralNetwork() override; protected: - torch::Tensor _alpha_tensor; - torch::Tensor _beta_tensor; - torch::Tensor _std_tensor; const bool _state_independent_std; - std::shared_ptr _action_head; + std::shared_ptr _action_distribution; }; void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); diff --git a/framework/src/libtorch/utils/LibtorchActionDistributionHead.C b/framework/src/libtorch/utils/LibtorchActionDistributionHead.C index ea0f97270bd3..f018e41531a9 100644 --- a/framework/src/libtorch/utils/LibtorchActionDistributionHead.C +++ b/framework/src/libtorch/utils/LibtorchActionDistributionHead.C @@ -43,240 +43,144 @@ normalizeActionScalingFactors(const std::vector & factors, const unsigned namespace Moose { -LibtorchActionDistributionHead::LibtorchActionDistributionHead( +LibtorchActionDistribution::LibtorchActionDistribution( const std::string & name, const unsigned int num_inputs, const unsigned int num_outputs, - const std::vector & minimum_values, - const std::vector & maximum_values, const torch::DeviceType device_type, const torch::ScalarType data_type, - const bool build_on_construct, - const std::vector & output_scaling_factors, - const bool state_independent_std) + const std::vector & output_scaling_factors) : _name(name), _num_inputs(num_inputs), _num_outputs(num_outputs), - _minimum_values(minimum_values), - _maximum_values(maximum_values), _device_type(device_type), _data_type(data_type), - _state_independent_std(state_independent_std), _output_scaling_factors(normalizeActionScalingFactors(output_scaling_factors, num_outputs)) { auto action_scale = _output_scaling_factors; LibtorchUtils::vectorToTensor(action_scale, _action_scale_tensor); _action_scale_tensor = register_buffer( "action_scale", 
_action_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); +} - const bool has_minimum_values = !_minimum_values.empty(); - const bool has_maximum_values = !_maximum_values.empty(); - if (has_minimum_values != has_maximum_values) - mooseError("Bounded action heads require both minimum_values and maximum_values."); - - if (has_minimum_values) - { - if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs) - mooseError("The number of minimum_values and maximum_values entries must match the number " - "of action outputs."); - - for (const auto i : make_range(_minimum_values.size())) - if (!(_maximum_values[i] > _minimum_values[i])) - mooseError("maximum_values entries must be strictly greater than minimum_values entries."); - - auto min_value = _minimum_values; - LibtorchUtils::vectorToTensor(min_value, _min_tensor); - _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); - auto max_value = _maximum_values; - LibtorchUtils::vectorToTensor(max_value, _max_tensor); - _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); - } - - if (build_on_construct) - constructHead(); +void +LibtorchActionDistribution::synchronizeScalingFactorsFromBuffer() +{ + auto action_scale = + _action_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(action_scale, _output_scaling_factors); } -LibtorchActionDistributionHead::LibtorchActionDistributionHead( - const LibtorchActionDistributionHead & head, const bool build_on_construct) - : torch::nn::Module(), - _name(head._name), - _num_inputs(head._num_inputs), - _num_outputs(head._num_outputs), - _minimum_values(head._minimum_values), - _maximum_values(head._maximum_values), - _device_type(head._device_type), - _data_type(head._data_type), - _state_independent_std(head._state_independent_std), - _output_scaling_factors(head._output_scaling_factors) +torch::Tensor +LibtorchActionDistribution::prepareFeatures(const 
torch::Tensor & input) const { - auto action_scale = _output_scaling_factors; - LibtorchUtils::vectorToTensor(action_scale, _action_scale_tensor); - _action_scale_tensor = register_buffer( - "action_scale", _action_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); + auto features = input; + if (_data_type != features.scalar_type()) + features = features.to(_data_type); + if (_device_type != features.device().type()) + features = features.to(_device_type); + return features; +} - if (_minimum_values.size()) - { - auto min_value = _minimum_values; - LibtorchUtils::vectorToTensor(min_value, _min_tensor); - _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); - auto max_value = _maximum_values; - LibtorchUtils::vectorToTensor(max_value, _max_tensor); - _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); - } +torch::Tensor +LibtorchActionDistribution::prepareAction(const torch::Tensor & action) const +{ + auto scaled_action = action; + if (_data_type != scaled_action.scalar_type()) + scaled_action = scaled_action.to(_data_type); + if (_device_type != scaled_action.device().type()) + scaled_action = scaled_action.to(_device_type); + return scaled_action; +} +LibtorchGaussianActionDistribution::LibtorchGaussianActionDistribution( + const std::string & name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const bool build_on_construct, + const std::vector & output_scaling_factors, + const bool state_independent_std) + : LibtorchActionDistribution( + name, num_inputs, num_outputs, device_type, data_type, output_scaling_factors), + _state_independent_std(state_independent_std) +{ if (build_on_construct) - { - constructHead(); - const auto & from_params = head.named_parameters(); - auto to_params = this->named_parameters(); - for (const auto param_i : make_range(from_params.size())) - to_params[param_i].value().data() = 
from_params[param_i].value().data().clone(); - - const auto & from_buffers = head.named_buffers(); - auto to_buffers = this->named_buffers(); - for (const auto buffer_i : make_range(from_buffers.size())) - to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone(); - } + constructDistribution(); } void -LibtorchActionDistributionHead::constructHead() +LibtorchGaussianActionDistribution::constructDistribution() { - const auto primary_name = isBounded() ? "alpha" : "mean"; - const auto secondary_name = isBounded() ? "beta" : "std"; - - _primary_parameter_module = register_module( - primary_name, - torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); - _secondary_parameter_module = register_module( - secondary_name, - torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); - - _primary_parameter_module->to(_device_type, _data_type); - _secondary_parameter_module->to(_device_type, _data_type); + _mean_module = register_module( + "mean", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + _std_module = register_module( + "std", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + + _mean_module->to(_device_type, _data_type); + _std_module->to(_device_type, _data_type); } void -LibtorchActionDistributionHead::initialize() +LibtorchGaussianActionDistribution::initialize() { - const auto primary_sizes = _primary_parameter_module->weight.sizes(); - const auto primary_max_dim_size = *std::max_element(primary_sizes.begin(), primary_sizes.end()); - torch::nn::init::orthogonal_(_primary_parameter_module->weight, 1.0 / primary_max_dim_size); - torch::nn::init::zeros_(_primary_parameter_module->bias); + const auto mean_sizes = _mean_module->weight.sizes(); + const auto mean_max_dim_size = *std::max_element(mean_sizes.begin(), mean_sizes.end()); + torch::nn::init::orthogonal_(_mean_module->weight, 1.0 / mean_max_dim_size); + 
torch::nn::init::zeros_(_mean_module->bias); - if (!isBounded() && _state_independent_std) + if (_state_independent_std) { - // Match the TorchRL reference more closely: learn one global log-std per action dimension - // instead of conditioning the exploration scale on the current state features. - _secondary_parameter_module->weight.data().zero_(); - torch::nn::init::zeros_(_secondary_parameter_module->bias); + _std_module->weight.data().zero_(); + torch::nn::init::zeros_(_std_module->bias); return; } - const auto secondary_sizes = _secondary_parameter_module->weight.sizes(); - const auto secondary_max_dim_size = - *std::max_element(secondary_sizes.begin(), secondary_sizes.end()); - torch::nn::init::orthogonal_(_secondary_parameter_module->weight, 1.0 / secondary_max_dim_size); - torch::nn::init::zeros_(_secondary_parameter_module->bias); -} - -void -LibtorchActionDistributionHead::synchronizeScalingFactorsFromBuffer() -{ - auto action_scale = - _action_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); - LibtorchUtils::tensorToVector(action_scale, _output_scaling_factors); + const auto std_sizes = _std_module->weight.sizes(); + const auto std_max_dim_size = *std::max_element(std_sizes.begin(), std_sizes.end()); + torch::nn::init::orthogonal_(_std_module->weight, 1.0 / std_max_dim_size); + torch::nn::init::zeros_(_std_module->bias); } void -LibtorchActionDistributionHead::reset(const torch::Tensor & input) +LibtorchGaussianActionDistribution::reset(const torch::Tensor & input) { - auto features = input; - if (_data_type != features.scalar_type()) - features = features.to(_data_type); - if (_device_type != features.device().type()) - features = features.to(_device_type); + const auto features = prepareFeatures(input); + _mean = _mean_module->forward(features); - if (isBounded()) - { - const auto alpha = _primary_parameter_module->forward(features); - _alpha_tensor = torch::log(torch::exp(alpha) + 1.0) + 1.0; - const auto beta = 
_secondary_parameter_module->forward(features); - _beta_tensor = torch::log(torch::exp(beta) + 1.0) + 1.0; - - _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8); - _mean = _alpha_tensor / _alpha_beta_tensor; - _log_norm = - at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor); - return; - } - - _mean = _primary_parameter_module->forward(features); if (_state_independent_std) { if (_mean.dim() <= 1) - _log_std_tensor = _secondary_parameter_module->bias; + _log_std_tensor = _std_module->bias; else - _log_std_tensor = _secondary_parameter_module->bias.view({1, -1}).expand(_mean.sizes()); + _log_std_tensor = _std_module->bias.view({1, -1}).expand(_mean.sizes()); } else - _log_std_tensor = _secondary_parameter_module->forward(features); + _log_std_tensor = _std_module->forward(features); + _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12)); _std_tensor = torch::exp(_log_std_tensor); } torch::Tensor -LibtorchActionDistributionHead::sample() const +LibtorchGaussianActionDistribution::sample() const { - if (isBounded()) - { - const auto alpha_sample = at::_standard_gamma(_alpha_tensor); - const auto beta_sample = at::_standard_gamma(_beta_tensor); - const auto sampled = alpha_sample / (alpha_sample + beta_sample); - return (_min_tensor + (_max_tensor - _min_tensor) * sampled) * _action_scale_tensor; - } - - return at::normal(_mean, _std_tensor) * _action_scale_tensor; + return at::normal(_mean, _std_tensor) * actionScaleTensor(); } torch::Tensor -LibtorchActionDistributionHead::deterministicAction() const +LibtorchGaussianActionDistribution::deterministicAction() const { - if (isBounded()) - return (_min_tensor + (_max_tensor - _min_tensor) * _mean) * _action_scale_tensor; - - return _mean * _action_scale_tensor; + return _mean * actionScaleTensor(); } torch::Tensor -LibtorchActionDistributionHead::logProbability(const torch::Tensor & action) const 
+LibtorchGaussianActionDistribution::logProbability(const torch::Tensor & action) const { - auto scaled_action = action; - if (_data_type != scaled_action.scalar_type()) - scaled_action = scaled_action.to(_data_type); - if (_device_type != scaled_action.device().type()) - scaled_action = scaled_action.to(_device_type); - - const auto log_action_scale = torch::log(torch::abs(_action_scale_tensor)); - const auto unscaled_action = scaled_action / _action_scale_tensor; - - if (isBounded()) - { - const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); - const auto normalized = (unscaled_action - _min_tensor) / scale; - const auto clipped = torch::clamp(normalized, 1e-8, 1.0 - 1e-8); - auto log_prob = (_alpha_tensor - 1.0) * torch::log(clipped) + - (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale) - - log_action_scale; - - const auto out_of_bounds = (normalized < 0.0) | (normalized > 1.0); - if (out_of_bounds.any().item()) - log_prob = torch::where(out_of_bounds, - torch::full_like(log_prob, -std::numeric_limits::infinity()), - log_prob); - - return log_prob; - } + const auto scaled_action = prepareAction(action); + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + const auto unscaled_action = scaled_action / actionScaleTensor(); constexpr Real pi = 3.14159265358979323846; const torch::Tensor var = _std_tensor * _std_tensor; @@ -285,22 +189,135 @@ LibtorchActionDistributionHead::logProbability(const torch::Tensor & action) con } torch::Tensor -LibtorchActionDistributionHead::entropy() const +LibtorchGaussianActionDistribution::entropy() const { - const auto log_action_scale = torch::log(torch::abs(_action_scale_tensor)); - if (isBounded()) - { - const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); - return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) - - (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) + - (_alpha_beta_tensor - 2.0) * 
torch::digamma(_alpha_beta_tensor) + torch::log(scale) + - log_action_scale; - } - + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); constexpr Real pi = 3.14159265358979323846; return 0.5 * std::log(2.0 * pi) + _log_std_tensor + 0.5 + log_action_scale; } +LibtorchBetaActionDistribution::LibtorchBetaActionDistribution( + const std::string & name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const std::vector & minimum_values, + const std::vector & maximum_values, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const bool build_on_construct, + const std::vector & output_scaling_factors) + : LibtorchActionDistribution( + name, num_inputs, num_outputs, device_type, data_type, output_scaling_factors), + _minimum_values(minimum_values), + _maximum_values(maximum_values) +{ + if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs) + mooseError("The number of minimum_values and maximum_values entries must match the number " + "of action outputs."); + + for (const auto i : make_range(_minimum_values.size())) + if (!(_maximum_values[i] > _minimum_values[i])) + mooseError("maximum_values entries must be strictly greater than minimum_values entries."); + + auto min_value = _minimum_values; + LibtorchUtils::vectorToTensor(min_value, _min_tensor); + _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); + auto max_value = _maximum_values; + LibtorchUtils::vectorToTensor(max_value, _max_tensor); + _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); + + if (build_on_construct) + constructDistribution(); +} + +void +LibtorchBetaActionDistribution::constructDistribution() +{ + _alpha_module = register_module( + "alpha", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + _beta_module = register_module( + "beta", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + + 
_alpha_module->to(_device_type, _data_type); + _beta_module->to(_device_type, _data_type); +} + +void +LibtorchBetaActionDistribution::initialize() +{ + const auto alpha_sizes = _alpha_module->weight.sizes(); + const auto alpha_max_dim_size = *std::max_element(alpha_sizes.begin(), alpha_sizes.end()); + torch::nn::init::orthogonal_(_alpha_module->weight, 1.0 / alpha_max_dim_size); + torch::nn::init::zeros_(_alpha_module->bias); + + const auto beta_sizes = _beta_module->weight.sizes(); + const auto beta_max_dim_size = *std::max_element(beta_sizes.begin(), beta_sizes.end()); + torch::nn::init::orthogonal_(_beta_module->weight, 1.0 / beta_max_dim_size); + torch::nn::init::zeros_(_beta_module->bias); +} + +void +LibtorchBetaActionDistribution::reset(const torch::Tensor & input) +{ + const auto features = prepareFeatures(input); + const auto alpha = _alpha_module->forward(features); + _alpha_tensor = torch::log(torch::exp(alpha) + 1.0) + 1.0; + const auto beta = _beta_module->forward(features); + _beta_tensor = torch::log(torch::exp(beta) + 1.0) + 1.0; + + _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8); + _mean = _alpha_tensor / _alpha_beta_tensor; + _log_norm = at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor); +} + +torch::Tensor +LibtorchBetaActionDistribution::sample() const +{ + const auto alpha_sample = at::_standard_gamma(_alpha_tensor); + const auto beta_sample = at::_standard_gamma(_beta_tensor); + const auto sampled = alpha_sample / (alpha_sample + beta_sample); + return (_min_tensor + (_max_tensor - _min_tensor) * sampled) * actionScaleTensor(); +} + +torch::Tensor +LibtorchBetaActionDistribution::deterministicAction() const +{ + return (_min_tensor + (_max_tensor - _min_tensor) * _mean) * actionScaleTensor(); +} + +torch::Tensor +LibtorchBetaActionDistribution::logProbability(const torch::Tensor & action) const +{ + const auto scaled_action = prepareAction(action); + const auto log_action_scale = 
torch::log(torch::abs(actionScaleTensor())); + const auto unscaled_action = scaled_action / actionScaleTensor(); + const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); + const auto normalized = (unscaled_action - _min_tensor) / scale; + const auto clipped = torch::clamp(normalized, 1e-8, 1.0 - 1e-8); + auto log_prob = (_alpha_tensor - 1.0) * torch::log(clipped) + + (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale) - + log_action_scale; + + const auto out_of_bounds = (normalized < 0.0) | (normalized > 1.0); + if (out_of_bounds.any().item()) + log_prob = torch::where(out_of_bounds, + torch::full_like(log_prob, -std::numeric_limits::infinity()), + log_prob); + + return log_prob; +} + +torch::Tensor +LibtorchBetaActionDistribution::entropy() const +{ + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); + return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) - + (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) + + (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale) + + log_action_scale; +} + } // namespace Moose #endif diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C index b011723d9565..1a11f2e46cd7 100644 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -137,7 +137,7 @@ loadActorStateFromArchive(Moose::LibtorchActorNeuralNet & nn, } nn.synchronizeAffineFactorsFromBuffers(); - nn.actionDistributionHead().synchronizeScalingFactorsFromBuffer(); + nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); return true; } catch (const c10::Error & e) @@ -192,7 +192,7 @@ loadActorStateFromTorchScript(Moose::LibtorchActorNeuralNet & nn, } nn.synchronizeAffineFactorsFromBuffers(); - 
nn.actionDistributionHead().synchronizeScalingFactorsFromBuffer(); + nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); return true; } catch (const c10::Error & e) @@ -278,7 +278,7 @@ LibtorchActorNeuralNet::initializeNeuralNetwork() torch::nn::init::zeros_(_weights[i]->bias); } - _action_head->initialize(); + _action_distribution->initialize(); } void @@ -297,32 +297,102 @@ LibtorchActorNeuralNet::constructNeuralNetwork() inp_neurons = _num_neurons_per_layer[i]; } - _action_head = std::make_shared("action_head", - inp_neurons, - _num_outputs, - _minimum_values, - _maximum_values, - _device_type, - _data_type, - true, - _output_scaling_factors, - _state_independent_std); - register_module("action_head", _action_head); + if (_minimum_values.empty() && _maximum_values.empty()) + _action_distribution = + std::make_shared("action_distribution", + inp_neurons, + _num_outputs, + _device_type, + _data_type, + true, + _output_scaling_factors, + _state_independent_std); + else + _action_distribution = + std::make_shared("action_distribution", + inp_neurons, + _num_outputs, + _minimum_values, + _maximum_values, + _device_type, + _data_type, + true, + _output_scaling_factors); + + // Keep the serialized module name stable so existing checkpoints continue to load. 
+ register_module("action_head", _action_distribution); } torch::Tensor LibtorchActorNeuralNet::entropy() { - return _action_head->entropy(); + return _action_distribution->entropy(); +} + +const LibtorchGaussianActionDistribution * +LibtorchActorNeuralNet::gaussianActionDistributionPtr() const +{ + return dynamic_cast(_action_distribution.get()); +} + +LibtorchGaussianActionDistribution * +LibtorchActorNeuralNet::gaussianActionDistributionPtr() +{ + return dynamic_cast(_action_distribution.get()); +} + +const LibtorchGaussianActionDistribution & +LibtorchActorNeuralNet::gaussianActionDistribution() const +{ + const auto * distribution = gaussianActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Gaussian action distribution from a bounded actor."); + return *distribution; +} + +LibtorchGaussianActionDistribution & +LibtorchActorNeuralNet::gaussianActionDistribution() +{ + auto * distribution = gaussianActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Gaussian action distribution from a bounded actor."); + return *distribution; +} + +const LibtorchBetaActionDistribution * +LibtorchActorNeuralNet::betaActionDistributionPtr() const +{ + return dynamic_cast(_action_distribution.get()); +} + +LibtorchBetaActionDistribution * +LibtorchActorNeuralNet::betaActionDistributionPtr() +{ + return dynamic_cast(_action_distribution.get()); +} + +const LibtorchBetaActionDistribution & +LibtorchActorNeuralNet::betaActionDistribution() const +{ + const auto * distribution = betaActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Beta action distribution from an unbounded actor."); + return *distribution; +} + +LibtorchBetaActionDistribution & +LibtorchActorNeuralNet::betaActionDistribution() +{ + auto * distribution = betaActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Beta action distribution from an unbounded actor."); + return *distribution; } void 
LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) { - _action_head->reset(input); - _alpha_tensor = _action_head->alphaTensor(); - _beta_tensor = _action_head->betaTensor(); - _std_tensor = _action_head->stdTensor(); + _action_distribution->reset(input); } torch::Tensor @@ -364,19 +434,19 @@ LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) if (sampled) return sample(); - return _action_head->deterministicAction(); + return _action_distribution->deterministicAction(); } torch::Tensor LibtorchActorNeuralNet::sample() { - return _action_head->sample(); + return _action_distribution->sample(); } torch::Tensor LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) { - return _action_head->logProbability(action); + return _action_distribution->logProbability(action); } void @@ -423,7 +493,7 @@ loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename, const std::vector & action_standard_deviations) { - if (nn.actionDistributionHead().isBounded()) + if (nn.actionDistribution().isBounded()) mooseError("Legacy deterministic DRL checkpoints are only supported for unbounded actors."); const auto legacy_std = action_standard_deviations.empty() @@ -482,7 +552,7 @@ loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, } nn.synchronizeAffineFactorsFromBuffers(); - nn.actionDistributionHead().synchronizeScalingFactorsFromBuffer(); + nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); } } diff --git a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C index 1ec9cef14108..d53dafada6ae 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C +++ b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C @@ -71,8 +71,8 @@ TEST(LibtorchRLCoreTest, PPOLossUsesStoredLogProbabilityAndValueTarget) constexpr Real pi = 3.14159265358979323846; Moose::LibtorchActorNeuralNet policy_network("policy", 1, 
1, {}, {"linear"}); - policy_network.actionDistributionHead().primaryModule()->weight.data().fill_(0.0); - policy_network.actionDistributionHead().secondaryModule()->weight.data().fill_(0.0); + policy_network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0); + policy_network.gaussianActionDistribution().stdModule()->weight.data().fill_(0.0); Moose::LibtorchArtificialNeuralNet value_network("value", 1, 1, {}, {"linear"}); auto value_params = value_network.named_parameters(); diff --git a/unit/src/LibtorchActorNeuralNetTest.C b/unit/src/LibtorchActorNeuralNetTest.C index b46c49daadfe..bc43670042ff 100644 --- a/unit/src/LibtorchActorNeuralNetTest.C +++ b/unit/src/LibtorchActorNeuralNetTest.C @@ -84,16 +84,16 @@ TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability) network._weights[0]->weight.data().fill_(0.0); network._weights[0]->bias.data().fill_(1.0); - network.actionDistributionHead().primaryModule()->weight.data().fill_( + network.betaActionDistribution().alphaModule()->weight.data().fill_( inverseSoftplusPlusOne(alpha_target)); - network.actionDistributionHead().secondaryModule()->weight.data().fill_( + network.betaActionDistribution().betaModule()->weight.data().fill_( inverseSoftplusPlusOne(beta_target)); auto input = torch::zeros({1, 1}, at::kDouble); network.evaluate(input, false); - const Real alpha = network.alphaTensor().item(); - const Real beta = network.betaTensor().item(); + const Real alpha = network.betaActionDistribution().alphaTensor().item(); + const Real beta = network.betaActionDistribution().betaTensor().item(); const Real normalized = (action_value - min_value) / (max_value - min_value); const Real log_norm = std::lgamma(alpha) + std::lgamma(beta) - std::lgamma(alpha + beta); const Real expected = (alpha - 1.0) * std::log(normalized) + @@ -129,15 +129,16 @@ TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScalingAndStateI {input_scale}, {action_scale}); - 
network.actionDistributionHead().primaryModule()->weight.data().fill_(1.5); - network.actionDistributionHead().primaryModule()->bias.data().fill_(0.0); - network.actionDistributionHead().secondaryModule()->weight.data().fill_(123.0); - network.actionDistributionHead().secondaryModule()->bias.data().fill_(log_std); + network.gaussianActionDistribution().meanModule()->weight.data().fill_(1.5); + network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); + network.gaussianActionDistribution().stdModule()->weight.data().fill_(123.0); + network.gaussianActionDistribution().stdModule()->bias.data().fill_(log_std); auto input = torch::tensor({{2.0}}, at::kDouble); const Real deterministic_action = network.evaluate(input, false).item(); EXPECT_NEAR(deterministic_action, expected_deterministic_action, 1e-12); - EXPECT_NEAR(network.stdTensor().item(), std::exp(log_std), 1e-12); + EXPECT_NEAR( + network.gaussianActionDistribution().stdTensor().item(), std::exp(log_std), 1e-12); const Real unscaled_mean = expected_deterministic_action / action_scale; const Real unscaled_action = physical_action / action_scale; @@ -153,7 +154,8 @@ TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScalingAndStateI auto second_input = torch::tensor({{4.0}}, at::kDouble); network.evaluate(second_input, false); - EXPECT_NEAR(network.stdTensor().item(), std::exp(log_std), 1e-12); + EXPECT_NEAR( + network.gaussianActionDistribution().stdTensor().item(), std::exp(log_std), 1e-12); } TEST(LibtorchActorNeuralNetTest, gaussianActorCanUseStateDependentStdWhenRequested) @@ -173,18 +175,18 @@ TEST(LibtorchActorNeuralNetTest, gaussianActorCanUseStateDependentStdWhenRequest {1.0}, false); - network.actionDistributionHead().primaryModule()->weight.data().fill_(0.0); - network.actionDistributionHead().primaryModule()->bias.data().fill_(0.0); - network.actionDistributionHead().secondaryModule()->weight.data().fill_(0.5); - 
network.actionDistributionHead().secondaryModule()->bias.data().fill_(0.0); + network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0); + network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); + network.gaussianActionDistribution().stdModule()->weight.data().fill_(0.5); + network.gaussianActionDistribution().stdModule()->bias.data().fill_(0.0); auto first_input = torch::tensor({{2.0}}, at::kDouble); network.evaluate(first_input, false); - const Real first_std = network.stdTensor().item(); + const Real first_std = network.gaussianActionDistribution().stdTensor().item(); auto second_input = torch::tensor({{4.0}}, at::kDouble); network.evaluate(second_input, false); - const Real second_std = network.stdTensor().item(); + const Real second_std = network.gaussianActionDistribution().stdTensor().item(); EXPECT_NEAR(first_std, std::exp(1.0), 1e-12); EXPECT_NEAR(second_std, std::exp(3.0), 1e-12); @@ -209,13 +211,12 @@ TEST(LibtorchActorNeuralNetTest, loadActorStateAcceptsTorchSaveArchive) saved._weights[0]->weight.data() = torch::tensor({{1.0, 2.0}, {3.0, 4.0}}, at::kDouble); saved._weights[0]->bias.data() = torch::tensor({5.0, 6.0}, at::kDouble); - saved.actionDistributionHead().primaryModule()->weight.data() = + saved.gaussianActionDistribution().meanModule()->weight.data() = torch::tensor({{7.0, 8.0}}, at::kDouble); - saved.actionDistributionHead().primaryModule()->bias.data() = torch::tensor({9.0}, at::kDouble); - saved.actionDistributionHead().secondaryModule()->weight.data() = + saved.gaussianActionDistribution().meanModule()->bias.data() = torch::tensor({9.0}, at::kDouble); + saved.gaussianActionDistribution().stdModule()->weight.data() = torch::tensor({{-1.5, 2.5}}, at::kDouble); - saved.actionDistributionHead().secondaryModule()->bias.data() = - torch::tensor({-3.5}, at::kDouble); + saved.gaussianActionDistribution().stdModule()->bias.data() = torch::tensor({-3.5}, at::kDouble); Moose::UnitUtils::TempFile archive; 
torch::save(std::make_shared(saved), archive.path().string()); From 38db9600236a2c1f2a248f7ac56590794e469685 Mon Sep 17 00:00:00 2001 From: Peter German Date: Wed, 22 Apr 2026 17:15:45 -0600 Subject: [PATCH 33/51] Move action distribution to STM. --- .../utils/LibtorchActionDistribution.h | 171 ++++++ .../libtorch/utils/LibtorchActorNeuralNet.h | 132 +++++ .../utils/LibtorchActionDistribution.C | 323 ++++++++++ .../libtorch/utils/LibtorchActorNeuralNet.C | 560 ++++++++++++++++++ .../unit/src/TestLibtorchActorNeuralNet.C | 272 +++++++++ 5 files changed, 1458 insertions(+) create mode 100644 modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h create mode 100644 modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h create mode 100644 modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C create mode 100644 modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C create mode 100644 modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h new file mode 100644 index 000000000000..7cb3dddb4199 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h @@ -0,0 +1,171 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include + +#include "MooseTypes.h" + +#include +#include + +namespace Moose +{ + +/** + * Reusable continuous-action distribution interface for actor policies. 
+ */ +class LibtorchActionDistribution : public torch::nn::Module +{ +public: + LibtorchActionDistribution(const std::string & name, + unsigned int num_inputs, + unsigned int num_outputs, + torch::DeviceType device_type = torch::kCPU, + torch::ScalarType scalar_type = torch::kDouble, + const std::vector & output_scaling_factors = {}); + + virtual void initialize() = 0; + + virtual void reset(const torch::Tensor & input) = 0; + + virtual torch::Tensor sample() const = 0; + + virtual torch::Tensor deterministicAction() const = 0; + + virtual torch::Tensor logProbability(const torch::Tensor & action) const = 0; + + virtual torch::Tensor entropy() const = 0; + + virtual bool isBounded() const = 0; + + void synchronizeScalingFactorsFromBuffer(); + +protected: + torch::Tensor prepareFeatures(const torch::Tensor & input) const; + torch::Tensor prepareAction(const torch::Tensor & action) const; + const torch::Tensor & actionScaleTensor() const { return _action_scale_tensor; } + + const std::string _name; + const unsigned int _num_inputs; + const unsigned int _num_outputs; + const torch::DeviceType _device_type; + const torch::ScalarType _data_type; + std::vector _output_scaling_factors; + + torch::Tensor _action_scale_tensor; +}; + +/** + * Gaussian action distribution for unbounded action spaces. 
+ */ +class LibtorchGaussianActionDistribution : public LibtorchActionDistribution +{ +public: + LibtorchGaussianActionDistribution(const std::string & name, + unsigned int num_inputs, + unsigned int num_outputs, + torch::DeviceType device_type = torch::kCPU, + torch::ScalarType scalar_type = torch::kDouble, + bool build_on_construct = true, + const std::vector & output_scaling_factors = {}, + bool state_independent_std = true); + + virtual void initialize() override; + + virtual void reset(const torch::Tensor & input) override; + + virtual torch::Tensor sample() const override; + + virtual torch::Tensor deterministicAction() const override; + + virtual torch::Tensor logProbability(const torch::Tensor & action) const override; + + virtual torch::Tensor entropy() const override; + + virtual bool isBounded() const override { return false; } + + bool stateIndependentStd() const { return _state_independent_std; } + torch::nn::Linear & meanModule() { return _mean_module; } + const torch::nn::Linear & meanModule() const { return _mean_module; } + torch::nn::Linear & stdModule() { return _std_module; } + const torch::nn::Linear & stdModule() const { return _std_module; } + const torch::Tensor & stdTensor() const { return _std_tensor; } + +private: + void constructDistribution(); + + const bool _state_independent_std; + torch::nn::Linear _mean_module{nullptr}; + torch::nn::Linear _std_module{nullptr}; + torch::Tensor _mean; + torch::Tensor _std_tensor; + torch::Tensor _log_std_tensor; +}; + +/** + * Beta action distribution for bounded action spaces. 
+ */ +class LibtorchBetaActionDistribution : public LibtorchActionDistribution +{ +public: + LibtorchBetaActionDistribution(const std::string & name, + unsigned int num_inputs, + unsigned int num_outputs, + const std::vector & minimum_values, + const std::vector & maximum_values, + torch::DeviceType device_type = torch::kCPU, + torch::ScalarType scalar_type = torch::kDouble, + bool build_on_construct = true, + const std::vector & output_scaling_factors = {}); + + virtual void initialize() override; + + virtual void reset(const torch::Tensor & input) override; + + virtual torch::Tensor sample() const override; + + virtual torch::Tensor deterministicAction() const override; + + virtual torch::Tensor logProbability(const torch::Tensor & action) const override; + + virtual torch::Tensor entropy() const override; + + virtual bool isBounded() const override { return true; } + + torch::nn::Linear & alphaModule() { return _alpha_module; } + const torch::nn::Linear & alphaModule() const { return _alpha_module; } + torch::nn::Linear & betaModule() { return _beta_module; } + const torch::nn::Linear & betaModule() const { return _beta_module; } + const torch::Tensor & alphaTensor() const { return _alpha_tensor; } + const torch::Tensor & betaTensor() const { return _beta_tensor; } + +private: + void constructDistribution(); + + const std::vector _minimum_values; + const std::vector _maximum_values; + + torch::nn::Linear _alpha_module{nullptr}; + torch::nn::Linear _beta_module{nullptr}; + torch::Tensor _min_tensor; + torch::Tensor _max_tensor; + torch::Tensor _alpha_tensor; + torch::Tensor _beta_tensor; + torch::Tensor _alpha_beta_tensor; + torch::Tensor _log_norm; + torch::Tensor _mean; +}; + +} // namespace Moose + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h new file mode 100644 index 000000000000..cd4cbefecf2a --- /dev/null +++ 
b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -0,0 +1,132 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include +#include +#include "LibtorchActionDistribution.h" +#include "LibtorchArtificialNeuralNet.h" + +namespace Moose +{ + +class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet +{ +public: + /** + * Construct using input parameters + * @param name Name of the neural network + * @param num_inputs The number of input neurons/parameters + * @param num_neurons_per_layer Number of neurons per hidden layer + * @param num_outputs The number of output neurons + */ + LibtorchActorNeuralNet(const std::string name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const std::vector & num_neurons_per_layer, + const std::vector & activation_function = {"relu"}, + const std::vector & minimum_values = {}, + const std::vector & maximum_values = {}, + const torch::DeviceType device_type = torch::kCPU, + const torch::ScalarType scalar_type = torch::kDouble, + const bool build_on_construct = true, + const std::vector & input_shift_factors = {}, + const std::vector & input_scaling_factors = {}, + const std::vector & output_scaling_factors = {}, + const bool state_independent_std = true); + + /** + * Copy construct an artificial neural network + * @param nn The neural network which needs to be copied + */ + LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, + const bool build_on_construct = true); + + /** + * Overriding the forward substitution function for the neural network, unfortunately + * this cannot be const since it creates a graph in the background + * @param x Input 
tensor for the evaluation + */ + virtual torch::Tensor forward(const torch::Tensor & x) override; + + virtual torch::Tensor evaluate(torch::Tensor & input, bool sampled); + + virtual torch::Tensor sample(); + + /// Construct the neural network + virtual void constructNeuralNetwork() override; + + const LibtorchActionDistribution & actionDistribution() const { return *_action_distribution; } + LibtorchActionDistribution & actionDistribution() { return *_action_distribution; } + + const LibtorchGaussianActionDistribution * gaussianActionDistributionPtr() const; + LibtorchGaussianActionDistribution * gaussianActionDistributionPtr(); + const LibtorchGaussianActionDistribution & gaussianActionDistribution() const; + LibtorchGaussianActionDistribution & gaussianActionDistribution(); + + const LibtorchBetaActionDistribution * betaActionDistributionPtr() const; + LibtorchBetaActionDistribution * betaActionDistributionPtr(); + const LibtorchBetaActionDistribution & betaActionDistribution() const; + LibtorchBetaActionDistribution & betaActionDistribution(); + + bool stateIndependentStd() const { return _state_independent_std; } + + void resetDistributionParams(torch::Tensor input); + + torch::Tensor logProbability(const torch::Tensor & other); + + torch::Tensor entropy(); + + virtual void initializeNeuralNetwork() override; + +protected: + const bool _state_independent_std; + std::shared_ptr _action_distribution; +}; + +void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); + +void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename); + +bool isLegacyLibtorchActorArchive(const std::string & filename); + +void loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename, + const std::vector & action_standard_deviations); + +} + +template <> +void dataStore(std::ostream & stream, + std::shared_ptr & nn, + void * context); + +template <> +void 
dataLoad(std::istream & stream, + std::shared_ptr & nn, + void * context); + +// This is needed because the reporter which is used to ouput the neural net parameters to JSON +// requires a dataStore/dataLoad. However, these functions will be empty due to the fact that +// we are only interested in the JSON output and we don't want to output everything +template <> +void dataStore(std::ostream & stream, + Moose::LibtorchActorNeuralNet const *& nn, + void * context); + +template <> +void dataLoad(std::istream & stream, + Moose::LibtorchActorNeuralNet const *& nn, + void * context); + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C new file mode 100644 index 000000000000..53dc7288999b --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C @@ -0,0 +1,323 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchActionDistribution.h" + +#include "LibtorchUtils.h" +#include "MooseError.h" + +#include +#include +#include +#include "libmesh/utility.h" + +namespace +{ + +std::vector +normalizeActionScalingFactors(const std::vector & factors, const unsigned int expected_size) +{ + const auto normalized = factors.empty() ? 
std::vector(expected_size, 1.0) : factors; + + if (normalized.size() != expected_size) + mooseError("The number of output_scaling_factors entries must match the number of action " + "outputs."); + + for (const auto factor : normalized) + if (std::abs(factor) == 0.0) + mooseError("The output_scaling_factors entries must be non-zero."); + + return normalized; +} + +} // namespace + +namespace Moose +{ + +LibtorchActionDistribution::LibtorchActionDistribution( + const std::string & name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const std::vector & output_scaling_factors) + : _name(name), + _num_inputs(num_inputs), + _num_outputs(num_outputs), + _device_type(device_type), + _data_type(data_type), + _output_scaling_factors(normalizeActionScalingFactors(output_scaling_factors, num_outputs)) +{ + auto action_scale = _output_scaling_factors; + LibtorchUtils::vectorToTensor(action_scale, _action_scale_tensor); + _action_scale_tensor = register_buffer( + "action_scale", _action_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); +} + +void +LibtorchActionDistribution::synchronizeScalingFactorsFromBuffer() +{ + auto action_scale = + _action_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(action_scale, _output_scaling_factors); +} + +torch::Tensor +LibtorchActionDistribution::prepareFeatures(const torch::Tensor & input) const +{ + auto features = input; + if (_data_type != features.scalar_type()) + features = features.to(_data_type); + if (_device_type != features.device().type()) + features = features.to(_device_type); + return features; +} + +torch::Tensor +LibtorchActionDistribution::prepareAction(const torch::Tensor & action) const +{ + auto scaled_action = action; + if (_data_type != scaled_action.scalar_type()) + scaled_action = scaled_action.to(_data_type); + if (_device_type != 
scaled_action.device().type()) + scaled_action = scaled_action.to(_device_type); + return scaled_action; +} + +LibtorchGaussianActionDistribution::LibtorchGaussianActionDistribution( + const std::string & name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const bool build_on_construct, + const std::vector & output_scaling_factors, + const bool state_independent_std) + : LibtorchActionDistribution( + name, num_inputs, num_outputs, device_type, data_type, output_scaling_factors), + _state_independent_std(state_independent_std) +{ + if (build_on_construct) + constructDistribution(); +} + +void +LibtorchGaussianActionDistribution::constructDistribution() +{ + _mean_module = register_module( + "mean", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + _std_module = register_module( + "std", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + + _mean_module->to(_device_type, _data_type); + _std_module->to(_device_type, _data_type); +} + +void +LibtorchGaussianActionDistribution::initialize() +{ + const auto mean_sizes = _mean_module->weight.sizes(); + const auto mean_max_dim_size = *std::max_element(mean_sizes.begin(), mean_sizes.end()); + torch::nn::init::orthogonal_(_mean_module->weight, 1.0 / mean_max_dim_size); + torch::nn::init::zeros_(_mean_module->bias); + + if (_state_independent_std) + { + _std_module->weight.data().zero_(); + torch::nn::init::zeros_(_std_module->bias); + return; + } + + const auto std_sizes = _std_module->weight.sizes(); + const auto std_max_dim_size = *std::max_element(std_sizes.begin(), std_sizes.end()); + torch::nn::init::orthogonal_(_std_module->weight, 1.0 / std_max_dim_size); + torch::nn::init::zeros_(_std_module->bias); +} + +void +LibtorchGaussianActionDistribution::reset(const torch::Tensor & input) +{ + const auto features = prepareFeatures(input); + _mean = 
_mean_module->forward(features); + + if (_state_independent_std) + { + if (_mean.dim() <= 1) + _log_std_tensor = _std_module->bias; + else + _log_std_tensor = _std_module->bias.view({1, -1}).expand(_mean.sizes()); + } + else + _log_std_tensor = _std_module->forward(features); + + _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12)); + _std_tensor = torch::exp(_log_std_tensor); +} + +torch::Tensor +LibtorchGaussianActionDistribution::sample() const +{ + return at::normal(_mean, _std_tensor) * actionScaleTensor(); +} + +torch::Tensor +LibtorchGaussianActionDistribution::deterministicAction() const +{ + return _mean * actionScaleTensor(); +} + +torch::Tensor +LibtorchGaussianActionDistribution::logProbability(const torch::Tensor & action) const +{ + const auto scaled_action = prepareAction(action); + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + const auto unscaled_action = scaled_action / actionScaleTensor(); + + constexpr Real pi = 3.14159265358979323846; + const torch::Tensor var = _std_tensor * _std_tensor; + return -((unscaled_action - _mean) * (unscaled_action - _mean)) / (2.0 * var) - _log_std_tensor - + 0.5 * std::log(2.0 * pi) - log_action_scale; +} + +torch::Tensor +LibtorchGaussianActionDistribution::entropy() const +{ + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + constexpr Real pi = 3.14159265358979323846; + return 0.5 * std::log(2.0 * pi) + _log_std_tensor + 0.5 + log_action_scale; +} + +LibtorchBetaActionDistribution::LibtorchBetaActionDistribution( + const std::string & name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const std::vector & minimum_values, + const std::vector & maximum_values, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const bool build_on_construct, + const std::vector & output_scaling_factors) + : LibtorchActionDistribution( + name, num_inputs, num_outputs, device_type, data_type, 
output_scaling_factors), + _minimum_values(minimum_values), + _maximum_values(maximum_values) +{ + if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs) + mooseError("The number of minimum_values and maximum_values entries must match the number " + "of action outputs."); + + for (const auto i : make_range(_minimum_values.size())) + if (!(_maximum_values[i] > _minimum_values[i])) + mooseError("maximum_values entries must be strictly greater than minimum_values entries."); + + auto min_value = _minimum_values; + LibtorchUtils::vectorToTensor(min_value, _min_tensor); + _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); + auto max_value = _maximum_values; + LibtorchUtils::vectorToTensor(max_value, _max_tensor); + _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); + + if (build_on_construct) + constructDistribution(); +} + +void +LibtorchBetaActionDistribution::constructDistribution() +{ + _alpha_module = register_module( + "alpha", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + _beta_module = register_module( + "beta", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + + _alpha_module->to(_device_type, _data_type); + _beta_module->to(_device_type, _data_type); +} + +void +LibtorchBetaActionDistribution::initialize() +{ + const auto alpha_sizes = _alpha_module->weight.sizes(); + const auto alpha_max_dim_size = *std::max_element(alpha_sizes.begin(), alpha_sizes.end()); + torch::nn::init::orthogonal_(_alpha_module->weight, 1.0 / alpha_max_dim_size); + torch::nn::init::zeros_(_alpha_module->bias); + + const auto beta_sizes = _beta_module->weight.sizes(); + const auto beta_max_dim_size = *std::max_element(beta_sizes.begin(), beta_sizes.end()); + torch::nn::init::orthogonal_(_beta_module->weight, 1.0 / beta_max_dim_size); + torch::nn::init::zeros_(_beta_module->bias); +} + +void +LibtorchBetaActionDistribution::reset(const 
torch::Tensor & input) +{ + const auto features = prepareFeatures(input); + const auto alpha = _alpha_module->forward(features); + _alpha_tensor = torch::log(torch::exp(alpha) + 1.0) + 1.0; + const auto beta = _beta_module->forward(features); + _beta_tensor = torch::log(torch::exp(beta) + 1.0) + 1.0; + + _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8); + _mean = _alpha_tensor / _alpha_beta_tensor; + _log_norm = at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor); +} + +torch::Tensor +LibtorchBetaActionDistribution::sample() const +{ + const auto alpha_sample = at::_standard_gamma(_alpha_tensor); + const auto beta_sample = at::_standard_gamma(_beta_tensor); + const auto sampled = alpha_sample / (alpha_sample + beta_sample); + return (_min_tensor + (_max_tensor - _min_tensor) * sampled) * actionScaleTensor(); +} + +torch::Tensor +LibtorchBetaActionDistribution::deterministicAction() const +{ + return (_min_tensor + (_max_tensor - _min_tensor) * _mean) * actionScaleTensor(); +} + +torch::Tensor +LibtorchBetaActionDistribution::logProbability(const torch::Tensor & action) const +{ + const auto scaled_action = prepareAction(action); + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + const auto unscaled_action = scaled_action / actionScaleTensor(); + const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); + const auto normalized = (unscaled_action - _min_tensor) / scale; + const auto clipped = torch::clamp(normalized, 1e-8, 1.0 - 1e-8); + auto log_prob = (_alpha_tensor - 1.0) * torch::log(clipped) + + (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale) - + log_action_scale; + + const auto out_of_bounds = (normalized < 0.0) | (normalized > 1.0); + if (out_of_bounds.any().item()) + log_prob = torch::where(out_of_bounds, + torch::full_like(log_prob, -std::numeric_limits::infinity()), + log_prob); + + return log_prob; +} + +torch::Tensor 
+LibtorchBetaActionDistribution::entropy() const +{ + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); + return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) - + (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) + + (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale) + + log_action_scale; +} + +} // namespace Moose + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C new file mode 100644 index 000000000000..1a11f2e46cd7 --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -0,0 +1,560 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchActorNeuralNet.h" +#include "MooseError.h" + +namespace +{ + +bool +readArchiveTensor(torch::serialize::InputArchive & archive, + const std::string & key, + torch::Tensor & tensor) +{ + try + { + archive.read(key, tensor); + return true; + } + catch (const c10::Error &) + { + return false; + } +} + +void +copyTensor(torch::Tensor & destination, const torch::Tensor & source) +{ + destination.data().copy_(source.to(destination.options())); +} + +bool +readActorStateTensor(torch::serialize::InputArchive & archive, + const std::string & key, + torch::Tensor & tensor) +{ + if (readArchiveTensor(archive, key, tensor)) + return true; + + if (key.rfind("action_head.", 0) == 0) + return readArchiveTensor(archive, key.substr(std::string("action_head.").size()), tensor); + + return false; +} + +bool +isOptionalActorBuffer(const 
std::string & key) +{ + return key == "input_shift" || key == "input_scale" || key == "output_scale" || + key == "action_head.action_scale"; +} + +bool +isOptionalActorParameter(const std::string & key) +{ + return key == "action_head.mean.bias" || key == "action_head.std.bias"; +} + +template +bool +findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch::Tensor & tensor) +{ + for (const auto & entry : tensors) + if (entry.name == key) + { + tensor = entry.value; + return true; + } + + return false; +} + +template +bool +readScriptedActorStateTensor(const NamedTensorList & tensors, + const std::string & key, + torch::Tensor & tensor) +{ + if (findNamedTensor(tensors, key, tensor)) + return true; + + if (key.rfind("action_head.", 0) == 0) + return findNamedTensor(tensors, key.substr(std::string("action_head.").size()), tensor); + + return false; +} + +bool +loadActorStateFromArchive(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename, + std::string & error) +{ + try + { + torch::serialize::InputArchive archive; + archive.load_from(filename); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if (!readActorStateTensor(archive, parameter.key(), stored_tensor)) + { + if (isOptionalActorParameter(parameter.key())) + { + parameter.value().data().zero_(); + continue; + } + + error = "Missing serialized parameter: " + parameter.key(); + return false; + } + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!readActorStateTensor(archive, buffer.key(), stored_tensor)) + { + if (isOptionalActorBuffer(buffer.key())) + continue; + + error = "Missing serialized buffer: " + buffer.key(); + return false; + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); + return true; + } + catch (const c10::Error & e) + { 
+ error = e.msg(); + return false; + } +} + +bool +loadActorStateFromTorchScript(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename, + std::string & error) +{ + try + { + const auto scripted = torch::jit::load(filename); + const auto scripted_parameters = scripted.named_parameters(); + const auto scripted_buffers = scripted.named_buffers(); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if (!readScriptedActorStateTensor(scripted_parameters, parameter.key(), stored_tensor)) + { + if (isOptionalActorParameter(parameter.key())) + { + parameter.value().data().zero_(); + continue; + } + + error = "Missing scripted parameter: " + parameter.key(); + return false; + } + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!readScriptedActorStateTensor(scripted_buffers, buffer.key(), stored_tensor)) + { + if (isOptionalActorBuffer(buffer.key())) + continue; + + error = "Missing scripted buffer: " + buffer.key(); + return false; + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); + return true; + } + catch (const c10::Error & e) + { + error = e.msg(); + return false; + } +} + +} // namespace + +namespace Moose +{ + +LibtorchActorNeuralNet::LibtorchActorNeuralNet( + const std::string name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const std::vector & num_neurons_per_layer, + const std::vector & activation_function, + const std::vector & minimum_values, + const std::vector & maximum_values, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const bool build_on_construct, + const std::vector & input_shift_factors, + const std::vector & input_scaling_factors, + const std::vector & output_scaling_factors, + const bool state_independent_std) + : 
LibtorchArtificialNeuralNet(name, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_function, + minimum_values, + maximum_values, + device_type, + data_type, + false, + input_shift_factors, + input_scaling_factors, + output_scaling_factors), + _state_independent_std(state_independent_std) +{ + if (build_on_construct) + constructNeuralNetwork(); +} + +LibtorchActorNeuralNet::LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, + const bool build_on_construct) + : LibtorchArtificialNeuralNet(dynamic_cast(nn), false), + _state_independent_std(nn.stateIndependentStd()) +{ + // We construct the NN architecture + if (build_on_construct) + { + constructNeuralNetwork(); + // We fill it up with the current parameter values + const auto & from_params = nn.named_parameters(); + auto to_params = this->named_parameters(); + for (unsigned int param_i : make_range(from_params.size())) + to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + + const auto & from_buffers = nn.named_buffers(); + auto to_buffers = this->named_buffers(); + for (unsigned int buffer_i : make_range(from_buffers.size())) + to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone(); + } +} + +void +LibtorchActorNeuralNet::initializeNeuralNetwork() +{ + for (unsigned int i = 0; i < numHiddenLayers(); ++i) + { + const auto & activation = + _activation_function.size() > 1 ? 
_activation_function[i] : _activation_function[0]; + const Real gain = determineGain(activation); + + auto sizes = _weights[i]->weight.sizes(); + auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); + torch::nn::init::orthogonal_(_weights[i]->weight, gain / max_dim_size); + torch::nn::init::zeros_(_weights[i]->bias); + } + + _action_distribution->initialize(); +} + +void +LibtorchActorNeuralNet::constructNeuralNetwork() +{ + // Adding hidden layers + unsigned int inp_neurons = _num_inputs; + for (unsigned int i = 0; i < numHiddenLayers(); ++i) + { + std::unordered_map parameters = { + {"inp_neurons", inp_neurons}, {"out_neurons", _num_neurons_per_layer[i]}}; + addLayer("hidden_layer_" + std::to_string(i + 1), parameters); + + // Necessary to retain double precision (and error-free runs) + _weights[i]->to(_device_type, _data_type); + inp_neurons = _num_neurons_per_layer[i]; + } + + if (_minimum_values.empty() && _maximum_values.empty()) + _action_distribution = + std::make_shared("action_distribution", + inp_neurons, + _num_outputs, + _device_type, + _data_type, + true, + _output_scaling_factors, + _state_independent_std); + else + _action_distribution = + std::make_shared("action_distribution", + inp_neurons, + _num_outputs, + _minimum_values, + _maximum_values, + _device_type, + _data_type, + true, + _output_scaling_factors); + + // Keep the serialized module name stable so existing checkpoints continue to load. 
+ register_module("action_head", _action_distribution); +} + +torch::Tensor +LibtorchActorNeuralNet::entropy() +{ + return _action_distribution->entropy(); +} + +const LibtorchGaussianActionDistribution * +LibtorchActorNeuralNet::gaussianActionDistributionPtr() const +{ + return dynamic_cast(_action_distribution.get()); +} + +LibtorchGaussianActionDistribution * +LibtorchActorNeuralNet::gaussianActionDistributionPtr() +{ + return dynamic_cast(_action_distribution.get()); +} + +const LibtorchGaussianActionDistribution & +LibtorchActorNeuralNet::gaussianActionDistribution() const +{ + const auto * distribution = gaussianActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Gaussian action distribution from a bounded actor."); + return *distribution; +} + +LibtorchGaussianActionDistribution & +LibtorchActorNeuralNet::gaussianActionDistribution() +{ + auto * distribution = gaussianActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Gaussian action distribution from a bounded actor."); + return *distribution; +} + +const LibtorchBetaActionDistribution * +LibtorchActorNeuralNet::betaActionDistributionPtr() const +{ + return dynamic_cast(_action_distribution.get()); +} + +LibtorchBetaActionDistribution * +LibtorchActorNeuralNet::betaActionDistributionPtr() +{ + return dynamic_cast(_action_distribution.get()); +} + +const LibtorchBetaActionDistribution & +LibtorchActorNeuralNet::betaActionDistribution() const +{ + const auto * distribution = betaActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Beta action distribution from an unbounded actor."); + return *distribution; +} + +LibtorchBetaActionDistribution & +LibtorchActorNeuralNet::betaActionDistribution() +{ + auto * distribution = betaActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Beta action distribution from an unbounded actor."); + return *distribution; +} + +void +LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) 
+{ + _action_distribution->reset(input); +} + +torch::Tensor +LibtorchActorNeuralNet::forward(const torch::Tensor & x) +{ + torch::Tensor output = preprocessInput(x); + + for (unsigned int i = 0; i < _weights.size(); ++i) + { + std::string activation = + _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; + if (activation == "relu") + output = torch::relu(_weights[i]->forward(output)); + else if (activation == "sigmoid") + output = torch::sigmoid(_weights[i]->forward(output)); + else if (activation == "tanh") + output = torch::tanh(_weights[i]->forward(output)); + else if (activation == "elu") + output = torch::elu(_weights[i]->forward(output)); + else if (activation == "gelu") + output = torch::gelu(_weights[i]->forward(output)); + else if (activation == "linear") + output = _weights[i]->forward(output); + + // std::cout << "midresult" << i << output << std::endl; + } + + return output; +} + +torch::Tensor +LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) +{ + torch::Tensor output = forward(x); + + // std::cout << "midresult" << output << std::endl; + resetDistributionParams(output); + + if (sampled) + return sample(); + + return _action_distribution->deterministicAction(); +} + +torch::Tensor +LibtorchActorNeuralNet::sample() +{ + return _action_distribution->sample(); +} + +torch::Tensor +LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) +{ + return _action_distribution->logProbability(action); +} + +void +loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename) +{ + std::string archive_error; + if (loadActorStateFromArchive(nn, filename, archive_error)) + return; + + std::string torchscript_error; + if (loadActorStateFromTorchScript(nn, filename, torchscript_error)) + return; + + mooseError("The requested pytorch parameter file could not be loaded. 
This can either be " + "the result of the file not existing or a misalignment in the generated " + "container and the data in the file. Make sure the dimensions of the generated " + "neural net are the same as the dimensions of the parameters in the input file!\n" + "InputArchive load failed with: ", + archive_error, + "\nTorchScript load failed with: ", + torchscript_error); +} + +bool +isLegacyLibtorchActorArchive(const std::string & filename) +{ + try + { + const auto scripted = torch::jit::load(filename); + const auto parameters = scripted.named_parameters(); + + torch::Tensor ignored; + return findNamedTensor(parameters, "output_layer_.weight", ignored) && + !findNamedTensor(parameters, "action_head.mean.weight", ignored); + } + catch (const c10::Error &) + { + return false; + } +} + +void +loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename, + const std::vector & action_standard_deviations) +{ + if (nn.actionDistribution().isBounded()) + mooseError("Legacy deterministic DRL checkpoints are only supported for unbounded actors."); + + const auto legacy_std = action_standard_deviations.empty() + ? 
std::vector(nn.numOutputs(), 1e-12) + : action_standard_deviations; + + if (legacy_std.size() != nn.numOutputs()) + mooseError("The number of action_standard_deviations entries must match the number of action " + "outputs when loading a legacy deterministic DRL checkpoint."); + + for (const auto std_value : legacy_std) + if (!(std_value > 0.0)) + mooseError("Legacy action_standard_deviations entries must be strictly positive."); + + const auto scripted = torch::jit::load(filename); + const auto legacy_parameters = scripted.named_parameters(); + + for (auto & parameter : nn.named_parameters()) + { + const auto & key = parameter.key(); + torch::Tensor stored_tensor; + + if (key == "action_head.mean.weight") + { + if (!findNamedTensor(legacy_parameters, "output_layer_.weight", stored_tensor)) + mooseError("Legacy deterministic DRL checkpoint is missing output_layer_.weight."); + copyTensor(parameter.value(), stored_tensor); + continue; + } + + if (key == "action_head.mean.bias") + { + if (!findNamedTensor(legacy_parameters, "output_layer_.bias", stored_tensor)) + mooseError("Legacy deterministic DRL checkpoint is missing output_layer_.bias."); + copyTensor(parameter.value(), stored_tensor); + continue; + } + + if (key == "action_head.std.weight") + { + parameter.value().data().zero_(); + continue; + } + + if (key == "action_head.std.bias") + { + auto log_std = torch::log(torch::tensor(legacy_std, parameter.value().options())); + copyTensor(parameter.value(), log_std); + continue; + } + + if (!findNamedTensor(legacy_parameters, key, stored_tensor)) + mooseError("Legacy deterministic DRL checkpoint is missing serialized parameter: ", key); + + copyTensor(parameter.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); +} + +} + +#endif diff --git a/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C new file 
mode 100644 index 000000000000..3062eebaa2d6 --- /dev/null +++ b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C @@ -0,0 +1,272 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "gtest/gtest.h" +#include "LibtorchActorNeuralNet.h" +#include "MooseUnitUtils.h" + +#include + +namespace +{ + +class TestableLibtorchArtificialNeuralNet : public Moose::LibtorchArtificialNeuralNet +{ +public: + using Moose::LibtorchArtificialNeuralNet::_weights; + using Moose::LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet; +}; + +class TestableLibtorchActorNeuralNet : public Moose::LibtorchActorNeuralNet +{ +public: + using Moose::LibtorchActorNeuralNet::_weights; + using Moose::LibtorchActorNeuralNet::LibtorchActorNeuralNet; +}; + +Real +inverseSoftplusPlusOne(const Real target) +{ + return std::log(std::exp(target - 1.0) - 1.0); +} + +} // namespace + +TEST(LibtorchActorNeuralNetTest, artificialNetAppliesAffineInputAndOutputScaling) +{ + TestableLibtorchArtificialNeuralNet network("test_ann", + 2, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, 2.0}, + {2.0, 3.0}, + {10.0}); + + ASSERT_EQ(network._weights.size(), 1u); + + network._weights[0]->weight.data().fill_(0.0); + network._weights[0]->weight.data()[0][0] = 1.0; + network._weights[0]->weight.data()[0][1] = -1.0; + network._weights[0]->bias.data().fill_(0.0); + + auto input = torch::tensor({{2.0, 6.0}}, at::kDouble); + const Real actual = network.forward(input).item(); + + EXPECT_NEAR(actual, -100.0, 1e-12); +} + +TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability) +{ + constexpr Real min_value = -2.0; + constexpr Real max_value = 4.0; + 
constexpr Real alpha_target = 2.3; + constexpr Real beta_target = 3.7; + constexpr Real action_value = 1.2; + + TestableLibtorchActorNeuralNet network( + "test_beta", 1, 1, {1}, {"linear"}, {min_value}, {max_value}); + + ASSERT_EQ(network._weights.size(), 1u); + + network._weights[0]->weight.data().fill_(0.0); + network._weights[0]->bias.data().fill_(1.0); + network.betaActionDistribution().alphaModule()->weight.data().fill_( + inverseSoftplusPlusOne(alpha_target)); + network.betaActionDistribution().betaModule()->weight.data().fill_( + inverseSoftplusPlusOne(beta_target)); + + auto input = torch::zeros({1, 1}, at::kDouble); + network.evaluate(input, false); + + const Real alpha = network.betaActionDistribution().alphaTensor().item(); + const Real beta = network.betaActionDistribution().betaTensor().item(); + const Real normalized = (action_value - min_value) / (max_value - min_value); + const Real log_norm = std::lgamma(alpha) + std::lgamma(beta) - std::lgamma(alpha + beta); + const Real expected = (alpha - 1.0) * std::log(normalized) + + (beta - 1.0) * std::log1p(-normalized) - log_norm - + std::log(max_value - min_value); + + auto action = torch::tensor({{action_value}}, at::kDouble); + const Real actual = network.logProbability(action).item(); + + EXPECT_NEAR(actual, expected, 1e-12); +} + +TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScalingAndStateIndependentStd) +{ + constexpr Real input_shift = 1.0; + constexpr Real input_scale = 2.0; + constexpr Real action_scale = 5.0; + const Real log_std = std::log(2.0); + constexpr Real physical_action = 20.0; + constexpr Real expected_deterministic_action = 15.0; + + TestableLibtorchActorNeuralNet network("test_gaussian", + 1, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {input_shift}, + {input_scale}, + {action_scale}); + + network.gaussianActionDistribution().meanModule()->weight.data().fill_(1.5); + 
network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); + network.gaussianActionDistribution().stdModule()->weight.data().fill_(123.0); + network.gaussianActionDistribution().stdModule()->bias.data().fill_(log_std); + + auto input = torch::tensor({{2.0}}, at::kDouble); + const Real deterministic_action = network.evaluate(input, false).item(); + EXPECT_NEAR(deterministic_action, expected_deterministic_action, 1e-12); + EXPECT_NEAR( + network.gaussianActionDistribution().stdTensor().item(), std::exp(log_std), 1e-12); + + const Real unscaled_mean = expected_deterministic_action / action_scale; + const Real unscaled_action = physical_action / action_scale; + constexpr Real pi = 3.14159265358979323846; + const Real expected_log_probability = + -std::pow(unscaled_action - unscaled_mean, 2) / (2.0 * 4.0) - log_std - + 0.5 * std::log(2.0 * pi) - std::log(action_scale); + + auto action = torch::tensor({{physical_action}}, at::kDouble); + const Real actual_log_probability = network.logProbability(action).item(); + + EXPECT_NEAR(actual_log_probability, expected_log_probability, 1e-12); + + auto second_input = torch::tensor({{4.0}}, at::kDouble); + network.evaluate(second_input, false); + EXPECT_NEAR( + network.gaussianActionDistribution().stdTensor().item(), std::exp(log_std), 1e-12); +} + +TEST(LibtorchActorNeuralNetTest, gaussianActorCanUseStateDependentStdWhenRequested) +{ + TestableLibtorchActorNeuralNet network("test_state_dependent_gaussian", + 1, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0}, + {2.0}, + {1.0}, + false); + + network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0); + network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); + network.gaussianActionDistribution().stdModule()->weight.data().fill_(0.5); + network.gaussianActionDistribution().stdModule()->bias.data().fill_(0.0); + + auto first_input = torch::tensor({{2.0}}, at::kDouble); + 
network.evaluate(first_input, false); + const Real first_std = network.gaussianActionDistribution().stdTensor().item(); + + auto second_input = torch::tensor({{4.0}}, at::kDouble); + network.evaluate(second_input, false); + const Real second_std = network.gaussianActionDistribution().stdTensor().item(); + + EXPECT_NEAR(first_std, std::exp(1.0), 1e-12); + EXPECT_NEAR(second_std, std::exp(3.0), 1e-12); + EXPECT_GT(second_std, first_std); +} + +TEST(LibtorchActorNeuralNetTest, loadActorStateAcceptsTorchSaveArchive) +{ + TestableLibtorchActorNeuralNet saved("saved_actor", + 2, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + saved._weights[0]->weight.data() = torch::tensor({{1.0, 2.0}, {3.0, 4.0}}, at::kDouble); + saved._weights[0]->bias.data() = torch::tensor({5.0, 6.0}, at::kDouble); + saved.gaussianActionDistribution().meanModule()->weight.data() = + torch::tensor({{7.0, 8.0}}, at::kDouble); + saved.gaussianActionDistribution().meanModule()->bias.data() = torch::tensor({9.0}, at::kDouble); + saved.gaussianActionDistribution().stdModule()->weight.data() = + torch::tensor({{-1.5, 2.5}}, at::kDouble); + saved.gaussianActionDistribution().stdModule()->bias.data() = torch::tensor({-3.5}, at::kDouble); + + Moose::UnitUtils::TempFile archive; + torch::save(std::make_shared(saved), archive.path().string()); + + TestableLibtorchActorNeuralNet restored("restored_actor", + 2, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string()); + + const auto saved_parameters = saved.named_parameters(); + const auto restored_parameters = restored.named_parameters(); + ASSERT_EQ(saved_parameters.size(), restored_parameters.size()); + for (std::size_t i = 0; i < saved_parameters.size(); ++i) + { + EXPECT_EQ(saved_parameters[i].key(), restored_parameters[i].key()); + 
EXPECT_TRUE(torch::allclose(saved_parameters[i].value(), + restored_parameters[i].value(), + /*rtol=*/0.0, + /*atol=*/0.0)); + } + + const auto saved_buffers = saved.named_buffers(); + const auto restored_buffers = restored.named_buffers(); + ASSERT_EQ(saved_buffers.size(), restored_buffers.size()); + for (std::size_t i = 0; i < saved_buffers.size(); ++i) + { + EXPECT_EQ(saved_buffers[i].key(), restored_buffers[i].key()); + EXPECT_TRUE(torch::allclose(saved_buffers[i].value(), + restored_buffers[i].value(), + /*rtol=*/0.0, + /*atol=*/0.0)); + } + + auto saved_input = torch::tensor({{3.0, -1.0}}, at::kDouble); + auto restored_input = saved_input.clone(); + EXPECT_TRUE(torch::allclose(saved.evaluate(saved_input, false), + restored.evaluate(restored_input, false), + /*rtol=*/0.0, + /*atol=*/0.0)); +} + +#endif From 563c6c21b7d1e574c4c3156a27893bfea537c9ce Mon Sep 17 00:00:00 2001 From: Peter German Date: Thu, 23 Apr 2026 13:11:58 -0600 Subject: [PATCH 34/51] Remove old framework implementation for distribution heads. 
--- .../utils/LibtorchActionDistributionHead.h | 171 ------ .../libtorch/utils/LibtorchActorNeuralNet.h | 132 ----- .../utils/LibtorchActionDistributionHead.C | 323 ---------- .../libtorch/utils/LibtorchActorNeuralNet.C | 560 ------------------ unit/src/LibtorchActorNeuralNetTest.C | 272 --------- 5 files changed, 1458 deletions(-) delete mode 100644 framework/include/libtorch/utils/LibtorchActionDistributionHead.h delete mode 100644 framework/include/libtorch/utils/LibtorchActorNeuralNet.h delete mode 100644 framework/src/libtorch/utils/LibtorchActionDistributionHead.C delete mode 100644 framework/src/libtorch/utils/LibtorchActorNeuralNet.C delete mode 100644 unit/src/LibtorchActorNeuralNetTest.C diff --git a/framework/include/libtorch/utils/LibtorchActionDistributionHead.h b/framework/include/libtorch/utils/LibtorchActionDistributionHead.h deleted file mode 100644 index 7cb3dddb4199..000000000000 --- a/framework/include/libtorch/utils/LibtorchActionDistributionHead.h +++ /dev/null @@ -1,171 +0,0 @@ -//* This file is part of the MOOSE framework -//* https://www.mooseframework.org -//* -//* All rights reserved, see COPYRIGHT for full restrictions -//* https://github.com/idaholab/moose/blob/master/COPYRIGHT -//* -//* Licensed under LGPL 2.1, please see LICENSE for details -//* https://www.gnu.org/licenses/lgpl-2.1.html - -#ifdef MOOSE_LIBTORCH_ENABLED - -#pragma once - -#include - -#include "MooseTypes.h" - -#include -#include - -namespace Moose -{ - -/** - * Reusable continuous-action distribution interface for actor policies. 
- */ -class LibtorchActionDistribution : public torch::nn::Module -{ -public: - LibtorchActionDistribution(const std::string & name, - unsigned int num_inputs, - unsigned int num_outputs, - torch::DeviceType device_type = torch::kCPU, - torch::ScalarType scalar_type = torch::kDouble, - const std::vector & output_scaling_factors = {}); - - virtual void initialize() = 0; - - virtual void reset(const torch::Tensor & input) = 0; - - virtual torch::Tensor sample() const = 0; - - virtual torch::Tensor deterministicAction() const = 0; - - virtual torch::Tensor logProbability(const torch::Tensor & action) const = 0; - - virtual torch::Tensor entropy() const = 0; - - virtual bool isBounded() const = 0; - - void synchronizeScalingFactorsFromBuffer(); - -protected: - torch::Tensor prepareFeatures(const torch::Tensor & input) const; - torch::Tensor prepareAction(const torch::Tensor & action) const; - const torch::Tensor & actionScaleTensor() const { return _action_scale_tensor; } - - const std::string _name; - const unsigned int _num_inputs; - const unsigned int _num_outputs; - const torch::DeviceType _device_type; - const torch::ScalarType _data_type; - std::vector _output_scaling_factors; - - torch::Tensor _action_scale_tensor; -}; - -/** - * Gaussian action distribution for unbounded action spaces. 
- */ -class LibtorchGaussianActionDistribution : public LibtorchActionDistribution -{ -public: - LibtorchGaussianActionDistribution(const std::string & name, - unsigned int num_inputs, - unsigned int num_outputs, - torch::DeviceType device_type = torch::kCPU, - torch::ScalarType scalar_type = torch::kDouble, - bool build_on_construct = true, - const std::vector & output_scaling_factors = {}, - bool state_independent_std = true); - - virtual void initialize() override; - - virtual void reset(const torch::Tensor & input) override; - - virtual torch::Tensor sample() const override; - - virtual torch::Tensor deterministicAction() const override; - - virtual torch::Tensor logProbability(const torch::Tensor & action) const override; - - virtual torch::Tensor entropy() const override; - - virtual bool isBounded() const override { return false; } - - bool stateIndependentStd() const { return _state_independent_std; } - torch::nn::Linear & meanModule() { return _mean_module; } - const torch::nn::Linear & meanModule() const { return _mean_module; } - torch::nn::Linear & stdModule() { return _std_module; } - const torch::nn::Linear & stdModule() const { return _std_module; } - const torch::Tensor & stdTensor() const { return _std_tensor; } - -private: - void constructDistribution(); - - const bool _state_independent_std; - torch::nn::Linear _mean_module{nullptr}; - torch::nn::Linear _std_module{nullptr}; - torch::Tensor _mean; - torch::Tensor _std_tensor; - torch::Tensor _log_std_tensor; -}; - -/** - * Beta action distribution for bounded action spaces. 
- */ -class LibtorchBetaActionDistribution : public LibtorchActionDistribution -{ -public: - LibtorchBetaActionDistribution(const std::string & name, - unsigned int num_inputs, - unsigned int num_outputs, - const std::vector & minimum_values, - const std::vector & maximum_values, - torch::DeviceType device_type = torch::kCPU, - torch::ScalarType scalar_type = torch::kDouble, - bool build_on_construct = true, - const std::vector & output_scaling_factors = {}); - - virtual void initialize() override; - - virtual void reset(const torch::Tensor & input) override; - - virtual torch::Tensor sample() const override; - - virtual torch::Tensor deterministicAction() const override; - - virtual torch::Tensor logProbability(const torch::Tensor & action) const override; - - virtual torch::Tensor entropy() const override; - - virtual bool isBounded() const override { return true; } - - torch::nn::Linear & alphaModule() { return _alpha_module; } - const torch::nn::Linear & alphaModule() const { return _alpha_module; } - torch::nn::Linear & betaModule() { return _beta_module; } - const torch::nn::Linear & betaModule() const { return _beta_module; } - const torch::Tensor & alphaTensor() const { return _alpha_tensor; } - const torch::Tensor & betaTensor() const { return _beta_tensor; } - -private: - void constructDistribution(); - - const std::vector _minimum_values; - const std::vector _maximum_values; - - torch::nn::Linear _alpha_module{nullptr}; - torch::nn::Linear _beta_module{nullptr}; - torch::Tensor _min_tensor; - torch::Tensor _max_tensor; - torch::Tensor _alpha_tensor; - torch::Tensor _beta_tensor; - torch::Tensor _alpha_beta_tensor; - torch::Tensor _log_norm; - torch::Tensor _mean; -}; - -} // namespace Moose - -#endif diff --git a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h b/framework/include/libtorch/utils/LibtorchActorNeuralNet.h deleted file mode 100644 index 615ef5a4f0c3..000000000000 --- a/framework/include/libtorch/utils/LibtorchActorNeuralNet.h +++ 
/dev/null @@ -1,132 +0,0 @@ -//* This file is part of the MOOSE framework -//* https://www.mooseframework.org -//* -//* All rights reserved, see COPYRIGHT for full restrictions -//* https://github.com/idaholab/moose/blob/master/COPYRIGHT -//* -//* Licensed under LGPL 2.1, please see LICENSE for details -//* https://www.gnu.org/licenses/lgpl-2.1.html - -#ifdef MOOSE_LIBTORCH_ENABLED - -#pragma once - -#include -#include -#include "LibtorchActionDistributionHead.h" -#include "LibtorchArtificialNeuralNet.h" - -namespace Moose -{ - -class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet -{ -public: - /** - * Construct using input parameters - * @param name Name of the neural network - * @param num_inputs The number of input neurons/parameters - * @param num_neurons_per_layer Number of neurons per hidden layer - * @param num_outputs The number of output neurons - */ - LibtorchActorNeuralNet(const std::string name, - const unsigned int num_inputs, - const unsigned int num_outputs, - const std::vector & num_neurons_per_layer, - const std::vector & activation_function = {"relu"}, - const std::vector & minimum_values = {}, - const std::vector & maximum_values = {}, - const torch::DeviceType device_type = torch::kCPU, - const torch::ScalarType scalar_type = torch::kDouble, - const bool build_on_construct = true, - const std::vector & input_shift_factors = {}, - const std::vector & input_scaling_factors = {}, - const std::vector & output_scaling_factors = {}, - const bool state_independent_std = true); - - /** - * Copy construct an artificial neural network - * @param nn The neural network which needs to be copied - */ - LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, - const bool build_on_construct = true); - - /** - * Overriding the forward substitution function for the neural network, unfortunately - * this cannot be const since it creates a graph in the background - * @param x Input tensor for the evaluation - */ - virtual torch::Tensor 
forward(const torch::Tensor & x) override; - - virtual torch::Tensor evaluate(torch::Tensor & input, bool sampled); - - virtual torch::Tensor sample(); - - /// Construct the neural network - virtual void constructNeuralNetwork() override; - - const LibtorchActionDistribution & actionDistribution() const { return *_action_distribution; } - LibtorchActionDistribution & actionDistribution() { return *_action_distribution; } - - const LibtorchGaussianActionDistribution * gaussianActionDistributionPtr() const; - LibtorchGaussianActionDistribution * gaussianActionDistributionPtr(); - const LibtorchGaussianActionDistribution & gaussianActionDistribution() const; - LibtorchGaussianActionDistribution & gaussianActionDistribution(); - - const LibtorchBetaActionDistribution * betaActionDistributionPtr() const; - LibtorchBetaActionDistribution * betaActionDistributionPtr(); - const LibtorchBetaActionDistribution & betaActionDistribution() const; - LibtorchBetaActionDistribution & betaActionDistribution(); - - bool stateIndependentStd() const { return _state_independent_std; } - - void resetDistributionParams(torch::Tensor input); - - torch::Tensor logProbability(const torch::Tensor & other); - - torch::Tensor entropy(); - - virtual void initializeNeuralNetwork() override; - -protected: - const bool _state_independent_std; - std::shared_ptr _action_distribution; -}; - -void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); - -void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, - const std::string & filename); - -bool isLegacyLibtorchActorArchive(const std::string & filename); - -void loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, - const std::string & filename, - const std::vector & action_standard_deviations); - -} - -template <> -void dataStore(std::ostream & stream, - std::shared_ptr & nn, - void * context); - -template <> -void dataLoad(std::istream & stream, - std::shared_ptr & nn, - void 
* context); - -// This is needed because the reporter which is used to ouput the neural net parameters to JSON -// requires a dataStore/dataLoad. However, these functions will be empty due to the fact that -// we are only interested in the JSON output and we don't want to output everything -template <> -void dataStore(std::ostream & stream, - Moose::LibtorchActorNeuralNet const *& nn, - void * context); - -template <> -void dataLoad(std::istream & stream, - Moose::LibtorchActorNeuralNet const *& nn, - void * context); - -#endif diff --git a/framework/src/libtorch/utils/LibtorchActionDistributionHead.C b/framework/src/libtorch/utils/LibtorchActionDistributionHead.C deleted file mode 100644 index f018e41531a9..000000000000 --- a/framework/src/libtorch/utils/LibtorchActionDistributionHead.C +++ /dev/null @@ -1,323 +0,0 @@ -//* This file is part of the MOOSE framework -//* https://www.mooseframework.org -//* -//* All rights reserved, see COPYRIGHT for full restrictions -//* https://github.com/idaholab/moose/blob/master/COPYRIGHT -//* -//* Licensed under LGPL 2.1, please see LICENSE for details -//* https://www.gnu.org/licenses/lgpl-2.1.html - -#ifdef MOOSE_LIBTORCH_ENABLED - -#include "LibtorchActionDistributionHead.h" - -#include "LibtorchUtils.h" -#include "MooseError.h" - -#include -#include -#include -#include "libmesh/utility.h" - -namespace -{ - -std::vector -normalizeActionScalingFactors(const std::vector & factors, const unsigned int expected_size) -{ - const auto normalized = factors.empty() ? 
std::vector(expected_size, 1.0) : factors; - - if (normalized.size() != expected_size) - mooseError("The number of output_scaling_factors entries must match the number of action " - "outputs."); - - for (const auto factor : normalized) - if (std::abs(factor) == 0.0) - mooseError("The output_scaling_factors entries must be non-zero."); - - return normalized; -} - -} // namespace - -namespace Moose -{ - -LibtorchActionDistribution::LibtorchActionDistribution( - const std::string & name, - const unsigned int num_inputs, - const unsigned int num_outputs, - const torch::DeviceType device_type, - const torch::ScalarType data_type, - const std::vector & output_scaling_factors) - : _name(name), - _num_inputs(num_inputs), - _num_outputs(num_outputs), - _device_type(device_type), - _data_type(data_type), - _output_scaling_factors(normalizeActionScalingFactors(output_scaling_factors, num_outputs)) -{ - auto action_scale = _output_scaling_factors; - LibtorchUtils::vectorToTensor(action_scale, _action_scale_tensor); - _action_scale_tensor = register_buffer( - "action_scale", _action_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); -} - -void -LibtorchActionDistribution::synchronizeScalingFactorsFromBuffer() -{ - auto action_scale = - _action_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); - LibtorchUtils::tensorToVector(action_scale, _output_scaling_factors); -} - -torch::Tensor -LibtorchActionDistribution::prepareFeatures(const torch::Tensor & input) const -{ - auto features = input; - if (_data_type != features.scalar_type()) - features = features.to(_data_type); - if (_device_type != features.device().type()) - features = features.to(_device_type); - return features; -} - -torch::Tensor -LibtorchActionDistribution::prepareAction(const torch::Tensor & action) const -{ - auto scaled_action = action; - if (_data_type != scaled_action.scalar_type()) - scaled_action = scaled_action.to(_data_type); - if (_device_type != 
scaled_action.device().type()) - scaled_action = scaled_action.to(_device_type); - return scaled_action; -} - -LibtorchGaussianActionDistribution::LibtorchGaussianActionDistribution( - const std::string & name, - const unsigned int num_inputs, - const unsigned int num_outputs, - const torch::DeviceType device_type, - const torch::ScalarType data_type, - const bool build_on_construct, - const std::vector & output_scaling_factors, - const bool state_independent_std) - : LibtorchActionDistribution( - name, num_inputs, num_outputs, device_type, data_type, output_scaling_factors), - _state_independent_std(state_independent_std) -{ - if (build_on_construct) - constructDistribution(); -} - -void -LibtorchGaussianActionDistribution::constructDistribution() -{ - _mean_module = register_module( - "mean", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); - _std_module = register_module( - "std", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); - - _mean_module->to(_device_type, _data_type); - _std_module->to(_device_type, _data_type); -} - -void -LibtorchGaussianActionDistribution::initialize() -{ - const auto mean_sizes = _mean_module->weight.sizes(); - const auto mean_max_dim_size = *std::max_element(mean_sizes.begin(), mean_sizes.end()); - torch::nn::init::orthogonal_(_mean_module->weight, 1.0 / mean_max_dim_size); - torch::nn::init::zeros_(_mean_module->bias); - - if (_state_independent_std) - { - _std_module->weight.data().zero_(); - torch::nn::init::zeros_(_std_module->bias); - return; - } - - const auto std_sizes = _std_module->weight.sizes(); - const auto std_max_dim_size = *std::max_element(std_sizes.begin(), std_sizes.end()); - torch::nn::init::orthogonal_(_std_module->weight, 1.0 / std_max_dim_size); - torch::nn::init::zeros_(_std_module->bias); -} - -void -LibtorchGaussianActionDistribution::reset(const torch::Tensor & input) -{ - const auto features = prepareFeatures(input); - _mean = 
_mean_module->forward(features); - - if (_state_independent_std) - { - if (_mean.dim() <= 1) - _log_std_tensor = _std_module->bias; - else - _log_std_tensor = _std_module->bias.view({1, -1}).expand(_mean.sizes()); - } - else - _log_std_tensor = _std_module->forward(features); - - _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12)); - _std_tensor = torch::exp(_log_std_tensor); -} - -torch::Tensor -LibtorchGaussianActionDistribution::sample() const -{ - return at::normal(_mean, _std_tensor) * actionScaleTensor(); -} - -torch::Tensor -LibtorchGaussianActionDistribution::deterministicAction() const -{ - return _mean * actionScaleTensor(); -} - -torch::Tensor -LibtorchGaussianActionDistribution::logProbability(const torch::Tensor & action) const -{ - const auto scaled_action = prepareAction(action); - const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); - const auto unscaled_action = scaled_action / actionScaleTensor(); - - constexpr Real pi = 3.14159265358979323846; - const torch::Tensor var = _std_tensor * _std_tensor; - return -((unscaled_action - _mean) * (unscaled_action - _mean)) / (2.0 * var) - _log_std_tensor - - 0.5 * std::log(2.0 * pi) - log_action_scale; -} - -torch::Tensor -LibtorchGaussianActionDistribution::entropy() const -{ - const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); - constexpr Real pi = 3.14159265358979323846; - return 0.5 * std::log(2.0 * pi) + _log_std_tensor + 0.5 + log_action_scale; -} - -LibtorchBetaActionDistribution::LibtorchBetaActionDistribution( - const std::string & name, - const unsigned int num_inputs, - const unsigned int num_outputs, - const std::vector & minimum_values, - const std::vector & maximum_values, - const torch::DeviceType device_type, - const torch::ScalarType data_type, - const bool build_on_construct, - const std::vector & output_scaling_factors) - : LibtorchActionDistribution( - name, num_inputs, num_outputs, device_type, data_type, 
output_scaling_factors), - _minimum_values(minimum_values), - _maximum_values(maximum_values) -{ - if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs) - mooseError("The number of minimum_values and maximum_values entries must match the number " - "of action outputs."); - - for (const auto i : make_range(_minimum_values.size())) - if (!(_maximum_values[i] > _minimum_values[i])) - mooseError("maximum_values entries must be strictly greater than minimum_values entries."); - - auto min_value = _minimum_values; - LibtorchUtils::vectorToTensor(min_value, _min_tensor); - _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); - auto max_value = _maximum_values; - LibtorchUtils::vectorToTensor(max_value, _max_tensor); - _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); - - if (build_on_construct) - constructDistribution(); -} - -void -LibtorchBetaActionDistribution::constructDistribution() -{ - _alpha_module = register_module( - "alpha", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); - _beta_module = register_module( - "beta", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); - - _alpha_module->to(_device_type, _data_type); - _beta_module->to(_device_type, _data_type); -} - -void -LibtorchBetaActionDistribution::initialize() -{ - const auto alpha_sizes = _alpha_module->weight.sizes(); - const auto alpha_max_dim_size = *std::max_element(alpha_sizes.begin(), alpha_sizes.end()); - torch::nn::init::orthogonal_(_alpha_module->weight, 1.0 / alpha_max_dim_size); - torch::nn::init::zeros_(_alpha_module->bias); - - const auto beta_sizes = _beta_module->weight.sizes(); - const auto beta_max_dim_size = *std::max_element(beta_sizes.begin(), beta_sizes.end()); - torch::nn::init::orthogonal_(_beta_module->weight, 1.0 / beta_max_dim_size); - torch::nn::init::zeros_(_beta_module->bias); -} - -void -LibtorchBetaActionDistribution::reset(const 
torch::Tensor & input) -{ - const auto features = prepareFeatures(input); - const auto alpha = _alpha_module->forward(features); - _alpha_tensor = torch::log(torch::exp(alpha) + 1.0) + 1.0; - const auto beta = _beta_module->forward(features); - _beta_tensor = torch::log(torch::exp(beta) + 1.0) + 1.0; - - _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8); - _mean = _alpha_tensor / _alpha_beta_tensor; - _log_norm = at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor); -} - -torch::Tensor -LibtorchBetaActionDistribution::sample() const -{ - const auto alpha_sample = at::_standard_gamma(_alpha_tensor); - const auto beta_sample = at::_standard_gamma(_beta_tensor); - const auto sampled = alpha_sample / (alpha_sample + beta_sample); - return (_min_tensor + (_max_tensor - _min_tensor) * sampled) * actionScaleTensor(); -} - -torch::Tensor -LibtorchBetaActionDistribution::deterministicAction() const -{ - return (_min_tensor + (_max_tensor - _min_tensor) * _mean) * actionScaleTensor(); -} - -torch::Tensor -LibtorchBetaActionDistribution::logProbability(const torch::Tensor & action) const -{ - const auto scaled_action = prepareAction(action); - const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); - const auto unscaled_action = scaled_action / actionScaleTensor(); - const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); - const auto normalized = (unscaled_action - _min_tensor) / scale; - const auto clipped = torch::clamp(normalized, 1e-8, 1.0 - 1e-8); - auto log_prob = (_alpha_tensor - 1.0) * torch::log(clipped) + - (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale) - - log_action_scale; - - const auto out_of_bounds = (normalized < 0.0) | (normalized > 1.0); - if (out_of_bounds.any().item()) - log_prob = torch::where(out_of_bounds, - torch::full_like(log_prob, -std::numeric_limits::infinity()), - log_prob); - - return log_prob; -} - -torch::Tensor 
-LibtorchBetaActionDistribution::entropy() const -{ - const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); - const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); - return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) - - (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) + - (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale) + - log_action_scale; -} - -} // namespace Moose - -#endif diff --git a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C b/framework/src/libtorch/utils/LibtorchActorNeuralNet.C deleted file mode 100644 index 1a11f2e46cd7..000000000000 --- a/framework/src/libtorch/utils/LibtorchActorNeuralNet.C +++ /dev/null @@ -1,560 +0,0 @@ -//* This file is part of the MOOSE framework -//* https://www.mooseframework.org -//* -//* All rights reserved, see COPYRIGHT for full restrictions -//* https://github.com/idaholab/moose/blob/master/COPYRIGHT -//* -//* Licensed under LGPL 2.1, please see LICENSE for details -//* https://www.gnu.org/licenses/lgpl-2.1.html - -#ifdef MOOSE_LIBTORCH_ENABLED - -#include "LibtorchActorNeuralNet.h" -#include "MooseError.h" - -namespace -{ - -bool -readArchiveTensor(torch::serialize::InputArchive & archive, - const std::string & key, - torch::Tensor & tensor) -{ - try - { - archive.read(key, tensor); - return true; - } - catch (const c10::Error &) - { - return false; - } -} - -void -copyTensor(torch::Tensor & destination, const torch::Tensor & source) -{ - destination.data().copy_(source.to(destination.options())); -} - -bool -readActorStateTensor(torch::serialize::InputArchive & archive, - const std::string & key, - torch::Tensor & tensor) -{ - if (readArchiveTensor(archive, key, tensor)) - return true; - - if (key.rfind("action_head.", 0) == 0) - return readArchiveTensor(archive, key.substr(std::string("action_head.").size()), tensor); - - return false; -} - -bool -isOptionalActorBuffer(const std::string & key) -{ - return key == 
"input_shift" || key == "input_scale" || key == "output_scale" || - key == "action_head.action_scale"; -} - -bool -isOptionalActorParameter(const std::string & key) -{ - return key == "action_head.mean.bias" || key == "action_head.std.bias"; -} - -template -bool -findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch::Tensor & tensor) -{ - for (const auto & entry : tensors) - if (entry.name == key) - { - tensor = entry.value; - return true; - } - - return false; -} - -template -bool -readScriptedActorStateTensor(const NamedTensorList & tensors, - const std::string & key, - torch::Tensor & tensor) -{ - if (findNamedTensor(tensors, key, tensor)) - return true; - - if (key.rfind("action_head.", 0) == 0) - return findNamedTensor(tensors, key.substr(std::string("action_head.").size()), tensor); - - return false; -} - -bool -loadActorStateFromArchive(Moose::LibtorchActorNeuralNet & nn, - const std::string & filename, - std::string & error) -{ - try - { - torch::serialize::InputArchive archive; - archive.load_from(filename); - - for (auto & parameter : nn.named_parameters()) - { - torch::Tensor stored_tensor; - if (!readActorStateTensor(archive, parameter.key(), stored_tensor)) - { - if (isOptionalActorParameter(parameter.key())) - { - parameter.value().data().zero_(); - continue; - } - - error = "Missing serialized parameter: " + parameter.key(); - return false; - } - - copyTensor(parameter.value(), stored_tensor); - } - - for (auto & buffer : nn.named_buffers()) - { - torch::Tensor stored_tensor; - if (!readActorStateTensor(archive, buffer.key(), stored_tensor)) - { - if (isOptionalActorBuffer(buffer.key())) - continue; - - error = "Missing serialized buffer: " + buffer.key(); - return false; - } - - copyTensor(buffer.value(), stored_tensor); - } - - nn.synchronizeAffineFactorsFromBuffers(); - nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); - return true; - } - catch (const c10::Error & e) - { - error = e.msg(); - return false; - 
} -} - -bool -loadActorStateFromTorchScript(Moose::LibtorchActorNeuralNet & nn, - const std::string & filename, - std::string & error) -{ - try - { - const auto scripted = torch::jit::load(filename); - const auto scripted_parameters = scripted.named_parameters(); - const auto scripted_buffers = scripted.named_buffers(); - - for (auto & parameter : nn.named_parameters()) - { - torch::Tensor stored_tensor; - if (!readScriptedActorStateTensor(scripted_parameters, parameter.key(), stored_tensor)) - { - if (isOptionalActorParameter(parameter.key())) - { - parameter.value().data().zero_(); - continue; - } - - error = "Missing scripted parameter: " + parameter.key(); - return false; - } - - copyTensor(parameter.value(), stored_tensor); - } - - for (auto & buffer : nn.named_buffers()) - { - torch::Tensor stored_tensor; - if (!readScriptedActorStateTensor(scripted_buffers, buffer.key(), stored_tensor)) - { - if (isOptionalActorBuffer(buffer.key())) - continue; - - error = "Missing scripted buffer: " + buffer.key(); - return false; - } - - copyTensor(buffer.value(), stored_tensor); - } - - nn.synchronizeAffineFactorsFromBuffers(); - nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); - return true; - } - catch (const c10::Error & e) - { - error = e.msg(); - return false; - } -} - -} // namespace - -namespace Moose -{ - -LibtorchActorNeuralNet::LibtorchActorNeuralNet( - const std::string name, - const unsigned int num_inputs, - const unsigned int num_outputs, - const std::vector & num_neurons_per_layer, - const std::vector & activation_function, - const std::vector & minimum_values, - const std::vector & maximum_values, - const torch::DeviceType device_type, - const torch::ScalarType data_type, - const bool build_on_construct, - const std::vector & input_shift_factors, - const std::vector & input_scaling_factors, - const std::vector & output_scaling_factors, - const bool state_independent_std) - : LibtorchArtificialNeuralNet(name, - num_inputs, - num_outputs, - 
num_neurons_per_layer, - activation_function, - minimum_values, - maximum_values, - device_type, - data_type, - false, - input_shift_factors, - input_scaling_factors, - output_scaling_factors), - _state_independent_std(state_independent_std) -{ - if (build_on_construct) - constructNeuralNetwork(); -} - -LibtorchActorNeuralNet::LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, - const bool build_on_construct) - : LibtorchArtificialNeuralNet(dynamic_cast(nn), false), - _state_independent_std(nn.stateIndependentStd()) -{ - // We construct the NN architecture - if (build_on_construct) - { - constructNeuralNetwork(); - // We fill it up with the current parameter values - const auto & from_params = nn.named_parameters(); - auto to_params = this->named_parameters(); - for (unsigned int param_i : make_range(from_params.size())) - to_params[param_i].value().data() = from_params[param_i].value().data().clone(); - - const auto & from_buffers = nn.named_buffers(); - auto to_buffers = this->named_buffers(); - for (unsigned int buffer_i : make_range(from_buffers.size())) - to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone(); - } -} - -void -LibtorchActorNeuralNet::initializeNeuralNetwork() -{ - for (unsigned int i = 0; i < numHiddenLayers(); ++i) - { - const auto & activation = - _activation_function.size() > 1 ? 
_activation_function[i] : _activation_function[0]; - const Real gain = determineGain(activation); - - auto sizes = _weights[i]->weight.sizes(); - auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); - torch::nn::init::orthogonal_(_weights[i]->weight, gain / max_dim_size); - torch::nn::init::zeros_(_weights[i]->bias); - } - - _action_distribution->initialize(); -} - -void -LibtorchActorNeuralNet::constructNeuralNetwork() -{ - // Adding hidden layers - unsigned int inp_neurons = _num_inputs; - for (unsigned int i = 0; i < numHiddenLayers(); ++i) - { - std::unordered_map parameters = { - {"inp_neurons", inp_neurons}, {"out_neurons", _num_neurons_per_layer[i]}}; - addLayer("hidden_layer_" + std::to_string(i + 1), parameters); - - // Necessary to retain double precision (and error-free runs) - _weights[i]->to(_device_type, _data_type); - inp_neurons = _num_neurons_per_layer[i]; - } - - if (_minimum_values.empty() && _maximum_values.empty()) - _action_distribution = - std::make_shared("action_distribution", - inp_neurons, - _num_outputs, - _device_type, - _data_type, - true, - _output_scaling_factors, - _state_independent_std); - else - _action_distribution = - std::make_shared("action_distribution", - inp_neurons, - _num_outputs, - _minimum_values, - _maximum_values, - _device_type, - _data_type, - true, - _output_scaling_factors); - - // Keep the serialized module name stable so existing checkpoints continue to load. 
- register_module("action_head", _action_distribution); -} - -torch::Tensor -LibtorchActorNeuralNet::entropy() -{ - return _action_distribution->entropy(); -} - -const LibtorchGaussianActionDistribution * -LibtorchActorNeuralNet::gaussianActionDistributionPtr() const -{ - return dynamic_cast(_action_distribution.get()); -} - -LibtorchGaussianActionDistribution * -LibtorchActorNeuralNet::gaussianActionDistributionPtr() -{ - return dynamic_cast(_action_distribution.get()); -} - -const LibtorchGaussianActionDistribution & -LibtorchActorNeuralNet::gaussianActionDistribution() const -{ - const auto * distribution = gaussianActionDistributionPtr(); - if (!distribution) - mooseError("Requested a Gaussian action distribution from a bounded actor."); - return *distribution; -} - -LibtorchGaussianActionDistribution & -LibtorchActorNeuralNet::gaussianActionDistribution() -{ - auto * distribution = gaussianActionDistributionPtr(); - if (!distribution) - mooseError("Requested a Gaussian action distribution from a bounded actor."); - return *distribution; -} - -const LibtorchBetaActionDistribution * -LibtorchActorNeuralNet::betaActionDistributionPtr() const -{ - return dynamic_cast(_action_distribution.get()); -} - -LibtorchBetaActionDistribution * -LibtorchActorNeuralNet::betaActionDistributionPtr() -{ - return dynamic_cast(_action_distribution.get()); -} - -const LibtorchBetaActionDistribution & -LibtorchActorNeuralNet::betaActionDistribution() const -{ - const auto * distribution = betaActionDistributionPtr(); - if (!distribution) - mooseError("Requested a Beta action distribution from an unbounded actor."); - return *distribution; -} - -LibtorchBetaActionDistribution & -LibtorchActorNeuralNet::betaActionDistribution() -{ - auto * distribution = betaActionDistributionPtr(); - if (!distribution) - mooseError("Requested a Beta action distribution from an unbounded actor."); - return *distribution; -} - -void -LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) 
-{ - _action_distribution->reset(input); -} - -torch::Tensor -LibtorchActorNeuralNet::forward(const torch::Tensor & x) -{ - torch::Tensor output = preprocessInput(x); - - for (unsigned int i = 0; i < _weights.size(); ++i) - { - std::string activation = - _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; - if (activation == "relu") - output = torch::relu(_weights[i]->forward(output)); - else if (activation == "sigmoid") - output = torch::sigmoid(_weights[i]->forward(output)); - else if (activation == "tanh") - output = torch::tanh(_weights[i]->forward(output)); - else if (activation == "elu") - output = torch::elu(_weights[i]->forward(output)); - else if (activation == "gelu") - output = torch::gelu(_weights[i]->forward(output)); - else if (activation == "linear") - output = _weights[i]->forward(output); - - // std::cout << "midresult" << i << output << std::endl; - } - - return output; -} - -torch::Tensor -LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) -{ - torch::Tensor output = forward(x); - - // std::cout << "midresult" << output << std::endl; - resetDistributionParams(output); - - if (sampled) - return sample(); - - return _action_distribution->deterministicAction(); -} - -torch::Tensor -LibtorchActorNeuralNet::sample() -{ - return _action_distribution->sample(); -} - -torch::Tensor -LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) -{ - return _action_distribution->logProbability(action); -} - -void -loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename) -{ - std::string archive_error; - if (loadActorStateFromArchive(nn, filename, archive_error)) - return; - - std::string torchscript_error; - if (loadActorStateFromTorchScript(nn, filename, torchscript_error)) - return; - - mooseError("The requested pytorch parameter file could not be loaded. 
This can either be " - "the result of the file not existing or a misalignment in the generated " - "container and the data in the file. Make sure the dimensions of the generated " - "neural net are the same as the dimensions of the parameters in the input file!\n" - "InputArchive load failed with: ", - archive_error, - "\nTorchScript load failed with: ", - torchscript_error); -} - -bool -isLegacyLibtorchActorArchive(const std::string & filename) -{ - try - { - const auto scripted = torch::jit::load(filename); - const auto parameters = scripted.named_parameters(); - - torch::Tensor ignored; - return findNamedTensor(parameters, "output_layer_.weight", ignored) && - !findNamedTensor(parameters, "action_head.mean.weight", ignored); - } - catch (const c10::Error &) - { - return false; - } -} - -void -loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, - const std::string & filename, - const std::vector & action_standard_deviations) -{ - if (nn.actionDistribution().isBounded()) - mooseError("Legacy deterministic DRL checkpoints are only supported for unbounded actors."); - - const auto legacy_std = action_standard_deviations.empty() - ? 
std::vector(nn.numOutputs(), 1e-12) - : action_standard_deviations; - - if (legacy_std.size() != nn.numOutputs()) - mooseError("The number of action_standard_deviations entries must match the number of action " - "outputs when loading a legacy deterministic DRL checkpoint."); - - for (const auto std_value : legacy_std) - if (!(std_value > 0.0)) - mooseError("Legacy action_standard_deviations entries must be strictly positive."); - - const auto scripted = torch::jit::load(filename); - const auto legacy_parameters = scripted.named_parameters(); - - for (auto & parameter : nn.named_parameters()) - { - const auto & key = parameter.key(); - torch::Tensor stored_tensor; - - if (key == "action_head.mean.weight") - { - if (!findNamedTensor(legacy_parameters, "output_layer_.weight", stored_tensor)) - mooseError("Legacy deterministic DRL checkpoint is missing output_layer_.weight."); - copyTensor(parameter.value(), stored_tensor); - continue; - } - - if (key == "action_head.mean.bias") - { - if (!findNamedTensor(legacy_parameters, "output_layer_.bias", stored_tensor)) - mooseError("Legacy deterministic DRL checkpoint is missing output_layer_.bias."); - copyTensor(parameter.value(), stored_tensor); - continue; - } - - if (key == "action_head.std.weight") - { - parameter.value().data().zero_(); - continue; - } - - if (key == "action_head.std.bias") - { - auto log_std = torch::log(torch::tensor(legacy_std, parameter.value().options())); - copyTensor(parameter.value(), log_std); - continue; - } - - if (!findNamedTensor(legacy_parameters, key, stored_tensor)) - mooseError("Legacy deterministic DRL checkpoint is missing serialized parameter: ", key); - - copyTensor(parameter.value(), stored_tensor); - } - - nn.synchronizeAffineFactorsFromBuffers(); - nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); -} - -} - -#endif diff --git a/unit/src/LibtorchActorNeuralNetTest.C b/unit/src/LibtorchActorNeuralNetTest.C deleted file mode 100644 index bc43670042ff..000000000000 --- 
a/unit/src/LibtorchActorNeuralNetTest.C +++ /dev/null @@ -1,272 +0,0 @@ -//* This file is part of the MOOSE framework -//* https://mooseframework.inl.gov -//* -//* All rights reserved, see COPYRIGHT for full restrictions -//* https://github.com/idaholab/moose/blob/master/COPYRIGHT -//* -//* Licensed under LGPL 2.1, please see LICENSE for details -//* https://www.gnu.org/licenses/lgpl-2.1.html - -#ifdef MOOSE_LIBTORCH_ENABLED - -#include "gtest/gtest.h" -#include "LibtorchActorNeuralNet.h" -#include "MooseUnitUtils.h" - -#include - -namespace -{ - -class TestableLibtorchArtificialNeuralNet : public Moose::LibtorchArtificialNeuralNet -{ -public: - using Moose::LibtorchArtificialNeuralNet::_weights; - using Moose::LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet; -}; - -class TestableLibtorchActorNeuralNet : public Moose::LibtorchActorNeuralNet -{ -public: - using Moose::LibtorchActorNeuralNet::_weights; - using Moose::LibtorchActorNeuralNet::LibtorchActorNeuralNet; -}; - -Real -inverseSoftplusPlusOne(const Real target) -{ - return std::log(std::exp(target - 1.0) - 1.0); -} - -} // namespace - -TEST(LibtorchActorNeuralNetTest, artificialNetAppliesAffineInputAndOutputScaling) -{ - TestableLibtorchArtificialNeuralNet network("test_ann", - 2, - 1, - {}, - {"linear"}, - {}, - {}, - torch::kCPU, - torch::kDouble, - true, - {1.0, 2.0}, - {2.0, 3.0}, - {10.0}); - - ASSERT_EQ(network._weights.size(), 1); - - network._weights[0]->weight.data().fill_(0.0); - network._weights[0]->weight.data()[0][0] = 1.0; - network._weights[0]->weight.data()[0][1] = -1.0; - network._weights[0]->bias.data().fill_(0.0); - - auto input = torch::tensor({{2.0, 6.0}}, at::kDouble); - const Real actual = network.forward(input).item(); - - EXPECT_NEAR(actual, -100.0, 1e-12); -} - -TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability) -{ - constexpr Real min_value = -2.0; - constexpr Real max_value = 4.0; - constexpr Real alpha_target = 2.3; - constexpr Real beta_target = 3.7; - constexpr 
Real action_value = 1.2; - - TestableLibtorchActorNeuralNet network( - "test_beta", 1, 1, {1}, {"linear"}, {min_value}, {max_value}); - - ASSERT_EQ(network._weights.size(), 1); - - network._weights[0]->weight.data().fill_(0.0); - network._weights[0]->bias.data().fill_(1.0); - network.betaActionDistribution().alphaModule()->weight.data().fill_( - inverseSoftplusPlusOne(alpha_target)); - network.betaActionDistribution().betaModule()->weight.data().fill_( - inverseSoftplusPlusOne(beta_target)); - - auto input = torch::zeros({1, 1}, at::kDouble); - network.evaluate(input, false); - - const Real alpha = network.betaActionDistribution().alphaTensor().item(); - const Real beta = network.betaActionDistribution().betaTensor().item(); - const Real normalized = (action_value - min_value) / (max_value - min_value); - const Real log_norm = std::lgamma(alpha) + std::lgamma(beta) - std::lgamma(alpha + beta); - const Real expected = (alpha - 1.0) * std::log(normalized) + - (beta - 1.0) * std::log1p(-normalized) - log_norm - - std::log(max_value - min_value); - - auto action = torch::tensor({{action_value}}, at::kDouble); - const Real actual = network.logProbability(action).item(); - - EXPECT_NEAR(actual, expected, 1e-12); -} - -TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScalingAndStateIndependentStd) -{ - constexpr Real input_shift = 1.0; - constexpr Real input_scale = 2.0; - constexpr Real action_scale = 5.0; - const Real log_std = std::log(2.0); - constexpr Real physical_action = 20.0; - constexpr Real expected_deterministic_action = 15.0; - - TestableLibtorchActorNeuralNet network("test_gaussian", - 1, - 1, - {}, - {"linear"}, - {}, - {}, - torch::kCPU, - torch::kDouble, - true, - {input_shift}, - {input_scale}, - {action_scale}); - - network.gaussianActionDistribution().meanModule()->weight.data().fill_(1.5); - network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); - 
network.gaussianActionDistribution().stdModule()->weight.data().fill_(123.0); - network.gaussianActionDistribution().stdModule()->bias.data().fill_(log_std); - - auto input = torch::tensor({{2.0}}, at::kDouble); - const Real deterministic_action = network.evaluate(input, false).item(); - EXPECT_NEAR(deterministic_action, expected_deterministic_action, 1e-12); - EXPECT_NEAR( - network.gaussianActionDistribution().stdTensor().item(), std::exp(log_std), 1e-12); - - const Real unscaled_mean = expected_deterministic_action / action_scale; - const Real unscaled_action = physical_action / action_scale; - constexpr Real pi = 3.14159265358979323846; - const Real expected_log_probability = - -std::pow(unscaled_action - unscaled_mean, 2) / (2.0 * 4.0) - log_std - - 0.5 * std::log(2.0 * pi) - std::log(action_scale); - - auto action = torch::tensor({{physical_action}}, at::kDouble); - const Real actual_log_probability = network.logProbability(action).item(); - - EXPECT_NEAR(actual_log_probability, expected_log_probability, 1e-12); - - auto second_input = torch::tensor({{4.0}}, at::kDouble); - network.evaluate(second_input, false); - EXPECT_NEAR( - network.gaussianActionDistribution().stdTensor().item(), std::exp(log_std), 1e-12); -} - -TEST(LibtorchActorNeuralNetTest, gaussianActorCanUseStateDependentStdWhenRequested) -{ - TestableLibtorchActorNeuralNet network("test_state_dependent_gaussian", - 1, - 1, - {}, - {"linear"}, - {}, - {}, - torch::kCPU, - torch::kDouble, - true, - {1.0}, - {2.0}, - {1.0}, - false); - - network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0); - network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); - network.gaussianActionDistribution().stdModule()->weight.data().fill_(0.5); - network.gaussianActionDistribution().stdModule()->bias.data().fill_(0.0); - - auto first_input = torch::tensor({{2.0}}, at::kDouble); - network.evaluate(first_input, false); - const Real first_std = 
network.gaussianActionDistribution().stdTensor().item(); - - auto second_input = torch::tensor({{4.0}}, at::kDouble); - network.evaluate(second_input, false); - const Real second_std = network.gaussianActionDistribution().stdTensor().item(); - - EXPECT_NEAR(first_std, std::exp(1.0), 1e-12); - EXPECT_NEAR(second_std, std::exp(3.0), 1e-12); - EXPECT_GT(second_std, first_std); -} - -TEST(LibtorchActorNeuralNetTest, loadActorStateAcceptsTorchSaveArchive) -{ - TestableLibtorchActorNeuralNet saved("saved_actor", - 2, - 1, - {2}, - {"linear"}, - {}, - {}, - torch::kCPU, - torch::kDouble, - true, - {1.0, -2.0}, - {0.5, 3.0}, - {4.0}); - - saved._weights[0]->weight.data() = torch::tensor({{1.0, 2.0}, {3.0, 4.0}}, at::kDouble); - saved._weights[0]->bias.data() = torch::tensor({5.0, 6.0}, at::kDouble); - saved.gaussianActionDistribution().meanModule()->weight.data() = - torch::tensor({{7.0, 8.0}}, at::kDouble); - saved.gaussianActionDistribution().meanModule()->bias.data() = torch::tensor({9.0}, at::kDouble); - saved.gaussianActionDistribution().stdModule()->weight.data() = - torch::tensor({{-1.5, 2.5}}, at::kDouble); - saved.gaussianActionDistribution().stdModule()->bias.data() = torch::tensor({-3.5}, at::kDouble); - - Moose::UnitUtils::TempFile archive; - torch::save(std::make_shared(saved), archive.path().string()); - - TestableLibtorchActorNeuralNet restored("restored_actor", - 2, - 1, - {2}, - {"linear"}, - {}, - {}, - torch::kCPU, - torch::kDouble, - true, - {1.0, -2.0}, - {0.5, 3.0}, - {4.0}); - - Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string()); - - const auto saved_parameters = saved.named_parameters(); - const auto restored_parameters = restored.named_parameters(); - ASSERT_EQ(saved_parameters.size(), restored_parameters.size()); - for (std::size_t i = 0; i < saved_parameters.size(); ++i) - { - EXPECT_EQ(saved_parameters[i].key(), restored_parameters[i].key()); - EXPECT_TRUE(torch::allclose(saved_parameters[i].value(), - 
restored_parameters[i].value(), - /*rtol=*/0.0, - /*atol=*/0.0)); - } - - const auto saved_buffers = saved.named_buffers(); - const auto restored_buffers = restored.named_buffers(); - ASSERT_EQ(saved_buffers.size(), restored_buffers.size()); - for (std::size_t i = 0; i < saved_buffers.size(); ++i) - { - EXPECT_EQ(saved_buffers[i].key(), restored_buffers[i].key()); - EXPECT_TRUE(torch::allclose(saved_buffers[i].value(), - restored_buffers[i].value(), - /*rtol=*/0.0, - /*atol=*/0.0)); - } - - auto saved_input = torch::tensor({{3.0, -1.0}}, at::kDouble); - auto restored_input = saved_input.clone(); - EXPECT_TRUE(torch::allclose(saved.evaluate(saved_input, false), - restored.evaluate(restored_input, false), - /*rtol=*/0.0, - /*atol=*/0.0)); -} - -#endif From c8f445c9123030b786127211c81d0df8a6351798 Mon Sep 17 00:00:00 2001 From: Peter German Date: Thu, 23 Apr 2026 15:13:02 -0600 Subject: [PATCH 35/51] Remove meltpool example, remove unused postprocessors. --- .../LaserPositionPostprocessor.h | 31 -- framework/src/functions/MooseParsedFunction.C | 3 - .../functions/MooseParsedFunctionWrapper.C | 3 - .../LaserPositionPostprocessor.C | 51 ---- .../combined/examples/stochastic/meltpool/3.i | 267 ------------------ .../stochastic/meltpool/meltpool_trainer.i | 117 -------- .../LiftDragRewardPostprocessor.h | 2 - .../LiftDragRewardPostprocessor.C | 38 +-- 8 files changed, 13 insertions(+), 499 deletions(-) delete mode 100644 framework/include/postprocessors/LaserPositionPostprocessor.h delete mode 100644 framework/src/postprocessors/LaserPositionPostprocessor.C delete mode 100644 modules/combined/examples/stochastic/meltpool/3.i delete mode 100644 modules/combined/examples/stochastic/meltpool/meltpool_trainer.i rename {framework => modules/stochastic_tools}/include/postprocessors/LiftDragRewardPostprocessor.h (99%) rename {framework => modules/stochastic_tools}/src/postprocessors/LiftDragRewardPostprocessor.C (57%) diff --git 
a/framework/include/postprocessors/LaserPositionPostprocessor.h b/framework/include/postprocessors/LaserPositionPostprocessor.h deleted file mode 100644 index 0ef780c76a4e..000000000000 --- a/framework/include/postprocessors/LaserPositionPostprocessor.h +++ /dev/null @@ -1,31 +0,0 @@ -//* This file is part of the MOOSE framework -//* https://www.mooseframework.org -//* -//* All rights reserved, see COPYRIGHT for full restrictions -//* https://github.com/idaholab/moose/blob/master/COPYRIGHT -//* -//* Licensed under LGPL 2.1, please see LICENSE for details -//* https://www.gnu.org/licenses/lgpl-2.1.html - -#pragma once - -// MOOSE includes -#include "GeneralPostprocessor.h" - -class LaserPositionPostprocessor : public GeneralPostprocessor -{ -public: - static InputParameters validParams(); - LaserPositionPostprocessor(const InputParameters & parameters); - - virtual void execute() override; - virtual void initialize() override {} - using Postprocessor::getValue; - virtual Real getValue() const override; - -protected: - - const PostprocessorValue & _speed; - Real _current_arclength; - Real _delta_arclength; -}; diff --git a/framework/src/functions/MooseParsedFunction.C b/framework/src/functions/MooseParsedFunction.C index 20cb51b7cb80..d0536a5505ee 100644 --- a/framework/src/functions/MooseParsedFunction.C +++ b/framework/src/functions/MooseParsedFunction.C @@ -44,9 +44,6 @@ MooseParsedFunction::MooseParsedFunction(const InputParameters & parameters) Real MooseParsedFunction::value(Real t, const Point & p) const { - // if (name() == "gap_x" || name() == "gap_y") - // std::cout << name() << std::endl; - mooseAssert(_function_ptr, "ParsedFunction should have been initialized"); return _function_ptr->evaluate(t, p); } diff --git a/framework/src/functions/MooseParsedFunctionWrapper.C b/framework/src/functions/MooseParsedFunctionWrapper.C index 2c2381321399..23a2b9d17530 100644 --- a/framework/src/functions/MooseParsedFunctionWrapper.C +++ 
b/framework/src/functions/MooseParsedFunctionWrapper.C @@ -145,10 +145,7 @@ void MooseParsedFunctionWrapper::update() { for (unsigned int i = 0; i < _pp_index.size(); ++i) - { (*_addr[_pp_index[i]]) = (*_pp_vals[i]); - // std::cout << (*_pp_vals[i]) << std::endl; - } for (unsigned int i = 0; i < _scalar_index.size(); ++i) (*_addr[_scalar_index[i]]) = (*_scalar_vals[i]); diff --git a/framework/src/postprocessors/LaserPositionPostprocessor.C b/framework/src/postprocessors/LaserPositionPostprocessor.C deleted file mode 100644 index 8a9745f81e2a..000000000000 --- a/framework/src/postprocessors/LaserPositionPostprocessor.C +++ /dev/null @@ -1,51 +0,0 @@ -//* This file is part of the MOOSE framework -//* https://www.mooseframework.org -//* -//* All rights reserved, see COPYRIGHT for full restrictions -//* https://github.com/idaholab/moose/blob/master/COPYRIGHT -//* -//* Licensed under LGPL 2.1, please see LICENSE for details -//* https://www.gnu.org/licenses/lgpl-2.1.html - -#include "LaserPositionPostprocessor.h" -#include "FEProblemBase.h" -#include "NonlinearSystemBase.h" -#include "MathUtils.h" -#include "TransientBase.h" -#include "Restartable.h" -#include "libmesh/enum_norm_type.h" - -registerMooseObject("MooseApp", LaserPositionPostprocessor); - -InputParameters -LaserPositionPostprocessor::validParams() -{ - InputParameters params = GeneralPostprocessor::validParams(); - - params.addRequiredParam("speed","Lift coeff"); - - params.addClassDescription("Blabla."); - - return params; -} - -LaserPositionPostprocessor::LaserPositionPostprocessor(const InputParameters & parameters) - : GeneralPostprocessor(parameters), - _speed(getPostprocessorValue("speed")), - _current_arclength(0.0), - _delta_arclength(0.0) -{ -} - -Real -LaserPositionPostprocessor::getValue() const -{ - return _current_arclength; -} - -void -LaserPositionPostprocessor::execute() -{ - _delta_arclength = _speed * _dt; - _current_arclength += _delta_arclength; -} diff --git 
a/modules/combined/examples/stochastic/meltpool/3.i b/modules/combined/examples/stochastic/meltpool/3.i deleted file mode 100644 index b342ed7f815f..000000000000 --- a/modules/combined/examples/stochastic/meltpool/3.i +++ /dev/null @@ -1,267 +0,0 @@ -# Process parameters -# scanning_speed=1.0 # m/s -power=25 # W (this is the effective power so multiplied by eta) -R=50e-6 # m (this is the effective radius) - -# Geometric parameters -thickness=50e-6 # m -ymin=-180e-6 -ymax=180e-6 -xmin=-180e-6 # m -xmax=180e-6 # m -surfacetemp=300 # K (temperature at the other side of the plate) -backtemp=300 - -# Time stepping parameters -endtime=1.13e-3 # s -timestep=${fparse endtime/240} # s - -[Mesh] - [cmg] - type = GeneratedMeshGenerator - dim = 3 - xmin = ${xmin} - xmax = ${xmax} - ymin = ${ymin} - ymax = ${ymax} - zmin = ${fparse -thickness} - zmax = 0 - nx = 50 - ny = 50 - nz = 15 - [] -[] - -[Variables] - [T] - [] -[] - -[ICs] - [T] - type = FunctionIC - variable = T - function = '(${surfacetemp} - ${backtemp}) / ${thickness} * z + ${surfacetemp}' - [] -[] - -[Kernels] - [temperature_time] - type = ADHeatConductionTimeDerivative - variable = T - use_displaced_mesh = true - density_name = 'rho' - specific_heat = 'cp' - [] - [temperature_conduction] - type = ADHeatConduction - variable = T - thermal_conductivity = 'k' - use_displaced_mesh = true - [] -[] - -[BCs] - [T_cold] - type = DirichletBC - variable = T - boundary = 'back' - value = ${backtemp} - [] - [radiation_flux] - type = FunctionRadiativeBC - variable = T - boundary = 'front' - emissivity_function = '1' - Tinfinity = 300 - stefan_boltzmann_constant = 5.67e-8 - [] - [weld_flux] - type = GaussianEnergyFluxBC - variable = T - boundary = 'front' - P0 = ${power} - R = ${R} - x_beam_coord = xcoord - y_beam_coord = ycoord - z_beam_coord = '0' - [] -[] - -[Functions] - [xcoord] - type = ParsedFunction - expression = '60e-6*sin(pi/(60e-6*pi)*arclength)' - symbol_names = 'arclength' - symbol_values = 'laser_position' - [] - 
[ycoord] - type = ParsedFunction - expression = '60e-6*cos(pi/(60e-6*pi)*arclength)' - symbol_names = 'arclength' - symbol_values = 'laser_position' - [] - [reward_function] - type = ParsedFunction - expression = '1e-2*min(min(T1-1800, 0), 2800-T1)' - '+1e-2*min(min(T2-1800, 0), 2800-T2)' - '+1e-2*min(min(T3-1800, 0), 2800-T3)' - '+1e-2*min(min(T4-1800, 0), 2800-T4)' - '+1e-2*min(min(T5-1800, 0), 2800-T5)' - '+1e-2*min(min(T6-1800, 0), 2800-T6)' - '+1e-2*min(min(T7-1800, 0), 2800-T7)' - '+1e-2*min(min(T8-1800, 0), 2800-T8)' - symbol_names = 'T1 T2 T3 T4 T5 T6 T7 T8' - symbol_values = 'T1 T2 T3 T4 T5 T6 T7 T8' - [] -[] - -[Postprocessors] - [laser_position] - type = LaserPositionPostprocessor - execute_on = 'TIMESTEP_BEGIN' - speed = speed_signal - [] - [T1] - type = PointValue - variable = T - point = '${fparse 60e-6*sin(2*pi*1/8)} ${fparse 60e-6*cos(2*pi*1/8)} 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [T2] - type = PointValue - variable = T - point = '${fparse 60e-6*sin(2*pi*2/8)} ${fparse 60e-6*cos(2*pi*2/8)} 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [T3] - type = PointValue - variable = T - point = '${fparse 60e-6*sin(2*pi*3/8)} ${fparse 60e-6*cos(2*pi*3/8)} 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [T4] - type = PointValue - variable = T - point = '${fparse 60e-6*sin(2*pi*4/8)} ${fparse 60e-6*cos(2*pi*4/8)} 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [T5] - type = PointValue - variable = T - point = '${fparse 60e-6*sin(2*pi*5/8)} ${fparse 60e-6*cos(2*pi*5/8)} 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [T6] - type = PointValue - variable = T - point = '${fparse 60e-6*sin(2*pi*6/8)} ${fparse 60e-6*cos(2*pi*6/8)} 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [T7] - type = PointValue - variable = T - point = '${fparse 60e-6*sin(2*pi*7/8)} ${fparse 60e-6*cos(2*pi*7/8)} 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [T8] - type = PointValue - variable = T - point = '${fparse 60e-6*sin(2*pi*8/8)} ${fparse 
60e-6*cos(2*pi*8/8)} 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [reward] - type = FunctionValuePostprocessor - function = reward_function - execute_on = 'INITIAL TIMESTEP_END' - indirect_dependencies = 'T1 T2 T3 T4 T5 T6 T7 T8' - [] - [speed_signal] - type = ConstantPostprocessor - value = 1.0 - execute_on = TIMESTEP_BEGIN - [] - [speed] - type = LibtorchControlValuePostprocessor - control_name = src_control - [] - [log_prob_speed] - type = LibtorchDRLLogProbabilityPostprocessor - control_name = src_control - [] -[] - -[Reporters] - [results] - type = AccumulateReporter - reporters = 'T1/value T2/value T3/value T4/value T5/value T6/value T7/value T8/value reward/value speed/value log_prob_speed/value' - [] -[] - -[Materials] - [steel] - type = LaserWeld316LStainlessSteel - temperature = T - use_constant_density = true - [] -[] - -[Controls] - [src_control] - type = LibtorchDRLControl - parameters = "Postprocessors/speed_signal/value" - responses = 'T1 T2 T3 T4 T5 T6 T7 T8' - - # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 1 - response_shift_factors = '1500 1500 1500 1500 1500 1500 1500 1500' - response_scaling_factors = '0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667' - action_scaling_factors = 1.0 - - # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' - # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' - # action_scaling_factors = 1.0 - - execute_on = 'TIMESTEP_BEGIN' - smoother = 1.0 - num_stems_in_period = 10 - stochastic = true - [] -[] - -[Executioner] - type = Transient - end_time = ${endtime} - # dtmin = 1e-10 - # dtmax = 1e-5 - dt = ${timestep} - # petsc_options_iname = '-pc_type -pc_factor_shift_type' - # petsc_options_value = 'lu NONZERO' - petsc_options_iname = '-pc_type -pc_hypre_type -pc_factor_shift_type' - petsc_options_value = 'hypre boomeramg NONZERO' - petsc_options = '-snes_converged_reason -ksp_converged_reason -options_left' - 
solve_type = 'NEWTON' - line_search = 'none' - nl_max_its = 5 - l_max_its = 100 - # [TimeStepper] - # type = IterationAdaptiveDT - # optimal_iterations = 5 - # iteration_window = 1 - # dt = ${timestep} - # linear_iteration_ratio = 1e6 - # growth_factor = 1.25 - # [] -[] - -[Debug] - show_var_residual_norms = true -[] - -[Outputs] - [exodus] - type = Exodus - # output_material_properties = true - [] - console = false -[] diff --git a/modules/combined/examples/stochastic/meltpool/meltpool_trainer.i b/modules/combined/examples/stochastic/meltpool/meltpool_trainer.i deleted file mode 100644 index 911d5c9cd20b..000000000000 --- a/modules/combined/examples/stochastic/meltpool/meltpool_trainer.i +++ /dev/null @@ -1,117 +0,0 @@ -[StochasticTools] -[] - -[Samplers] - [dummy] - type = CartesianProduct - linear_space_items = '0 0.01 1' - min_procs_per_row = 20 - max_procs_per_row = 20 - [] -[] - -[MultiApps] - [runner] - type = SamplerFullSolveMultiApp - sampler = dummy - input_files = '3.i' - mode = batch-reset - min_procs_per_app = 20 - max_procs_per_app = 20 - [] -[] - -[Transfers] - [nn_transfer] - type = SamplerNeuralNetControlTransfer - to_multi_app = runner - trainer_name = nn_trainer - control_name = src_control - sampler = dummy - [] - [r_transfer] - type = SamplerReporterTransfer - from_multi_app = runner - sampler = dummy - stochastic_reporter = storage - from_reporter = 'results/T1:value results/T2:value results/T3:value results/T4:value ' - 'results/T5:value results/T6:value results/T7:value results/T8:value ' - 'results/reward:value results/speed:value results/log_prob_speed:value' - [] -[] - -[Trainers] - [nn_trainer] - type = LibtorchDRLControlTrainer - response = 'storage/r_transfer:results:T1:value storage/r_transfer:results:T2:value storage/r_transfer:results:T3:value storage/r_transfer:results:T4:value ' - 'storage/r_transfer:results:T5:value storage/r_transfer:results:T6:value storage/r_transfer:results:T7:value storage/r_transfer:results:T8:value' - 
control = 'storage/r_transfer:results:speed:value' - log_probability = 'storage/r_transfer:results:log_prob_speed:value' - reward = 'storage/r_transfer:results:reward:value' - - num_epochs = 50 - update_frequency = 1 - decay_factor = 0.99 - lambda_factor = 0.97 - - loss_print_frequency = 1 - - critic_learning_rate = 0.001 - num_critic_neurons_per_layer = '256 256' - critic_activation_functions = 'relu relu' - - control_learning_rate = 0.001 - num_control_neurons_per_layer = '256 256' - control_activation_functions = 'tanh tanh' - - # keep consistent with LibtorchNeuralNetControl - input_timesteps = 1 - - # response_scaling_factors = '13.33 15.38 16.66 38.46 15.38 33.33 40 11.76 4.711 15.38' - # response_shift_factors = '2.055 2.055 1.93 -0.171 1.945 0.449 -0.525 0.029 0.17675 1.945' - - response_shift_factors = '1500 1500 1500 1500 1500 1500 1500 1500' - response_scaling_factors = '0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667 0.000666667' - - # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' - # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' - - standardize_advantage = true - - read_from_file = false - - # min_control_value = ${fparse -0.108} - # max_control_value = ${fparse 0.108} - - min_control_value = ${fparse 0.75} - max_control_value = ${fparse 2.0} - - batch_size = 400 - timestep_window = 10 - - entropy_coeff = 0.01 - [] -[] - -[Reporters] - [storage] - type = StochasticReporter - parallel_type = ROOT - outputs = none - [] - [reward] - type = DRLRewardReporter - drl_trainer_name = nn_trainer - [] -[] - -[Executioner] - type = Transient - num_steps = 1 -[] - -[Outputs] - file_base = output/train_out - json = true - execute_on = TIMESTEP_END -[] diff --git a/framework/include/postprocessors/LiftDragRewardPostprocessor.h b/modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h similarity index 99% rename from 
framework/include/postprocessors/LiftDragRewardPostprocessor.h rename to modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h index 96237b4f120f..155eb509ab19 100644 --- a/framework/include/postprocessors/LiftDragRewardPostprocessor.h +++ b/modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h @@ -24,7 +24,6 @@ class LiftDragRewardPostprocessor : public GeneralPostprocessor virtual Real getValue() const override; protected: - const PostprocessorValue & _lift; const PostprocessorValue & _drag; @@ -40,5 +39,4 @@ class LiftDragRewardPostprocessor : public GeneralPostprocessor std::vector _drag_history; unsigned int _replace_counter; - }; diff --git a/framework/src/postprocessors/LiftDragRewardPostprocessor.C b/modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C similarity index 57% rename from framework/src/postprocessors/LiftDragRewardPostprocessor.C rename to modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C index 00a1f0f8b12f..cf8c9a26ec6e 100644 --- a/framework/src/postprocessors/LiftDragRewardPostprocessor.C +++ b/modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C @@ -8,22 +8,19 @@ //* https://www.gnu.org/licenses/lgpl-2.1.html #include "LiftDragRewardPostprocessor.h" -#include "FEProblemBase.h" -#include "NonlinearSystemBase.h" -#include "MathUtils.h" -#include "TransientBase.h" -#include "Restartable.h" -#include "libmesh/enum_norm_type.h" -registerMooseObject("MooseApp", LiftDragRewardPostprocessor); +#include +#include + +registerMooseObject("StochasticToolsApp", LiftDragRewardPostprocessor); InputParameters LiftDragRewardPostprocessor::validParams() { InputParameters params = GeneralPostprocessor::validParams(); - params.addRequiredParam("lift","Lift coeff"); - params.addRequiredParam("drag","Drag coeff"); + params.addRequiredParam("lift", "Lift coeff"); + params.addRequiredParam("drag", "Drag coeff"); params.addParam("averaging_window", 1, 
"The window"); params.addParam("coeff_1", 1.59, "Coeff 1"); @@ -43,40 +40,31 @@ LiftDragRewardPostprocessor::LiftDragRewardPostprocessor(const InputParameters & _coeff_2(getParam("coeff_2")), _avg_lift(0.0), _avg_drag(0.0), - _lift_history(std::vector(_averaging_window,0.0)), - _drag_history(std::vector(_averaging_window,0.0)) + _lift_history(std::vector(_averaging_window, 0.0)), + _drag_history(std::vector(_averaging_window, 0.0)) { } Real LiftDragRewardPostprocessor::getValue() const { - // std::cout << "Reward" << _coeff_1 - _avg_drag - _coeff_2*std::abs(_avg_lift) << std::endl; - return _coeff_1 - _avg_drag - _coeff_2*std::abs(_avg_lift); + return _coeff_1 - _avg_drag - _coeff_2 * std::abs(_avg_lift); } void LiftDragRewardPostprocessor::execute() { auto rolling_index = _t_step % _averaging_window; - // std::cout << " Rolling index " << rolling_index << std::endl; - - // std::cout << "Lift" << _lift << " drag " << _drag << std::endl; _lift_history[rolling_index] = _lift; _drag_history[rolling_index] = _drag; - // std::cout << Moose::stringify(_lift_history) << std::endl; - // std::cout << Moose::stringify(_drag_history) << std::endl; - if (!rolling_index) { const auto normalization = _t_step ? 
_averaging_window : 1; - // std::cout << Moose::stringify(_lift_history) << std::endl; - // std::cout << Moose::stringify(_drag_history) << std::endl; - _avg_lift = std::reduce(_lift_history.begin(), _lift_history.end())/normalization; - _avg_drag = std::reduce(_drag_history.begin(), _drag_history.end())/normalization; - _lift_history = std::vector(_averaging_window,0.0); - _drag_history = std::vector(_averaging_window,0.0); + _avg_lift = std::reduce(_lift_history.begin(), _lift_history.end()) / normalization; + _avg_drag = std::reduce(_drag_history.begin(), _drag_history.end()) / normalization; + _lift_history = std::vector(_averaging_window, 0.0); + _drag_history = std::vector(_averaging_window, 0.0); } _replace_counter++; From 5e28ce309cc85ff935b8f7b3c0fe9868a5ae0829 Mon Sep 17 00:00:00 2001 From: Peter German Date: Thu, 23 Apr 2026 15:40:06 -0600 Subject: [PATCH 36/51] remive min max parameters from the basic ann. --- .../utils/LibtorchArtificialNeuralNet.h | 11 --- .../controls/LibtorchNeuralNetControl.C | 2 - .../utils/LibtorchArtificialNeuralNet.C | 84 ++----------------- .../libtorch/utils/LibtorchActorNeuralNet.h | 4 + .../trainers/LibtorchDRLControlTrainer.C | 2 - .../libtorch/utils/LibtorchActorNeuralNet.C | 22 ++++- .../unit/src/TestLibtorchActorNeuralNet.C | 2 - .../unit/src/TestLibtorchRLCore.C | 2 + 8 files changed, 31 insertions(+), 98 deletions(-) diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index ec3a1a76e16e..12b89d6afdd3 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -39,8 +39,6 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu const unsigned int num_outputs, const std::vector & num_neurons_per_layer, const std::vector & activation_function = {"relu"}, - const std::vector & minimum_values = {}, - const std::vector & 
maximum_values = {}, const torch::DeviceType device_type = torch::kCPU, const torch::ScalarType scalar_type = torch::kDouble, const bool build_on_construct = true, @@ -103,10 +101,6 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu virtual void initializeNeuralNetwork(); - const std::vector & minValues() const { return _minimum_values; }; - - const std::vector & maxValues() const { return _maximum_values; }; - /// Store the network architecture in a json file (for debugging, visualization) void store(nlohmann::json & json) const; @@ -147,14 +141,9 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu std::vector _input_scaling_factors; /// Multiplicative scaling applied after the network output is formed std::vector _output_scaling_factors; - /// - const std::vector _minimum_values; - const std::vector _maximum_values; torch::Tensor _input_shift_tensor; torch::Tensor _input_scale_tensor; torch::Tensor _output_scale_tensor; - torch::Tensor _min_tensor; - torch::Tensor _max_tensor; }; void to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * const & network); diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 517da31ef7fa..1c21df2164f4 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -135,8 +135,6 @@ LibtorchNeuralNetControl::loadControlNeuralNetFromFile(const InputParameters & p num_outputs, num_neurons_per_layer, activation_functions, - std::vector(), - std::vector(), torch::kCPU, torch::kDouble, true, diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index 9447ceb28d54..da0499852790 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ 
-57,8 +57,6 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( const unsigned int num_outputs, const std::vector & num_neurons_per_layer, const std::vector & activation_function, - const std::vector & minimum_values, - const std::vector & maximum_values, const torch::DeviceType device_type, const torch::ScalarType data_type, const bool build_on_construct, @@ -77,9 +75,7 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( _input_scaling_factors( normalizeAffineFactors(input_scaling_factors, num_inputs, 1.0, "input_scaling_factors")), _output_scaling_factors( - normalizeAffineFactors(output_scaling_factors, num_outputs, 1.0, "output_scaling_factors")), - _minimum_values(minimum_values), - _maximum_values(maximum_values) + normalizeAffineFactors(output_scaling_factors, num_outputs, 1.0, "output_scaling_factors")) { _activation_function = activation_function; initializeAffineBuffers(); @@ -90,29 +86,6 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( mooseError("The number of activation functions should be either one or the same as the number " "of hidden layers"); - const bool has_minimum_values = !_minimum_values.empty(); - const bool has_maximum_values = !_maximum_values.empty(); - if (has_minimum_values != has_maximum_values) - mooseError("Bounded neural network outputs require both minimum_values and maximum_values."); - - if (has_minimum_values) - { - if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs) - mooseError("The number of minimum_values and maximum_values entries must match the number " - "of outputs."); - - for (const auto i : make_range(_minimum_values.size())) - if (!(_maximum_values[i] > _minimum_values[i])) - mooseError("maximum_values entries must be strictly greater than minimum_values entries."); - - auto min_value = _minimum_values; - LibtorchUtils::vectorToTensor(min_value, _min_tensor); - _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); - auto max_value = 
_maximum_values; - LibtorchUtils::vectorToTensor(max_value, _max_tensor); - _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); - } - if (build_on_construct) constructNeuralNetwork(); } @@ -129,9 +102,7 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( _data_type(nn.dataType()), _input_shift_factors(nn.inputShiftFactors()), _input_scaling_factors(nn.inputScalingFactors()), - _output_scaling_factors(nn.outputScalingFactors()), - _minimum_values(nn.minValues()), - _maximum_values(nn.maxValues()) + _output_scaling_factors(nn.outputScalingFactors()) { initializeAffineBuffers(); @@ -150,16 +121,6 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( for (unsigned int buffer_i : make_range(from_buffers.size())) to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone(); } - - if (_minimum_values.size()) - { - auto min_value = _minimum_values; - LibtorchUtils::vectorToTensor(min_value, _min_tensor); - _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); - auto max_value = _maximum_values; - LibtorchUtils::vectorToTensor(max_value, _max_tensor); - _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); - } } Real @@ -306,17 +267,7 @@ LibtorchArtificialNeuralNet::forward(const torch::Tensor & x) output = _weights[i]->forward(output); } - if (_minimum_values.size()) - { - output = torch::sigmoid(_weights[_weights.size() - 1]->forward(output)); - const auto scale = _max_tensor - _min_tensor; - output = torch::mul(output, scale); - output = output + _min_tensor; - } - else - output = _weights[_weights.size() - 1]->forward(output); - - return scaleOutput(output); + return scaleOutput(_weights[_weights.size() - 1]->forward(output)); } void @@ -435,16 +386,6 @@ dataStore( for (unsigned int i = 0; i < afs; ++i) items[i] = nn->activationFunctions()[i]; - // unsigned int nminv(nn->minValues().size()); - // dataStore(stream, nminv, context); - std::vector minv(nn->minValues()); 
- dataStore(stream, minv, context); - - // unsigned int nmaxv(nn->minValues().size()); - // dataStore(stream, nmaxv, context); - std::vector maxv(nn->maxValues()); - dataStore(stream, maxv, context); - dataStore(stream, items, context); auto device_type = static_cast::type>(nn->deviceType()); @@ -484,14 +425,6 @@ dataLoad( activation_functions.resize(num_activation_items); dataLoad(stream, activation_functions, context); - std::vector min_values; - min_values.resize(num_outputs); - dataLoad(stream, min_values, context); - - std::vector max_values; - max_values.resize(num_outputs); - dataLoad(stream, max_values, context); - std::underlying_type::type device_type; dataLoad(stream, device_type, context); const torch::DeviceType divt(static_cast(device_type)); @@ -500,15 +433,8 @@ dataLoad( dataLoad(stream, data_type, context); const torch::ScalarType datt(static_cast(data_type)); - nn = std::make_shared(name, - num_inputs, - num_outputs, - num_neurons_per_layer, - activation_functions, - min_values, - max_values, - divt, - datt); + nn = std::make_shared( + name, num_inputs, num_outputs, num_neurons_per_layer, activation_functions, divt, datt); Moose::loadLibtorchArtificialNeuralNetState(*nn, name); } diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h index cd4cbefecf2a..2572abc9702a 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -79,6 +79,8 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet LibtorchBetaActionDistribution & betaActionDistribution(); bool stateIndependentStd() const { return _state_independent_std; } + const std::vector & minValues() const { return _minimum_values; } + const std::vector & maxValues() const { return _maximum_values; } void resetDistributionParams(torch::Tensor input); @@ -89,6 +91,8 @@ class 
LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet virtual void initializeNeuralNetwork() override; protected: + const std::vector _minimum_values; + const std::vector _maximum_values; const bool _state_independent_std; std::shared_ptr _action_distribution; }; diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 0ffcffb14287..93d05945cdd8 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -275,8 +275,6 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par 1, _num_critic_neurons_per_layer, getParam>("critic_activation_functions"), - std::vector(), - std::vector(), torch::kCPU, torch::kDouble, true, diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C index 1a11f2e46cd7..c33e409dc1cf 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -227,16 +227,32 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( num_outputs, num_neurons_per_layer, activation_function, - minimum_values, - maximum_values, device_type, data_type, false, input_shift_factors, input_scaling_factors, output_scaling_factors), + _minimum_values(minimum_values), + _maximum_values(maximum_values), _state_independent_std(state_independent_std) { + const bool has_minimum_values = !_minimum_values.empty(); + const bool has_maximum_values = !_maximum_values.empty(); + if (has_minimum_values != has_maximum_values) + mooseError("Bounded action distributions require both minimum_values and maximum_values."); + + if (has_minimum_values) + { + if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs) + 
mooseError("The number of minimum_values and maximum_values entries must match the number " + "of action outputs."); + + for (const auto i : make_range(_minimum_values.size())) + if (!(_maximum_values[i] > _minimum_values[i])) + mooseError("maximum_values entries must be strictly greater than minimum_values entries."); + } + if (build_on_construct) constructNeuralNetwork(); } @@ -244,6 +260,8 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet( LibtorchActorNeuralNet::LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, const bool build_on_construct) : LibtorchArtificialNeuralNet(dynamic_cast(nn), false), + _minimum_values(nn.minValues()), + _maximum_values(nn.maxValues()), _state_independent_std(nn.stateIndependentStd()) { // We construct the NN architecture diff --git a/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C index 3062eebaa2d6..23a53d615e9b 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C +++ b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C @@ -47,8 +47,6 @@ TEST(LibtorchActorNeuralNetTest, artificialNetAppliesAffineInputAndOutputScaling 1, {}, {"linear"}, - {}, - {}, torch::kCPU, torch::kDouble, true, diff --git a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C index d53dafada6ae..6d5605f8b69d 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C +++ b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C @@ -72,7 +72,9 @@ TEST(LibtorchRLCoreTest, PPOLossUsesStoredLogProbabilityAndValueTarget) Moose::LibtorchActorNeuralNet policy_network("policy", 1, 1, {}, {"linear"}); policy_network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0); + policy_network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); policy_network.gaussianActionDistribution().stdModule()->weight.data().fill_(0.0); + 
policy_network.gaussianActionDistribution().stdModule()->bias.data().fill_(0.0); Moose::LibtorchArtificialNeuralNet value_network("value", 1, 1, {}, {"linear"}); auto value_params = value_network.named_parameters(); From 408dc1289115c371f7707b37006f4a1db582e3f5 Mon Sep 17 00:00:00 2001 From: Peter German Date: Thu, 23 Apr 2026 15:54:11 -0600 Subject: [PATCH 37/51] Remove the vortex control example. --- .../flow_over_circle_linearfv.i | 415 ----------------- .../flow_over_circle_linearfv_single.i | 426 ------------------ .../flow_over_circle_linearfv_single.i | 420 ----------------- .../vortex_control/full-run/header.i | 39 -- .../stochastic/vortex_control/full-run/mesh.i | 240 ---------- .../flow_over_circle_linearfv_single.i | 420 ----------------- .../vortex_control/half-run/header.i | 39 -- .../stochastic/vortex_control/half-run/mesh.i | 240 ---------- .../stochastic/vortex_control/header.i | 39 -- .../examples/stochastic/vortex_control/mesh.i | 240 ---------- .../stochastic/vortex_control/plot_reward.py | 60 --- .../stochastic/vortex_control/trainer.i | 117 ----- 12 files changed, 2695 deletions(-) delete mode 100644 modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv_single.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/full-run/flow_over_circle_linearfv_single.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/full-run/header.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/full-run/mesh.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/half-run/flow_over_circle_linearfv_single.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/half-run/header.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/half-run/mesh.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/header.i delete mode 
100644 modules/combined/examples/stochastic/vortex_control/mesh.i delete mode 100644 modules/combined/examples/stochastic/vortex_control/plot_reward.py delete mode 100644 modules/combined/examples/stochastic/vortex_control/trainer.i diff --git a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i deleted file mode 100644 index e9b49cd0bf88..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv.i +++ /dev/null @@ -1,415 +0,0 @@ -!include header.i - -[Mesh] - [fmg] - type = FileMeshGenerator - file = flow_over_circle_linearfv_out_orig.e - use_for_exodus_restart = true - [] -[] - -[Problem] - linear_sys_names = 'u_system v_system pressure_system' - previous_nl_solution_required = true -[] - -[Functions] - [inlet_function] - type = ParsedFunction - expression = '4*U*(y-ymin)*(ymax-y)/(ymax-ymin)/(ymax-ymin)' - symbol_names = 'U ymax ymin' - symbol_values = '${inlet_velocity} ${y_max} ${y_min}' - [] - [gap_x] - type = ParsedFunction - expression = 'Q*x/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' - symbol_names = 'Q' - symbol_values = 'Q_signal' - [] - [gap_y] - type = ParsedFunction - expression = 'if(y>0,Q,-Q)*y/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' - symbol_names = 'Q' - symbol_values = 'Q_signal' - [] -[] - -[UserObjects] - [rc] - type = RhieChowMassFlux - u = vel_x - v = vel_y - pressure = pressure - rho = ${rho} - p_diffusion_kernel = p_diffusion - [] -[] - -[Variables] - [vel_x] - type = MooseLinearVariableFVReal - solver_sys = u_system - initial_from_file_var = vel_x - initial_from_file_timestep = LATEST - [] - [vel_y] - type = MooseLinearVariableFVReal - solver_sys = v_system - initial_from_file_var = vel_y - initial_from_file_timestep = LATEST - [] - [pressure] - type = MooseLinearVariableFVReal - # initial_condition = 0 - solver_sys = pressure_system - initial_from_file_var = pressure - 
initial_from_file_timestep = LATEST - [] -[] - -[LinearFVKernels] - [u_time] - type = LinearFVTimeDerivative - variable = vel_x - factor = ${rho} - [] - [u_advection_stress] - type = LinearWCNSFVMomentumFlux - variable = vel_x - advected_interp_method = ${advected_interp_method} - mu = ${mu} - u = vel_x - v = vel_y - momentum_component = 'x' - rhie_chow_user_object = 'rc' - use_nonorthogonal_correction = true - [] - [u_pressure] - type = LinearFVMomentumPressure - variable = vel_x - pressure = pressure - momentum_component = 'x' - [] - - [v_time] - type = LinearFVTimeDerivative - variable = vel_y - factor = ${rho} - [] - [v_advection_stress] - type = LinearWCNSFVMomentumFlux - variable = vel_y - advected_interp_method = ${advected_interp_method} - mu = ${mu} - u = vel_x - v = vel_y - momentum_component = 'y' - rhie_chow_user_object = 'rc' - use_nonorthogonal_correction = true - [] - [v_pressure] - type = LinearFVMomentumPressure - variable = vel_y - pressure = pressure - momentum_component = 'y' - [] - - [p_diffusion] - type = LinearFVAnisotropicDiffusion - variable = pressure - diffusion_tensor = Ainv - use_nonorthogonal_correction = true - [] - [HbyA_divergence] - type = LinearFVDivergence - variable = pressure - face_flux = HbyA - force_boundary_execution = true - [] -[] - -[LinearFVBCs] - [inlet_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'left_boundary' - functor = 'inlet_function' - [] - [inlet_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'left_boundary' - functor = 0 - [] - [circle_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'circle' - functor = 0 - [] - [circle_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'circle' - functor = 0 - [] - [gap_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'top_gap bottom_gap' - functor = 'gap_x' - [] - [gap_y] 
- type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'top_gap bottom_gap' - functor = 'gap_y' - [] - [walls_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'top_boundary bottom_boundary' - functor = 0 - [] - [walls_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'top_boundary bottom_boundary' - functor = 0 - [] - [outlet_p] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - boundary = 'right_boundary' - variable = pressure - functor = 0 - [] - [outlet_u] - type = LinearFVAdvectionDiffusionOutflowBC - variable = vel_x - use_two_term_expansion = false - boundary = 'right_boundary' - [] - [outlet_v] - type = LinearFVAdvectionDiffusionOutflowBC - variable = vel_y - use_two_term_expansion = false - boundary = 'right_boundary' - [] -[] - -[Postprocessors] - [drag_force] - type = IntegralDirectedSurfaceForce - vel_x = vel_x - vel_y = vel_y - mu = ${mu} - pressure = pressure - principal_direction = '1 0 0' - boundary = 'circle' - outputs = none - execute_on = 'INITIAL TIMESTEP_END' - [] - [drag_coeff] - type = ParsedPostprocessor - expression = '2*drag_force/rho/(avgvel*avgvel)/D' - constant_names = 'rho avgvel D' - constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' - pp_names = 'drag_force' - execute_on = 'INITIAL TIMESTEP_END' - [] - [lift_force] - type = IntegralDirectedSurfaceForce - vel_x = vel_x - vel_y = vel_y - mu = ${mu} - pressure = pressure - principal_direction = '0 1 0' - boundary = 'circle' - outputs = none - execute_on = 'INITIAL TIMESTEP_END' - [] - [lift_coeff] - type = ParsedPostprocessor - expression = '2*lift_force/rho/(avgvel*avgvel)/D' - constant_names = 'rho avgvel D' - constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' - pp_names = 'lift_force' - execute_on = 'INITIAL TIMESTEP_END' - [] - [reward] - type = LiftDragRewardPostprocessor - lift = lift_coeff - drag 
= drag_coeff - averaging_window = 50 - coeff_1 = 0.0 - coeff_2 = 0.2 - execute_on = 'INITIAL TIMESTEP_END' - [] - # [p1] - # type = PointValue - # variable = pressure - # point = '0 0.07 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p2] - # type = PointValue - # variable = pressure - # point = '0 -0.07 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p3] - # type = PointValue - # variable = pressure - # point = '0.075 0.1 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p4] - # type = PointValue - # variable = pressure - # point = '0.075 0.0 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p5] - # type = PointValue - # variable = pressure - # point = '0.075 -0.1 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - [p1x] - type = PointValue - variable = vel_x - point = '0 0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p2x] - type = PointValue - variable = vel_x - point = '0 -0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p3x] - type = PointValue - variable = vel_x - point = '0.075 0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p4x] - type = PointValue - variable = vel_x - point = '0.075 0.0 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p5x] - type = PointValue - variable = vel_x - point = '0.075 -0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p1y] - type = PointValue - variable = vel_y - point = '0 0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p2y] - type = PointValue - variable = vel_y - point = '0 -0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p3y] - type = PointValue - variable = vel_y - point = '0.075 0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p4y] - type = PointValue - variable = vel_y - point = '0.075 0.0 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p5y] - type = PointValue - variable = vel_y - point = '0.075 -0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [Q_signal] - type = ConstantPostprocessor - value = 0.0 - execute_on = TIMESTEP_BEGIN - 
[] - [Q] - type = LibtorchControlValuePostprocessor - control_name = src_control - [] - [log_prob_Q] - type = LibtorchDRLLogProbabilityPostprocessor - control_name = src_control - [] -[] - -[Reporters] - [results] - type = AccumulateReporter - reporters = 'p1x/value p2x/value p3x/value p4x/value p5x/value p1y/value p2y/value p3y/value p4y/value p5y/value reward/value Q/value log_prob_Q/value' - [] -[] - -[Controls] - [src_control] - type = LibtorchDRLControl - parameters = "Postprocessors/Q_signal/value" - responses = 'p1x p2x p3x p4x p5x p1y p2y p3y p4y p5y' - - # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 1 - response_shift_factors = '1.98 1.825 2.015 0.03 1.9 0.58 -0.425 0.06 0.12 -0.02' - response_scaling_factors = '1.47 1.03 2.60 3.45 2.0 1.19 1.6 2.7 1.47 2.08' - action_scaling_factors = 1.0 - - # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' - # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' - # action_scaling_factors = 1.0 - - execute_on = 'TIMESTEP_BEGIN' - smoother = 0.1 - num_stems_in_period = 50 - stochastic = true - [] -[] - -[Executioner] - type = PIMPLE - momentum_l_abs_tol = 1e-7 - pressure_l_abs_tol = 1e-7 - momentum_l_tol = 1e-7 - pressure_l_tol = 1e-7 - rhie_chow_user_object = 'rc' - momentum_systems = 'u_system v_system' - pressure_system = 'pressure_system' - momentum_equation_relaxation = 0.9 - pressure_variable_relaxation = 0.6 - num_iterations = 100 - pressure_absolute_tolerance = 5e-6 - momentum_absolute_tolerance = 5e-6 - momentum_petsc_options_iname = '-pc_type -pc_hypre_type' - momentum_petsc_options_value = 'hypre boomeramg' - pressure_petsc_options_iname = '-pc_type -pc_hypre_type' - pressure_petsc_options_value = 'hypre boomeramg' - print_fields = false - continue_on_max_its = true - dt = 0.0005 - num_steps = 2000 -[] - -[Outputs] - exodus = true - [json] - type = JSON - execute_on = final - [] - console = false - # execute_on = FINAL -[] diff --git 
a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv_single.i b/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv_single.i deleted file mode 100644 index 7cafe723a903..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/flow_over_circle_linearfv_single.i +++ /dev/null @@ -1,426 +0,0 @@ -!include header.i - -[Mesh] - [fmg] - type = FileMeshGenerator - file = flow_over_circle_linearfv_out_orig.e - use_for_exodus_restart = true - [] -[] - -[Problem] - linear_sys_names = 'u_system v_system pressure_system' - previous_nl_solution_required = true -[] - -[Functions] - [inlet_function] - type = ParsedFunction - expression = '4*U*(y-ymin)*(ymax-y)/(ymax-ymin)/(ymax-ymin)' - symbol_names = 'U ymax ymin' - symbol_values = '${inlet_velocity} ${y_max} ${y_min}' - [] - [gap_x] - type = ParsedFunction - expression = 'Q*x/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' - symbol_names = 'Q' - symbol_values = 'Q_signal' - [] - [gap_y] - type = ParsedFunction - expression = 'if(y>0,Q,-Q)*y/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' - symbol_names = 'Q' - symbol_values = 'Q_signal' - [] -[] - -[UserObjects] - [rc] - type = RhieChowMassFlux - u = vel_x - v = vel_y - pressure = pressure - rho = ${rho} - p_diffusion_kernel = p_diffusion - [] -[] - -[Variables] - [vel_x] - type = MooseLinearVariableFVReal - solver_sys = u_system - initial_from_file_var = vel_x - initial_from_file_timestep = LATEST - [] - [vel_y] - type = MooseLinearVariableFVReal - solver_sys = v_system - initial_from_file_var = vel_y - initial_from_file_timestep = LATEST - [] - [pressure] - type = MooseLinearVariableFVReal - # initial_condition = 0 - solver_sys = pressure_system - initial_from_file_var = pressure - initial_from_file_timestep = LATEST - [] -[] - -[LinearFVKernels] - [u_time] - type = LinearFVTimeDerivative - variable = vel_x - factor = ${rho} - [] - [u_advection_stress] - type = LinearWCNSFVMomentumFlux - variable = vel_x - 
advected_interp_method = ${advected_interp_method} - mu = ${mu} - u = vel_x - v = vel_y - momentum_component = 'x' - rhie_chow_user_object = 'rc' - use_nonorthogonal_correction = true - [] - [u_pressure] - type = LinearFVMomentumPressure - variable = vel_x - pressure = pressure - momentum_component = 'x' - [] - - [v_time] - type = LinearFVTimeDerivative - variable = vel_y - factor = ${rho} - [] - [v_advection_stress] - type = LinearWCNSFVMomentumFlux - variable = vel_y - advected_interp_method = ${advected_interp_method} - mu = ${mu} - u = vel_x - v = vel_y - momentum_component = 'y' - rhie_chow_user_object = 'rc' - use_nonorthogonal_correction = true - [] - [v_pressure] - type = LinearFVMomentumPressure - variable = vel_y - pressure = pressure - momentum_component = 'y' - [] - - [p_diffusion] - type = LinearFVAnisotropicDiffusion - variable = pressure - diffusion_tensor = Ainv - use_nonorthogonal_correction = true - [] - [HbyA_divergence] - type = LinearFVDivergence - variable = pressure - face_flux = HbyA - force_boundary_execution = true - [] -[] - -[LinearFVBCs] - [inlet_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'left_boundary' - functor = 'inlet_function' - [] - [inlet_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'left_boundary' - functor = 0 - [] - [circle_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'circle' - functor = 0 - [] - [circle_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'circle' - functor = 0 - [] - [gap_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'top_gap bottom_gap' - functor = 'gap_x' - [] - [gap_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'top_gap bottom_gap' - functor = 'gap_y' - [] - [walls_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 
'top_boundary bottom_boundary' - functor = 0 - [] - [walls_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'top_boundary bottom_boundary' - functor = 0 - [] - [outlet_p] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - boundary = 'right_boundary' - variable = pressure - functor = 0 - [] - [outlet_u] - type = LinearFVAdvectionDiffusionOutflowBC - variable = vel_x - use_two_term_expansion = false - boundary = 'right_boundary' - [] - [outlet_v] - type = LinearFVAdvectionDiffusionOutflowBC - variable = vel_y - use_two_term_expansion = false - boundary = 'right_boundary' - [] -[] - -[Postprocessors] - [drag_force] - type = IntegralDirectedSurfaceForce - vel_x = vel_x - vel_y = vel_y - mu = ${mu} - pressure = pressure - principal_direction = '1 0 0' - boundary = 'circle' - outputs = none - execute_on = 'INITIAL TIMESTEP_END' - [] - [drag_coeff] - type = ParsedPostprocessor - expression = '2*drag_force/rho/(avgvel*avgvel)/D' - constant_names = 'rho avgvel D' - constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' - pp_names = 'drag_force' - execute_on = 'INITIAL TIMESTEP_END' - [] - [lift_force] - type = IntegralDirectedSurfaceForce - vel_x = vel_x - vel_y = vel_y - mu = ${mu} - pressure = pressure - principal_direction = '0 1 0' - boundary = 'circle' - outputs = none - execute_on = 'INITIAL TIMESTEP_END' - [] - [lift_coeff] - type = ParsedPostprocessor - expression = '2*lift_force/rho/(avgvel*avgvel)/D' - constant_names = 'rho avgvel D' - constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' - pp_names = 'lift_force' - execute_on = 'INITIAL TIMESTEP_END' - [] - [reward] - type = LiftDragRewardPostprocessor - lift = lift_coeff - drag = drag_coeff - averaging_window = 50 - coeff_1 = 0.0 - coeff_2 = 0.2 - execute_on = 'INITIAL TIMESTEP_END' - [] - # [p1] - # type = PointValue - # variable = pressure - # point = '0 0.07 0.0' - # execute_on = 'INITIAL 
TIMESTEP_END' - # [] - # [p2] - # type = PointValue - # variable = pressure - # point = '0 -0.07 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p3] - # type = PointValue - # variable = pressure - # point = '0.075 0.1 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p4] - # type = PointValue - # variable = pressure - # point = '0.075 0.0 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p5] - # type = PointValue - # variable = pressure - # point = '0.075 -0.1 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - [p1x] - type = PointValue - variable = vel_x - point = '0 0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p2x] - type = PointValue - variable = vel_x - point = '0 -0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p3x] - type = PointValue - variable = vel_x - point = '0.075 0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p4x] - type = PointValue - variable = vel_x - point = '0.075 0.0 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p5x] - type = PointValue - variable = vel_x - point = '0.075 -0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p1y] - type = PointValue - variable = vel_y - point = '0 0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p2y] - type = PointValue - variable = vel_y - point = '0 -0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p3y] - type = PointValue - variable = vel_y - point = '0.075 0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p4y] - type = PointValue - variable = vel_y - point = '0.075 0.0 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p5y] - type = PointValue - variable = vel_y - point = '0.075 -0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [Q_signal] - type = ConstantPostprocessor - value = 0.0 - execute_on = TIMESTEP_BEGIN - [] - [Q] - type = LibtorchControlValuePostprocessor - control_name = src_control - [] - [log_prob_Q] - type = LibtorchDRLLogProbabilityPostprocessor - control_name = src_control - [] -[] - -[Reporters] - [results] - 
type = AccumulateReporter - reporters = 'p1x/value p2x/value p3x/value p4x/value p5x/value p1y/value p2y/value p3y/value p4y/value p5y/value reward/value Q/value log_prob_Q/value' - [] -[] - -[Controls] - [src_control] - type = LibtorchDRLControl - parameters = "Postprocessors/Q_signal/value" - responses = 'p1x p2x p3x p4x p5x p1y p2y p3y p4y p5y' - - # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 1 - response_shift_factors = '1.98 1.825 2.015 0.03 1.9 0.58 -0.425 0.06 0.12 -0.02' - response_scaling_factors = '1.47 1.03 2.60 3.45 2.0 1.19 1.6 2.7 1.47 2.08' - action_scaling_factors = 1.0 - - filename = "control.net" - - num_neurons_per_layer = '512 512' - activation_function = 'tanh tanh' - - min_control_value = ${fparse -0.108} - max_control_value = ${fparse 0.108} - - action_standard_deviations = '0.1' - - # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' - # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' - # action_scaling_factors = 1.0 - - execute_on = 'TIMESTEP_BEGIN' - smoother = 0.1 - num_stems_in_period = 50 - - deterministic = false - [] -[] - -[Executioner] - type = PIMPLE - momentum_l_abs_tol = 1e-7 - pressure_l_abs_tol = 1e-7 - momentum_l_tol = 1e-7 - pressure_l_tol = 1e-7 - rhie_chow_user_object = 'rc' - momentum_systems = 'u_system v_system' - pressure_system = 'pressure_system' - momentum_equation_relaxation = 0.9 - pressure_variable_relaxation = 0.6 - num_iterations = 100 - pressure_absolute_tolerance = 5e-6 - momentum_absolute_tolerance = 5e-6 - momentum_petsc_options_iname = '-pc_type -pc_hypre_type' - momentum_petsc_options_value = 'hypre boomeramg' - pressure_petsc_options_iname = '-pc_type -pc_hypre_type' - pressure_petsc_options_value = 'hypre boomeramg' - print_fields = false - continue_on_max_its = true - dt = 0.0005 - num_steps = 2000 -[] - -[Outputs] - exodus = true - [json] - type = JSON - execute_on = final - [] - # console = false - # execute_on = FINAL -[] diff --git 
a/modules/combined/examples/stochastic/vortex_control/full-run/flow_over_circle_linearfv_single.i b/modules/combined/examples/stochastic/vortex_control/full-run/flow_over_circle_linearfv_single.i deleted file mode 100644 index 7ec1253f14d5..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/full-run/flow_over_circle_linearfv_single.i +++ /dev/null @@ -1,420 +0,0 @@ -!include header.i - -[Mesh] - [fmg] - type = FileMeshGenerator - file = flow_over_circle_linearfv_out_orig.e - use_for_exodus_restart = true - [] -[] - -[Problem] - linear_sys_names = 'u_system v_system pressure_system' - previous_nl_solution_required = true -[] - -[Functions] - [inlet_function] - type = ParsedFunction - expression = '4*U*(y-ymin)*(ymax-y)/(ymax-ymin)/(ymax-ymin)' - symbol_names = 'U ymax ymin' - symbol_values = '${inlet_velocity} ${y_max} ${y_min}' - [] - [gap_x] - type = ParsedFunction - expression = 'Q*x/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' - symbol_names = 'Q' - symbol_values = 'Q_signal' - [] - [gap_y] - type = ParsedFunction - expression = 'if(y>0,Q,-Q)*y/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' - symbol_names = 'Q' - symbol_values = 'Q_signal' - [] -[] - -[UserObjects] - [rc] - type = RhieChowMassFlux - u = vel_x - v = vel_y - pressure = pressure - rho = ${rho} - p_diffusion_kernel = p_diffusion - [] -[] - -[Variables] - [vel_x] - type = MooseLinearVariableFVReal - solver_sys = u_system - initial_from_file_var = vel_x - initial_from_file_timestep = LATEST - [] - [vel_y] - type = MooseLinearVariableFVReal - solver_sys = v_system - initial_from_file_var = vel_y - initial_from_file_timestep = LATEST - [] - [pressure] - type = MooseLinearVariableFVReal - # initial_condition = 0 - solver_sys = pressure_system - initial_from_file_var = pressure - initial_from_file_timestep = LATEST - [] -[] - -[LinearFVKernels] - [u_time] - type = LinearFVTimeDerivative - variable = vel_x - factor = ${rho} - [] - [u_advection_stress] - type = LinearWCNSFVMomentumFlux - 
variable = vel_x - advected_interp_method = ${advected_interp_method} - mu = ${mu} - u = vel_x - v = vel_y - momentum_component = 'x' - rhie_chow_user_object = 'rc' - use_nonorthogonal_correction = true - [] - [u_pressure] - type = LinearFVMomentumPressure - variable = vel_x - pressure = pressure - momentum_component = 'x' - [] - - [v_time] - type = LinearFVTimeDerivative - variable = vel_y - factor = ${rho} - [] - [v_advection_stress] - type = LinearWCNSFVMomentumFlux - variable = vel_y - advected_interp_method = ${advected_interp_method} - mu = ${mu} - u = vel_x - v = vel_y - momentum_component = 'y' - rhie_chow_user_object = 'rc' - use_nonorthogonal_correction = true - [] - [v_pressure] - type = LinearFVMomentumPressure - variable = vel_y - pressure = pressure - momentum_component = 'y' - [] - - [p_diffusion] - type = LinearFVAnisotropicDiffusion - variable = pressure - diffusion_tensor = Ainv - use_nonorthogonal_correction = true - [] - [HbyA_divergence] - type = LinearFVDivergence - variable = pressure - face_flux = HbyA - force_boundary_execution = true - [] -[] - -[LinearFVBCs] - [inlet_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'left_boundary' - functor = 'inlet_function' - [] - [inlet_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'left_boundary' - functor = 0 - [] - [circle_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'circle' - functor = 0 - [] - [circle_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'circle' - functor = 0 - [] - [gap_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'top_gap bottom_gap' - functor = 'gap_x' - [] - [gap_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'top_gap bottom_gap' - functor = 'gap_y' - [] - [walls_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = 
vel_x - boundary = 'top_boundary bottom_boundary' - functor = 0 - [] - [walls_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'top_boundary bottom_boundary' - functor = 0 - [] - [outlet_p] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - boundary = 'right_boundary' - variable = pressure - functor = 0 - [] - [outlet_u] - type = LinearFVAdvectionDiffusionOutflowBC - variable = vel_x - use_two_term_expansion = false - boundary = 'right_boundary' - [] - [outlet_v] - type = LinearFVAdvectionDiffusionOutflowBC - variable = vel_y - use_two_term_expansion = false - boundary = 'right_boundary' - [] -[] - -[Postprocessors] - [drag_force] - type = IntegralDirectedSurfaceForce - vel_x = vel_x - vel_y = vel_y - mu = ${mu} - pressure = pressure - principal_direction = '1 0 0' - boundary = 'circle' - outputs = none - execute_on = 'INITIAL TIMESTEP_END' - [] - [drag_coeff] - type = ParsedPostprocessor - expression = '2*drag_force/rho/(avgvel*avgvel)/D' - constant_names = 'rho avgvel D' - constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' - pp_names = 'drag_force' - execute_on = 'INITIAL TIMESTEP_END' - [] - [lift_force] - type = IntegralDirectedSurfaceForce - vel_x = vel_x - vel_y = vel_y - mu = ${mu} - pressure = pressure - principal_direction = '0 1 0' - boundary = 'circle' - outputs = none - execute_on = 'INITIAL TIMESTEP_END' - [] - [lift_coeff] - type = ParsedPostprocessor - expression = '2*lift_force/rho/(avgvel*avgvel)/D' - constant_names = 'rho avgvel D' - constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' - pp_names = 'lift_force' - execute_on = 'INITIAL TIMESTEP_END' - [] - [reward] - type = LiftDragRewardPostprocessor - lift = lift_coeff - drag = drag_coeff - averaging_window = 50 - coeff_1 = 0.0 - coeff_2 = 0.2 - execute_on = 'INITIAL TIMESTEP_END' - [] - # [p1] - # type = PointValue - # variable = pressure - # point = '0 0.07 0.0' - # execute_on = 
'INITIAL TIMESTEP_END' - # [] - # [p2] - # type = PointValue - # variable = pressure - # point = '0 -0.07 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p3] - # type = PointValue - # variable = pressure - # point = '0.075 0.1 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p4] - # type = PointValue - # variable = pressure - # point = '0.075 0.0 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p5] - # type = PointValue - # variable = pressure - # point = '0.075 -0.1 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - [p1x] - type = PointValue - variable = vel_x - point = '0 0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p2x] - type = PointValue - variable = vel_x - point = '0 -0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p3x] - type = PointValue - variable = vel_x - point = '0.075 0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p4x] - type = PointValue - variable = vel_x - point = '0.075 0.0 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p5x] - type = PointValue - variable = vel_x - point = '0.075 -0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p1y] - type = PointValue - variable = vel_y - point = '0 0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p2y] - type = PointValue - variable = vel_y - point = '0 -0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p3y] - type = PointValue - variable = vel_y - point = '0.075 0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p4y] - type = PointValue - variable = vel_y - point = '0.075 0.0 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p5y] - type = PointValue - variable = vel_y - point = '0.075 -0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [Q_signal] - type = ConstantPostprocessor - value = 0.0 - execute_on = TIMESTEP_BEGIN - [] - [Q] - type = LibtorchControlValuePostprocessor - control_name = src_control - [] - [log_prob_Q] - type = LibtorchDRLLogProbabilityPostprocessor - control_name = src_control - [] -[] - -[Reporters] - 
[results] - type = AccumulateReporter - reporters = 'p1x/value p2x/value p3x/value p4x/value p5x/value p1y/value p2y/value p3y/value p4y/value p5y/value reward/value Q/value log_prob_Q/value' - [] -[] - -[Controls] - [src_control] - type = LibtorchDRLControl - parameters = "Postprocessors/Q_signal/value" - responses = 'p1x p2x p3x p4x p5x p1y p2y p3y p4y p5y' - - # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 1 - response_shift_factors = '1.98 1.825 2.015 0.03 1.9 0.58 -0.425 0.06 0.12 -0.02' - response_scaling_factors = '1.47 1.03 2.60 3.45 2.0 1.19 1.6 2.7 1.47 2.08' - action_scaling_factors = 1.0 - - filename = "control.net_best" - - num_neurons_per_layer = '512 512' - activation_function = 'tanh tanh' - - min_control_value = ${fparse -0.108} - max_control_value = ${fparse 0.108} - - execute_on = 'TIMESTEP_BEGIN' - smoother = 0.1 - num_stems_in_period = 50 - - stochastic = false - [] -[] - -[Executioner] - type = PIMPLE - momentum_l_abs_tol = 1e-7 - pressure_l_abs_tol = 1e-7 - momentum_l_tol = 1e-7 - pressure_l_tol = 1e-7 - rhie_chow_user_object = 'rc' - momentum_systems = 'u_system v_system' - pressure_system = 'pressure_system' - momentum_equation_relaxation = 0.9 - pressure_variable_relaxation = 0.6 - num_iterations = 100 - pressure_absolute_tolerance = 5e-6 - momentum_absolute_tolerance = 5e-6 - momentum_petsc_options_iname = '-pc_type -pc_hypre_type' - momentum_petsc_options_value = 'hypre boomeramg' - pressure_petsc_options_iname = '-pc_type -pc_hypre_type' - pressure_petsc_options_value = 'hypre boomeramg' - print_fields = false - continue_on_max_its = true - dt = 0.0005 - num_steps = 4000 -[] - -[Outputs] - exodus = true - [json] - type = JSON - execute_on = final - [] - # console = false - # execute_on = FINAL -[] diff --git a/modules/combined/examples/stochastic/vortex_control/full-run/header.i b/modules/combined/examples/stochastic/vortex_control/full-run/header.i deleted file mode 100644 index 6e5236531616..000000000000 --- 
a/modules/combined/examples/stochastic/vortex_control/full-run/header.i +++ /dev/null @@ -1,39 +0,0 @@ -# ----------------------------------------------------------------------------- -# Flow around a cylinder (2D) benchmark validation case -# This example showcases a flow around a cylinder which results in vortex -# shedding. The problem specification has been taken from the following paper: -# -# @incollection{schafer1996benchmark, -# title={Benchmark computations of laminar flow around a cylinder}, -# author={Sch{\"a}fer, Michael and Turek, Stefan and Durst, Franz and Krause, Egon and Rannacher, Rolf}, -# booktitle={Flow simulation with high-performance computers II}, -# pages={547--566}, -# year={1996}, -# publisher={Springer} -# } -# The Reyndols number is Re=100. -# The expected Strouhal number (St) is in the [0.2950, 0.3050] range, with -# refinement=8, we expect to get St=0.2941 with the model below. -# Run it using the following command: -# ./navier_stokes-opt -i header.i mesh.i flow_over_circle.i executioner_postprocessor.i -# ----------------------------------------------------------------------------- - -# Geometry parameters -circle_radius = 0.05 -pitch = 0.2 -x_min = -0.2 -x_max = 1.5 -y_min = -0.2 -y_max = 0.21 -rundoff = 1e-4 -refinement = 8 - -# Material properties -mu = 1e-3 -rho = 1 - -# Boundary conditions -inlet_velocity = 1.5 - -# Numerical schemes -advected_interp_method = 'average' diff --git a/modules/combined/examples/stochastic/vortex_control/full-run/mesh.i b/modules/combined/examples/stochastic/vortex_control/full-run/mesh.i deleted file mode 100644 index 2d962b51c9d2..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/full-run/mesh.i +++ /dev/null @@ -1,240 +0,0 @@ -[Mesh] - # ------------------------------------------ - # Middle layer - # ------------------------------------------ - [ccmg] - type = ConcentricCircleMeshGenerator - num_sectors = '${fparse refinement*2}' - radii = '${circle_radius} ${fparse 
1.2*circle_radius}' - rings = '4 ${refinement} ${refinement}' - has_outer_square = on - pitch = ${pitch} - preserve_volumes = off - smoothing_max_it = 2 - [] - [in_between] - type = SideSetsBetweenSubdomainsGenerator - input = ccmg - primary_block = 2 - paired_block = 1 - new_boundary = 'circle' - [] - [delete] - type = BlockDeletionGenerator - input = in_between - block = '1' - [] - [final_ccmg] - type = RenameBlockGenerator - input = delete - old_block = '2 3' - new_block = '0 0' - [] - [left] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${x_min}' - xmax = '${fparse -pitch/2}' - ymin = '${fparse -pitch/2}' - ymax = '${fparse pitch/2}' - nx = '${fparse refinement*2}' - ny = '${fparse refinement*4+2}' - [] - [right] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse pitch/2}' - xmax = '${x_max}' - ymin = '${fparse -pitch/2}' - ymax = '${fparse pitch/2}' - nx = '${fparse refinement*30}' - ny = '${fparse refinement*4+2}' - [] - [combined_middle] - type = StitchedMeshGenerator - inputs = 'final_ccmg left right' - stitch_boundaries_pairs = 'left right; right left' - clear_stitched_boundary_ids = false - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - - [middle_top_sideset] - input = combined_middle - type = ParsedGenerateSideset - combinatorial_geometry = 'y > ${fparse pitch/2-rundoff}' - normal = '0 1 0' - new_sideset_name = 'middle_top' - [] - [middle_bottom_sideset] - input = middle_top_sideset - type = ParsedGenerateSideset - combinatorial_geometry = 'y < ${fparse -pitch/2+rundoff}' - normal = '0 -1 0' - new_sideset_name = 'middle_bottom' - [] - # ------------------------------------------ - # Top layer - # ------------------------------------------ - [top_left_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${x_min}' - xmax = '${fparse -pitch/2}' - ymin = '${fparse pitch/2}' - ymax = '${y_max}' - nx = '${fparse refinement*2}' - ny = '${fparse refinement*2+1}' - [] - [top_middle_block] - type = 
GeneratedMeshGenerator - dim = 2 - xmin = '${fparse -pitch/2}' - xmax = '${fparse pitch/2}' - ymin = '${fparse pitch/2}' - ymax = '${y_max}' - nx = '${fparse refinement*4+2}' - ny = '${fparse refinement*2+1}' - [] - [top_right_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse pitch/2}' - xmax = '${x_max}' - ymin = '${fparse pitch/2}' - ymax = '${y_max}' - nx = '${fparse refinement*30}' - ny = '${fparse refinement*2+1}' - [] - [combined_top] - type = StitchedMeshGenerator - inputs = 'top_middle_block top_left_block top_right_block' - stitch_boundaries_pairs = 'left right; right left' - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [top_bottom_sideset] - input = combined_top - type = ParsedGenerateSideset - combinatorial_geometry = 'y < ${fparse pitch/2+rundoff}' - normal = '0 -1 0' - new_sideset_name = 'top_bottom' - [] - [combined_middle_top] - type = StitchedMeshGenerator - inputs = 'top_bottom_sideset middle_bottom_sideset' - stitch_boundaries_pairs = 'top_bottom middle_top' - clear_stitched_boundary_ids = false - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [create_fused_top_sideset] - input = combined_middle_top - type = ParsedGenerateSideset - combinatorial_geometry = 'y > ${fparse y_max-rundoff}' - normal = '0 1 0' - new_sideset_name = 'top_boundary' - [] - # ------------------------------------------ - # Bottom layer - # ------------------------------------------ - [bottom_left_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${x_min}' - xmax = '${fparse -pitch/2}' - ymin = '${y_min}' - ymax = '${fparse -pitch/2}' - nx = '${fparse refinement*2}' - ny = '${fparse refinement*2}' - [] - [bottom_middle_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse -pitch/2}' - xmax = '${fparse pitch/2}' - ymin = '${y_min}' - ymax = '${fparse -pitch/2}' - nx = '${fparse refinement*4+2}' - ny = '${fparse refinement*2}' - [] - [bottom_right_block] - type = 
GeneratedMeshGenerator - dim = 2 - xmin = '${fparse pitch/2}' - xmax = '${x_max}' - ymin = '${y_min}' - ymax = '${fparse -pitch/2}' - nx = '${fparse refinement*30}' - ny = '${fparse refinement*2}' - [] - [combined_bottom] - type = StitchedMeshGenerator - inputs = 'bottom_middle_block bottom_left_block bottom_right_block' - stitch_boundaries_pairs = 'left right; right left' - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [bottom_top_sideset] - input = combined_bottom - type = ParsedGenerateSideset - combinatorial_geometry = 'y > ${fparse -pitch/2-rundoff}' - normal = '0 1 0' - new_sideset_name = 'bottom_top' - [] - [combined_final] - type = StitchedMeshGenerator - inputs = 'create_fused_top_sideset bottom_top_sideset' - stitch_boundaries_pairs = 'middle_bottom bottom_top' - clear_stitched_boundary_ids = false - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [create_fused_bottom_sideset] - input = combined_final - type = ParsedGenerateSideset - combinatorial_geometry = 'y < ${fparse y_min+rundoff}' - normal = '0 -1 0' - new_sideset_name = 'bottom_boundary' - [] - # ------------------------------------------ - # Left and right boundaries - # ------------------------------------------ - [create_fused_left_sideset] - input = create_fused_bottom_sideset - type = ParsedGenerateSideset - combinatorial_geometry = 'x < ${fparse x_min+rundoff}' - normal = '-1 0 0' - new_sideset_name = 'left_boundary' - [] - [create_fused_right_sideset] - input = create_fused_left_sideset - type = ParsedGenerateSideset - combinatorial_geometry = 'x > ${fparse x_max-rundoff}' - normal = '1 0 0' - new_sideset_name = 'right_boundary' - [] - [sideset_removal] - input = create_fused_right_sideset - type = BoundaryDeletionGenerator - boundary_names = 'bottom top left right middle_bottom middle_top bottom_top top_bottom' - [] - [new_boundaries_1] - type = SideSetsFromBoundingBoxGenerator - input = sideset_removal - bottom_left 
= '-0.008682409 0.049240388 0' - top_right = '0.008682409 0.5 0' - boundary_new = 'top_gap' - included_boundaries = 'circle' - replace = true - [] - [new_boundaries_2] - type = SideSetsFromBoundingBoxGenerator - input = new_boundaries_1 - bottom_left = '-0.008682409 -0.5 0' - top_right = '0.008682409 -0.049240388 0' - boundary_new = 'bottom_gap' - included_boundaries = 'circle' - replace = true - [] -[] diff --git a/modules/combined/examples/stochastic/vortex_control/half-run/flow_over_circle_linearfv_single.i b/modules/combined/examples/stochastic/vortex_control/half-run/flow_over_circle_linearfv_single.i deleted file mode 100644 index 9ce204048882..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/half-run/flow_over_circle_linearfv_single.i +++ /dev/null @@ -1,420 +0,0 @@ -!include header.i - -[Mesh] - [fmg] - type = FileMeshGenerator - file = flow_over_circle_linearfv_out_orig.e - use_for_exodus_restart = true - [] -[] - -[Problem] - linear_sys_names = 'u_system v_system pressure_system' - previous_nl_solution_required = true -[] - -[Functions] - [inlet_function] - type = ParsedFunction - expression = '4*U*(y-ymin)*(ymax-y)/(ymax-ymin)/(ymax-ymin)' - symbol_names = 'U ymax ymin' - symbol_values = '${inlet_velocity} ${y_max} ${y_min}' - [] - [gap_x] - type = ParsedFunction - expression = 'Q*x/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' - symbol_names = 'Q' - symbol_values = 'Q_signal' - [] - [gap_y] - type = ParsedFunction - expression = 'if(y>0,Q,-Q)*y/(x^2+y^2)*abs(cos(pi/(20/180*pi)*atan(x/y)))' - symbol_names = 'Q' - symbol_values = 'Q_signal' - [] -[] - -[UserObjects] - [rc] - type = RhieChowMassFlux - u = vel_x - v = vel_y - pressure = pressure - rho = ${rho} - p_diffusion_kernel = p_diffusion - [] -[] - -[Variables] - [vel_x] - type = MooseLinearVariableFVReal - solver_sys = u_system - initial_from_file_var = vel_x - initial_from_file_timestep = LATEST - [] - [vel_y] - type = MooseLinearVariableFVReal - solver_sys = v_system - 
initial_from_file_var = vel_y - initial_from_file_timestep = LATEST - [] - [pressure] - type = MooseLinearVariableFVReal - # initial_condition = 0 - solver_sys = pressure_system - initial_from_file_var = pressure - initial_from_file_timestep = LATEST - [] -[] - -[LinearFVKernels] - [u_time] - type = LinearFVTimeDerivative - variable = vel_x - factor = ${rho} - [] - [u_advection_stress] - type = LinearWCNSFVMomentumFlux - variable = vel_x - advected_interp_method = ${advected_interp_method} - mu = ${mu} - u = vel_x - v = vel_y - momentum_component = 'x' - rhie_chow_user_object = 'rc' - use_nonorthogonal_correction = true - [] - [u_pressure] - type = LinearFVMomentumPressure - variable = vel_x - pressure = pressure - momentum_component = 'x' - [] - - [v_time] - type = LinearFVTimeDerivative - variable = vel_y - factor = ${rho} - [] - [v_advection_stress] - type = LinearWCNSFVMomentumFlux - variable = vel_y - advected_interp_method = ${advected_interp_method} - mu = ${mu} - u = vel_x - v = vel_y - momentum_component = 'y' - rhie_chow_user_object = 'rc' - use_nonorthogonal_correction = true - [] - [v_pressure] - type = LinearFVMomentumPressure - variable = vel_y - pressure = pressure - momentum_component = 'y' - [] - - [p_diffusion] - type = LinearFVAnisotropicDiffusion - variable = pressure - diffusion_tensor = Ainv - use_nonorthogonal_correction = true - [] - [HbyA_divergence] - type = LinearFVDivergence - variable = pressure - face_flux = HbyA - force_boundary_execution = true - [] -[] - -[LinearFVBCs] - [inlet_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'left_boundary' - functor = 'inlet_function' - [] - [inlet_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'left_boundary' - functor = 0 - [] - [circle_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'circle' - functor = 0 - [] - [circle_y] - type = 
LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'circle' - functor = 0 - [] - [gap_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'top_gap bottom_gap' - functor = 'gap_x' - [] - [gap_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'top_gap bottom_gap' - functor = 'gap_y' - [] - [walls_x] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_x - boundary = 'top_boundary bottom_boundary' - functor = 0 - [] - [walls_y] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - variable = vel_y - boundary = 'top_boundary bottom_boundary' - functor = 0 - [] - [outlet_p] - type = LinearFVAdvectionDiffusionFunctorDirichletBC - boundary = 'right_boundary' - variable = pressure - functor = 0 - [] - [outlet_u] - type = LinearFVAdvectionDiffusionOutflowBC - variable = vel_x - use_two_term_expansion = false - boundary = 'right_boundary' - [] - [outlet_v] - type = LinearFVAdvectionDiffusionOutflowBC - variable = vel_y - use_two_term_expansion = false - boundary = 'right_boundary' - [] -[] - -[Postprocessors] - [drag_force] - type = IntegralDirectedSurfaceForce - vel_x = vel_x - vel_y = vel_y - mu = ${mu} - pressure = pressure - principal_direction = '1 0 0' - boundary = 'circle' - outputs = none - execute_on = 'INITIAL TIMESTEP_END' - [] - [drag_coeff] - type = ParsedPostprocessor - expression = '2*drag_force/rho/(avgvel*avgvel)/D' - constant_names = 'rho avgvel D' - constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' - pp_names = 'drag_force' - execute_on = 'INITIAL TIMESTEP_END' - [] - [lift_force] - type = IntegralDirectedSurfaceForce - vel_x = vel_x - vel_y = vel_y - mu = ${mu} - pressure = pressure - principal_direction = '0 1 0' - boundary = 'circle' - outputs = none - execute_on = 'INITIAL TIMESTEP_END' - [] - [lift_coeff] - type = ParsedPostprocessor - expression = '2*lift_force/rho/(avgvel*avgvel)/D' - 
constant_names = 'rho avgvel D' - constant_expressions = '${rho} ${fparse 2/3*inlet_velocity} ${fparse 2*circle_radius}' - pp_names = 'lift_force' - execute_on = 'INITIAL TIMESTEP_END' - [] - [reward] - type = LiftDragRewardPostprocessor - lift = lift_coeff - drag = drag_coeff - averaging_window = 50 - coeff_1 = 0.0 - coeff_2 = 0.2 - execute_on = 'INITIAL TIMESTEP_END' - [] - # [p1] - # type = PointValue - # variable = pressure - # point = '0 0.07 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p2] - # type = PointValue - # variable = pressure - # point = '0 -0.07 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p3] - # type = PointValue - # variable = pressure - # point = '0.075 0.1 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p4] - # type = PointValue - # variable = pressure - # point = '0.075 0.0 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - # [p5] - # type = PointValue - # variable = pressure - # point = '0.075 -0.1 0.0' - # execute_on = 'INITIAL TIMESTEP_END' - # [] - [p1x] - type = PointValue - variable = vel_x - point = '0 0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p2x] - type = PointValue - variable = vel_x - point = '0 -0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p3x] - type = PointValue - variable = vel_x - point = '0.075 0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p4x] - type = PointValue - variable = vel_x - point = '0.075 0.0 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p5x] - type = PointValue - variable = vel_x - point = '0.075 -0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p1y] - type = PointValue - variable = vel_y - point = '0 0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p2y] - type = PointValue - variable = vel_y - point = '0 -0.07 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p3y] - type = PointValue - variable = vel_y - point = '0.075 0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p4y] - type = PointValue - variable = vel_y - point = 
'0.075 0.0 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [p5y] - type = PointValue - variable = vel_y - point = '0.075 -0.1 0.0' - execute_on = 'INITIAL TIMESTEP_END' - [] - [Q_signal] - type = ConstantPostprocessor - value = 0.0 - execute_on = TIMESTEP_BEGIN - [] - # [Q] - # type = LibtorchControlValuePostprocessor - # control_name = src_control - # [] - # [log_prob_Q] - # type = LibtorchDRLLogProbabilityPostprocessor - # control_name = src_control - # [] -[] - -# [Reporters] -# [results] -# type = AccumulateReporter -# reporters = 'p1x/value p2x/value p3x/value p4x/value p5x/value p1y/value p2y/value p3y/value p4y/value p5y/value reward/value Q/value log_prob_Q/value' -# [] -# [] - -# [Controls] -# [src_control] -# type = LibtorchDRLControl -# parameters = "Postprocessors/Q_signal/value" -# responses = 'p1x p2x p3x p4x p5x p1y p2y p3y p4y p5y' - -# # keep consistent with LibtorchDRLControlTrainer -# input_timesteps = 1 -# response_shift_factors = '1.98 1.825 2.015 0.03 1.9 0.58 -0.425 0.06 0.12 -0.02' -# response_scaling_factors = '1.47 1.03 2.60 3.45 2.0 1.19 1.6 2.7 1.47 2.08' -# action_scaling_factors = 1.0 - -# filename = "control.net_best" - -# num_neurons_per_layer = '512 512' -# activation_function = 'tanh tanh' - -# min_control_value = ${fparse -0.108} -# max_control_value = ${fparse 0.108} - -# execute_on = 'TIMESTEP_BEGIN' -# smoother = 0.1 -# num_stems_in_period = 50 - -# stochastic = false -# [] -# [] - -[Executioner] - type = PIMPLE - momentum_l_abs_tol = 1e-7 - pressure_l_abs_tol = 1e-7 - momentum_l_tol = 1e-7 - pressure_l_tol = 1e-7 - rhie_chow_user_object = 'rc' - momentum_systems = 'u_system v_system' - pressure_system = 'pressure_system' - momentum_equation_relaxation = 0.9 - pressure_variable_relaxation = 0.6 - num_iterations = 100 - pressure_absolute_tolerance = 5e-6 - momentum_absolute_tolerance = 5e-6 - momentum_petsc_options_iname = '-pc_type -pc_hypre_type' - momentum_petsc_options_value = 'hypre boomeramg' - 
pressure_petsc_options_iname = '-pc_type -pc_hypre_type' - pressure_petsc_options_value = 'hypre boomeramg' - print_fields = false - continue_on_max_its = true - dt = 0.0005 - num_steps = 4000 -[] - -[Outputs] - exodus = true - [json] - type = JSON - execute_on = final - [] - # console = false - # execute_on = FINAL -[] diff --git a/modules/combined/examples/stochastic/vortex_control/half-run/header.i b/modules/combined/examples/stochastic/vortex_control/half-run/header.i deleted file mode 100644 index 6e5236531616..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/half-run/header.i +++ /dev/null @@ -1,39 +0,0 @@ -# ----------------------------------------------------------------------------- -# Flow around a cylinder (2D) benchmark validation case -# This example showcases a flow around a cylinder which results in vortex -# shedding. The problem specification has been taken from the following paper: -# -# @incollection{schafer1996benchmark, -# title={Benchmark computations of laminar flow around a cylinder}, -# author={Sch{\"a}fer, Michael and Turek, Stefan and Durst, Franz and Krause, Egon and Rannacher, Rolf}, -# booktitle={Flow simulation with high-performance computers II}, -# pages={547--566}, -# year={1996}, -# publisher={Springer} -# } -# The Reyndols number is Re=100. -# The expected Strouhal number (St) is in the [0.2950, 0.3050] range, with -# refinement=8, we expect to get St=0.2941 with the model below. 
-# Run it using the following command: -# ./navier_stokes-opt -i header.i mesh.i flow_over_circle.i executioner_postprocessor.i -# ----------------------------------------------------------------------------- - -# Geometry parameters -circle_radius = 0.05 -pitch = 0.2 -x_min = -0.2 -x_max = 1.5 -y_min = -0.2 -y_max = 0.21 -rundoff = 1e-4 -refinement = 8 - -# Material properties -mu = 1e-3 -rho = 1 - -# Boundary conditions -inlet_velocity = 1.5 - -# Numerical schemes -advected_interp_method = 'average' diff --git a/modules/combined/examples/stochastic/vortex_control/half-run/mesh.i b/modules/combined/examples/stochastic/vortex_control/half-run/mesh.i deleted file mode 100644 index 2d962b51c9d2..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/half-run/mesh.i +++ /dev/null @@ -1,240 +0,0 @@ -[Mesh] - # ------------------------------------------ - # Middle layer - # ------------------------------------------ - [ccmg] - type = ConcentricCircleMeshGenerator - num_sectors = '${fparse refinement*2}' - radii = '${circle_radius} ${fparse 1.2*circle_radius}' - rings = '4 ${refinement} ${refinement}' - has_outer_square = on - pitch = ${pitch} - preserve_volumes = off - smoothing_max_it = 2 - [] - [in_between] - type = SideSetsBetweenSubdomainsGenerator - input = ccmg - primary_block = 2 - paired_block = 1 - new_boundary = 'circle' - [] - [delete] - type = BlockDeletionGenerator - input = in_between - block = '1' - [] - [final_ccmg] - type = RenameBlockGenerator - input = delete - old_block = '2 3' - new_block = '0 0' - [] - [left] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${x_min}' - xmax = '${fparse -pitch/2}' - ymin = '${fparse -pitch/2}' - ymax = '${fparse pitch/2}' - nx = '${fparse refinement*2}' - ny = '${fparse refinement*4+2}' - [] - [right] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse pitch/2}' - xmax = '${x_max}' - ymin = '${fparse -pitch/2}' - ymax = '${fparse pitch/2}' - nx = '${fparse refinement*30}' - ny = '${fparse 
refinement*4+2}' - [] - [combined_middle] - type = StitchedMeshGenerator - inputs = 'final_ccmg left right' - stitch_boundaries_pairs = 'left right; right left' - clear_stitched_boundary_ids = false - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - - [middle_top_sideset] - input = combined_middle - type = ParsedGenerateSideset - combinatorial_geometry = 'y > ${fparse pitch/2-rundoff}' - normal = '0 1 0' - new_sideset_name = 'middle_top' - [] - [middle_bottom_sideset] - input = middle_top_sideset - type = ParsedGenerateSideset - combinatorial_geometry = 'y < ${fparse -pitch/2+rundoff}' - normal = '0 -1 0' - new_sideset_name = 'middle_bottom' - [] - # ------------------------------------------ - # Top layer - # ------------------------------------------ - [top_left_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${x_min}' - xmax = '${fparse -pitch/2}' - ymin = '${fparse pitch/2}' - ymax = '${y_max}' - nx = '${fparse refinement*2}' - ny = '${fparse refinement*2+1}' - [] - [top_middle_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse -pitch/2}' - xmax = '${fparse pitch/2}' - ymin = '${fparse pitch/2}' - ymax = '${y_max}' - nx = '${fparse refinement*4+2}' - ny = '${fparse refinement*2+1}' - [] - [top_right_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse pitch/2}' - xmax = '${x_max}' - ymin = '${fparse pitch/2}' - ymax = '${y_max}' - nx = '${fparse refinement*30}' - ny = '${fparse refinement*2+1}' - [] - [combined_top] - type = StitchedMeshGenerator - inputs = 'top_middle_block top_left_block top_right_block' - stitch_boundaries_pairs = 'left right; right left' - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [top_bottom_sideset] - input = combined_top - type = ParsedGenerateSideset - combinatorial_geometry = 'y < ${fparse pitch/2+rundoff}' - normal = '0 -1 0' - new_sideset_name = 'top_bottom' - [] - [combined_middle_top] - type = 
StitchedMeshGenerator - inputs = 'top_bottom_sideset middle_bottom_sideset' - stitch_boundaries_pairs = 'top_bottom middle_top' - clear_stitched_boundary_ids = false - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [create_fused_top_sideset] - input = combined_middle_top - type = ParsedGenerateSideset - combinatorial_geometry = 'y > ${fparse y_max-rundoff}' - normal = '0 1 0' - new_sideset_name = 'top_boundary' - [] - # ------------------------------------------ - # Bottom layer - # ------------------------------------------ - [bottom_left_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${x_min}' - xmax = '${fparse -pitch/2}' - ymin = '${y_min}' - ymax = '${fparse -pitch/2}' - nx = '${fparse refinement*2}' - ny = '${fparse refinement*2}' - [] - [bottom_middle_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse -pitch/2}' - xmax = '${fparse pitch/2}' - ymin = '${y_min}' - ymax = '${fparse -pitch/2}' - nx = '${fparse refinement*4+2}' - ny = '${fparse refinement*2}' - [] - [bottom_right_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse pitch/2}' - xmax = '${x_max}' - ymin = '${y_min}' - ymax = '${fparse -pitch/2}' - nx = '${fparse refinement*30}' - ny = '${fparse refinement*2}' - [] - [combined_bottom] - type = StitchedMeshGenerator - inputs = 'bottom_middle_block bottom_left_block bottom_right_block' - stitch_boundaries_pairs = 'left right; right left' - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [bottom_top_sideset] - input = combined_bottom - type = ParsedGenerateSideset - combinatorial_geometry = 'y > ${fparse -pitch/2-rundoff}' - normal = '0 1 0' - new_sideset_name = 'bottom_top' - [] - [combined_final] - type = StitchedMeshGenerator - inputs = 'create_fused_top_sideset bottom_top_sideset' - stitch_boundaries_pairs = 'middle_bottom bottom_top' - clear_stitched_boundary_ids = false - prevent_boundary_ids_overlap = true - 
merge_boundaries_with_same_name = true - [] - [create_fused_bottom_sideset] - input = combined_final - type = ParsedGenerateSideset - combinatorial_geometry = 'y < ${fparse y_min+rundoff}' - normal = '0 -1 0' - new_sideset_name = 'bottom_boundary' - [] - # ------------------------------------------ - # Left and right boundaries - # ------------------------------------------ - [create_fused_left_sideset] - input = create_fused_bottom_sideset - type = ParsedGenerateSideset - combinatorial_geometry = 'x < ${fparse x_min+rundoff}' - normal = '-1 0 0' - new_sideset_name = 'left_boundary' - [] - [create_fused_right_sideset] - input = create_fused_left_sideset - type = ParsedGenerateSideset - combinatorial_geometry = 'x > ${fparse x_max-rundoff}' - normal = '1 0 0' - new_sideset_name = 'right_boundary' - [] - [sideset_removal] - input = create_fused_right_sideset - type = BoundaryDeletionGenerator - boundary_names = 'bottom top left right middle_bottom middle_top bottom_top top_bottom' - [] - [new_boundaries_1] - type = SideSetsFromBoundingBoxGenerator - input = sideset_removal - bottom_left = '-0.008682409 0.049240388 0' - top_right = '0.008682409 0.5 0' - boundary_new = 'top_gap' - included_boundaries = 'circle' - replace = true - [] - [new_boundaries_2] - type = SideSetsFromBoundingBoxGenerator - input = new_boundaries_1 - bottom_left = '-0.008682409 -0.5 0' - top_right = '0.008682409 -0.049240388 0' - boundary_new = 'bottom_gap' - included_boundaries = 'circle' - replace = true - [] -[] diff --git a/modules/combined/examples/stochastic/vortex_control/header.i b/modules/combined/examples/stochastic/vortex_control/header.i deleted file mode 100644 index 6e5236531616..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/header.i +++ /dev/null @@ -1,39 +0,0 @@ -# ----------------------------------------------------------------------------- -# Flow around a cylinder (2D) benchmark validation case -# This example showcases a flow around a cylinder which 
results in vortex -# shedding. The problem specification has been taken from the following paper: -# -# @incollection{schafer1996benchmark, -# title={Benchmark computations of laminar flow around a cylinder}, -# author={Sch{\"a}fer, Michael and Turek, Stefan and Durst, Franz and Krause, Egon and Rannacher, Rolf}, -# booktitle={Flow simulation with high-performance computers II}, -# pages={547--566}, -# year={1996}, -# publisher={Springer} -# } -# The Reyndols number is Re=100. -# The expected Strouhal number (St) is in the [0.2950, 0.3050] range, with -# refinement=8, we expect to get St=0.2941 with the model below. -# Run it using the following command: -# ./navier_stokes-opt -i header.i mesh.i flow_over_circle.i executioner_postprocessor.i -# ----------------------------------------------------------------------------- - -# Geometry parameters -circle_radius = 0.05 -pitch = 0.2 -x_min = -0.2 -x_max = 1.5 -y_min = -0.2 -y_max = 0.21 -rundoff = 1e-4 -refinement = 8 - -# Material properties -mu = 1e-3 -rho = 1 - -# Boundary conditions -inlet_velocity = 1.5 - -# Numerical schemes -advected_interp_method = 'average' diff --git a/modules/combined/examples/stochastic/vortex_control/mesh.i b/modules/combined/examples/stochastic/vortex_control/mesh.i deleted file mode 100644 index 2d962b51c9d2..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/mesh.i +++ /dev/null @@ -1,240 +0,0 @@ -[Mesh] - # ------------------------------------------ - # Middle layer - # ------------------------------------------ - [ccmg] - type = ConcentricCircleMeshGenerator - num_sectors = '${fparse refinement*2}' - radii = '${circle_radius} ${fparse 1.2*circle_radius}' - rings = '4 ${refinement} ${refinement}' - has_outer_square = on - pitch = ${pitch} - preserve_volumes = off - smoothing_max_it = 2 - [] - [in_between] - type = SideSetsBetweenSubdomainsGenerator - input = ccmg - primary_block = 2 - paired_block = 1 - new_boundary = 'circle' - [] - [delete] - type = 
BlockDeletionGenerator - input = in_between - block = '1' - [] - [final_ccmg] - type = RenameBlockGenerator - input = delete - old_block = '2 3' - new_block = '0 0' - [] - [left] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${x_min}' - xmax = '${fparse -pitch/2}' - ymin = '${fparse -pitch/2}' - ymax = '${fparse pitch/2}' - nx = '${fparse refinement*2}' - ny = '${fparse refinement*4+2}' - [] - [right] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse pitch/2}' - xmax = '${x_max}' - ymin = '${fparse -pitch/2}' - ymax = '${fparse pitch/2}' - nx = '${fparse refinement*30}' - ny = '${fparse refinement*4+2}' - [] - [combined_middle] - type = StitchedMeshGenerator - inputs = 'final_ccmg left right' - stitch_boundaries_pairs = 'left right; right left' - clear_stitched_boundary_ids = false - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - - [middle_top_sideset] - input = combined_middle - type = ParsedGenerateSideset - combinatorial_geometry = 'y > ${fparse pitch/2-rundoff}' - normal = '0 1 0' - new_sideset_name = 'middle_top' - [] - [middle_bottom_sideset] - input = middle_top_sideset - type = ParsedGenerateSideset - combinatorial_geometry = 'y < ${fparse -pitch/2+rundoff}' - normal = '0 -1 0' - new_sideset_name = 'middle_bottom' - [] - # ------------------------------------------ - # Top layer - # ------------------------------------------ - [top_left_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${x_min}' - xmax = '${fparse -pitch/2}' - ymin = '${fparse pitch/2}' - ymax = '${y_max}' - nx = '${fparse refinement*2}' - ny = '${fparse refinement*2+1}' - [] - [top_middle_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse -pitch/2}' - xmax = '${fparse pitch/2}' - ymin = '${fparse pitch/2}' - ymax = '${y_max}' - nx = '${fparse refinement*4+2}' - ny = '${fparse refinement*2+1}' - [] - [top_right_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse pitch/2}' - xmax = '${x_max}' - 
ymin = '${fparse pitch/2}' - ymax = '${y_max}' - nx = '${fparse refinement*30}' - ny = '${fparse refinement*2+1}' - [] - [combined_top] - type = StitchedMeshGenerator - inputs = 'top_middle_block top_left_block top_right_block' - stitch_boundaries_pairs = 'left right; right left' - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [top_bottom_sideset] - input = combined_top - type = ParsedGenerateSideset - combinatorial_geometry = 'y < ${fparse pitch/2+rundoff}' - normal = '0 -1 0' - new_sideset_name = 'top_bottom' - [] - [combined_middle_top] - type = StitchedMeshGenerator - inputs = 'top_bottom_sideset middle_bottom_sideset' - stitch_boundaries_pairs = 'top_bottom middle_top' - clear_stitched_boundary_ids = false - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [create_fused_top_sideset] - input = combined_middle_top - type = ParsedGenerateSideset - combinatorial_geometry = 'y > ${fparse y_max-rundoff}' - normal = '0 1 0' - new_sideset_name = 'top_boundary' - [] - # ------------------------------------------ - # Bottom layer - # ------------------------------------------ - [bottom_left_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${x_min}' - xmax = '${fparse -pitch/2}' - ymin = '${y_min}' - ymax = '${fparse -pitch/2}' - nx = '${fparse refinement*2}' - ny = '${fparse refinement*2}' - [] - [bottom_middle_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse -pitch/2}' - xmax = '${fparse pitch/2}' - ymin = '${y_min}' - ymax = '${fparse -pitch/2}' - nx = '${fparse refinement*4+2}' - ny = '${fparse refinement*2}' - [] - [bottom_right_block] - type = GeneratedMeshGenerator - dim = 2 - xmin = '${fparse pitch/2}' - xmax = '${x_max}' - ymin = '${y_min}' - ymax = '${fparse -pitch/2}' - nx = '${fparse refinement*30}' - ny = '${fparse refinement*2}' - [] - [combined_bottom] - type = StitchedMeshGenerator - inputs = 'bottom_middle_block bottom_left_block bottom_right_block' - 
stitch_boundaries_pairs = 'left right; right left' - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [bottom_top_sideset] - input = combined_bottom - type = ParsedGenerateSideset - combinatorial_geometry = 'y > ${fparse -pitch/2-rundoff}' - normal = '0 1 0' - new_sideset_name = 'bottom_top' - [] - [combined_final] - type = StitchedMeshGenerator - inputs = 'create_fused_top_sideset bottom_top_sideset' - stitch_boundaries_pairs = 'middle_bottom bottom_top' - clear_stitched_boundary_ids = false - prevent_boundary_ids_overlap = true - merge_boundaries_with_same_name = true - [] - [create_fused_bottom_sideset] - input = combined_final - type = ParsedGenerateSideset - combinatorial_geometry = 'y < ${fparse y_min+rundoff}' - normal = '0 -1 0' - new_sideset_name = 'bottom_boundary' - [] - # ------------------------------------------ - # Left and right boundaries - # ------------------------------------------ - [create_fused_left_sideset] - input = create_fused_bottom_sideset - type = ParsedGenerateSideset - combinatorial_geometry = 'x < ${fparse x_min+rundoff}' - normal = '-1 0 0' - new_sideset_name = 'left_boundary' - [] - [create_fused_right_sideset] - input = create_fused_left_sideset - type = ParsedGenerateSideset - combinatorial_geometry = 'x > ${fparse x_max-rundoff}' - normal = '1 0 0' - new_sideset_name = 'right_boundary' - [] - [sideset_removal] - input = create_fused_right_sideset - type = BoundaryDeletionGenerator - boundary_names = 'bottom top left right middle_bottom middle_top bottom_top top_bottom' - [] - [new_boundaries_1] - type = SideSetsFromBoundingBoxGenerator - input = sideset_removal - bottom_left = '-0.008682409 0.049240388 0' - top_right = '0.008682409 0.5 0' - boundary_new = 'top_gap' - included_boundaries = 'circle' - replace = true - [] - [new_boundaries_2] - type = SideSetsFromBoundingBoxGenerator - input = new_boundaries_1 - bottom_left = '-0.008682409 -0.5 0' - top_right = '0.008682409 -0.049240388 0' - 
boundary_new = 'bottom_gap' - included_boundaries = 'circle' - replace = true - [] -[] diff --git a/modules/combined/examples/stochastic/vortex_control/plot_reward.py b/modules/combined/examples/stochastic/vortex_control/plot_reward.py deleted file mode 100644 index 3204d1a3cc76..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/plot_reward.py +++ /dev/null @@ -1,60 +0,0 @@ -import json -import matplotlib.pyplot as plt -import numpy as np - -plt.rc('text', usetex=True) -plt.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) - -# Load data from JSON file -with open('train_out.json') as f: - data = json.load(f) - -# Extract data from JSON and select every other data point starting from the first -time_steps = data['time_steps'][1::2] # Exclude the first entry and then take every other entry -average_rewards = [step['reward']['average_reward'] for step in time_steps] -std_rewards = [step['reward']['std_reward'] for step in time_steps] -sample_average_rewards = [step['reward']['sample_average_reward'] for step in time_steps] - -# Create a plot -fig, ax = plt.subplots() - -# Set LaTeX font - - -# Plot average reward data -indices = range(1, len(time_steps) + 1) # Start numbering from 1 -ax.plot(indices, average_rewards, label=r'$\mathrm{Average~Reward}$', color='darkblue', linewidth=2) - -# Compute confidence intervals -lower_bound_1std = [avg - std for avg, std in zip(average_rewards, std_rewards)] -upper_bound_1std = [avg + std for avg, std in zip(average_rewards, std_rewards)] -lower_bound_2std = [avg - 2 * std for avg, std in zip(average_rewards, std_rewards)] -upper_bound_2std = [avg + 2 * std for avg, std in zip(average_rewards, std_rewards)] - -# Fill between for confidence intervals -# ax.fill_between(indices, lower_bound_2std, upper_bound_2std, color='lightblue', alpha=0.75, label=r'$\pm 2\sigma$') -ax.fill_between(indices, lower_bound_1std, upper_bound_1std, color='lightblue', alpha=1.0, label=r'$\pm \sigma$') - -# Plot sample 
average reward points -for i, sample_rewards in enumerate(sample_average_rewards, start=1): - ax.scatter([i] * len(sample_rewards), sample_rewards, color='black', s=5, alpha=0.7, label=r'$\mathrm{Average~sample~rewards}$' if i == 1 else "") - -# Set custom axis ranges (adjust as needed) -ax.set_xlim([1, len(time_steps)]) # Example range for x-axis -ax.set_ylim([min(lower_bound_1std) - 1, max(upper_bound_1std) + 1]) # Example range for y-axis - -# Ensure x-axis uses only integers and includes the first and last indices -ax.set_xticks(np.arange(1, len(time_steps) + 1, step=1)) - -# Set custom axis titles -ax.set_xlabel(r'$\mathrm{Update~(10~Episodes)}$', fontsize=14) -ax.set_ylabel(r'$\mathrm{Average~Reward}$', fontsize=14) - -# Set custom legend -ax.legend(loc='best') - -# Save plot as PDF -plt.savefig('average_reward_plot_with_samples_and_confidence_intervals.pdf', format='pdf') - -# Show plot -plt.show() diff --git a/modules/combined/examples/stochastic/vortex_control/trainer.i b/modules/combined/examples/stochastic/vortex_control/trainer.i deleted file mode 100644 index e36e16db65ca..000000000000 --- a/modules/combined/examples/stochastic/vortex_control/trainer.i +++ /dev/null @@ -1,117 +0,0 @@ -[StochasticTools] -[] - -[Samplers] - [dummy] - type = CartesianProduct - linear_space_items = '0 0.01 5' - min_procs_per_row = 20 - max_procs_per_row = 20 - [] -[] - -[MultiApps] - [runner] - type = SamplerFullSolveMultiApp - sampler = dummy - input_files = 'flow_over_circle_linearfv.i' - mode = batch-reset - min_procs_per_app = 20 - max_procs_per_app = 20 - [] -[] - -[Transfers] - [nn_transfer] - type = SamplerNeuralNetControlTransfer - to_multi_app = runner - trainer_name = nn_trainer - control_name = src_control - sampler = dummy - [] - [r_transfer] - type = SamplerReporterTransfer - from_multi_app = runner - sampler = dummy - stochastic_reporter = storage - from_reporter = 'results/p1x:value results/p2x:value results/p3x:value results/p4x:value results/p5x:value ' - 
'results/p1y:value results/p2y:value results/p3y:value results/p4y:value results/p5y:value ' - 'results/reward:value results/Q:value results/log_prob_Q:value' - [] -[] - -[Trainers] - [nn_trainer] - type = LibtorchDRLControlTrainer - response = 'storage/r_transfer:results:p1x:value storage/r_transfer:results:p2x:value storage/r_transfer:results:p3x:value storage/r_transfer:results:p4x:value storage/r_transfer:results:p5x:value ' - 'storage/r_transfer:results:p1y:value storage/r_transfer:results:p2y:value storage/r_transfer:results:p3y:value storage/r_transfer:results:p4y:value storage/r_transfer:results:p5y:value' - control = 'storage/r_transfer:results:Q:value' - log_probability = 'storage/r_transfer:results:log_prob_Q:value' - reward = 'storage/r_transfer:results:reward:value' - - num_epochs = 25 - update_frequency = 1 - decay_factor = 0.99 - lambda_factor = 0.97 - - loss_print_frequency = 1 - - critic_learning_rate = 0.001 - num_critic_neurons_per_layer = '512 512' - critic_activation_functions = 'tanh tanh' - - control_learning_rate = 0.001 - num_control_neurons_per_layer = '512 512' - control_activation_functions = 'tanh tanh' - - # keep consistent with LibtorchNeuralNetControl - input_timesteps = 1 - - # response_scaling_factors = '13.33 15.38 16.66 38.46 15.38 33.33 40 11.76 4.711 15.38' - # response_shift_factors = '2.055 2.055 1.93 -0.171 1.945 0.449 -0.525 0.029 0.17675 1.945' - - response_shift_factors = '1.98 1.825 2.015 0.03 1.9 0.58 -0.425 0.06 0.12 -0.02' - response_scaling_factors = '1.47 1.03 2.60 3.45 2.0 1.19 1.6 2.7 1.47 2.08' - - # response_scaling_factors = '1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0' - # response_shift_factors = '0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' - - standardize_advantage = true - - read_from_file = false - - # min_control_value = ${fparse -0.108} - # max_control_value = ${fparse 0.108} - - min_control_value = ${fparse -0.108} - max_control_value = ${fparse 0.108} - - batch_size = 400 - timestep_window = 50 - - 
entropy_coeff = 0.01 - [] -[] - -[Reporters] - [storage] - type = StochasticReporter - parallel_type = ROOT - outputs = none - [] - [reward] - type = DRLRewardReporter - drl_trainer_name = nn_trainer - [] -[] - -[Executioner] - type = Transient - num_steps = 500 -[] - -[Outputs] - file_base = output/train_out - json = true - execute_on = TIMESTEP_END -[] From c9c637a30fa362c6a4fd1e6affe2eda0534e1b2d Mon Sep 17 00:00:00 2001 From: Peter German Date: Thu, 23 Apr 2026 16:58:12 -0600 Subject: [PATCH 38/51] Make loading nicer, move setup to initial setup. --- .../controls/LibtorchNeuralNetControl.h | 5 +- .../controls/LibtorchNeuralNetControl.C | 16 +- .../utils/LibtorchArtificialNeuralNet.C | 161 ++++++++++++++---- .../libtorch/controls/LibtorchDRLControl.h | 2 +- .../libtorch/controls/LibtorchDRLControl.C | 7 +- .../gold/torch_parameter_read.json | 11 ++ 6 files changed, 152 insertions(+), 50 deletions(-) diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index af4b20dae469..686164bc5d7b 100644 --- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -31,6 +31,9 @@ class LibtorchNeuralNetControl : public Control /// Construct using input parameters LibtorchNeuralNetControl(const InputParameters & parameters); + /// Load any file-backed controller state after full object construction + virtual void initialSetup() override; + /// Execute neural network to determine the controllable parameter values virtual void execute() override; @@ -51,7 +54,7 @@ class LibtorchNeuralNetControl : public Control */ virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn); - virtual void loadControlNeuralNetFromFile(const InputParameters & parameters); + virtual void loadControlNeuralNetFromFile(); /// Return a reference to the stored neural network const Moose::LibtorchNeuralNetBase & 
controlNeuralNet() const; diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 1c21df2164f4..71ca64e364e2 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -103,15 +103,19 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param // errors if we don't have the postprocessors requested in the input. for (unsigned int resp_i = 0; resp_i < _response_names.size(); ++resp_i) _response_values.push_back(&getPostprocessorValueByName(_response_names[resp_i])); +} - // If the user wants to read the neural net from file, we do it. We can read it from a - // torchscript file, or we can create a shell and read back the parameters. - if (parameters.isParamSetByUser("filename") && type() == "LibtorchNeuralNetControl") - this->loadControlNeuralNetFromFile(parameters); +void +LibtorchNeuralNetControl::initialSetup() +{ + // File-backed controllers are loaded after full construction so derived controls can override + // the loader without constructor-time type checks. + if (isParamSetByUser("filename")) + loadControlNeuralNetFromFile(); } void -LibtorchNeuralNetControl::loadControlNeuralNetFromFile(const InputParameters & parameters) +LibtorchNeuralNetControl::loadControlNeuralNetFromFile() { const auto & filename = getParam("filename"); if (getParam("torch_script_format")) @@ -123,7 +127,7 @@ LibtorchNeuralNetControl::loadControlNeuralNetFromFile(const InputParameters & p std::vector num_neurons_per_layer = getParam>("num_neurons_per_layer"); std::vector activation_functions = - parameters.isParamSetByUser("activation_function") + isParamSetByUser("activation_function") ? 
getParam>("activation_function") : std::vector({"relu"}); const auto input_shift_factors = diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index da0499852790..693ee1665940 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -46,6 +46,115 @@ isOptionalArtificialNeuralNetBuffer(const std::string & key) return key == "input_shift" || key == "input_scale" || key == "output_scale"; } +template +bool +findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch::Tensor & tensor) +{ + for (const auto & entry : tensors) + if (entry.name == key) + { + tensor = entry.value; + return true; + } + + return false; +} + +bool +loadArtificialNeuralNetStateFromArchive(Moose::LibtorchArtificialNeuralNet & nn, + const std::string & filename, + std::string & error) +{ + try + { + torch::serialize::InputArchive archive; + archive.load_from(filename); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if (!readArchiveTensor(archive, parameter.key(), stored_tensor)) + { + error = "Missing serialized parameter: " + parameter.key(); + return false; + } + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!readArchiveTensor(archive, buffer.key(), stored_tensor)) + { + if (isOptionalArtificialNeuralNetBuffer(buffer.key())) + continue; + + error = "Missing serialized buffer: " + buffer.key(); + return false; + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + return true; + } + catch (const c10::Error & e) + { + error = e.msg(); + return false; + } +} + +bool +loadArtificialNeuralNetStateFromTorchScript(Moose::LibtorchArtificialNeuralNet & nn, + const std::string & filename, + std::string & error) +{ + try + { + const auto scripted 
= torch::jit::load(filename); + const auto scripted_parameters = scripted.named_parameters(); + const auto scripted_buffers = scripted.named_buffers(); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if (!findNamedTensor(scripted_parameters, parameter.key(), stored_tensor)) + { + error = "Missing scripted parameter: " + parameter.key(); + return false; + } + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!findNamedTensor(scripted_buffers, buffer.key(), stored_tensor)) + { + if (isOptionalArtificialNeuralNetBuffer(buffer.key())) + continue; + + error = "Missing scripted buffer: " + buffer.key(); + return false; + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + return true; + } + catch (const c10::Error & e) + { + error = e.msg(); + return false; + } +} + } // namespace namespace Moose @@ -318,43 +427,21 @@ void loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn, const std::string & filename) { - torch::serialize::InputArchive archive; - archive.load_from(filename); - - for (auto & parameter : nn.named_parameters()) - { - torch::Tensor stored_tensor; - if (!readArchiveTensor(archive, parameter.key(), stored_tensor)) - mooseError("The requested pytorch parameter file could not be loaded. This can either be " - "the result of the file not existing or a misalignment in the generated " - "container and the data in the file. 
Make sure the dimensions of the generated " - "neural net are the same as the dimensions of the parameters in the input file!\n" - "Missing serialized parameter: ", - parameter.key()); - - copyTensor(parameter.value(), stored_tensor); - } - - for (auto & buffer : nn.named_buffers()) - { - torch::Tensor stored_tensor; - if (!readArchiveTensor(archive, buffer.key(), stored_tensor)) - { - if (isOptionalArtificialNeuralNetBuffer(buffer.key())) - continue; - - mooseError("The requested pytorch parameter file could not be loaded. This can either be " - "the result of the file not existing or a misalignment in the generated " - "container and the data in the file. Make sure the dimensions of the generated " - "neural net are the same as the dimensions of the parameters in the input file!\n" - "Missing serialized buffer: ", - buffer.key()); - } - - copyTensor(buffer.value(), stored_tensor); - } - - nn.synchronizeAffineFactorsFromBuffers(); + std::string archive_error; + if (loadArtificialNeuralNetStateFromArchive(nn, filename, archive_error)) + return; + + std::string scripted_error; + if (loadArtificialNeuralNetStateFromTorchScript(nn, filename, scripted_error)) + return; + + mooseError("The requested pytorch parameter file could not be loaded. This can either be the " + "result of the file not existing or a misalignment in the generated container and " + "the data in the file. 
Make sure the dimensions of the generated neural net are the " + "same as the dimensions of the parameters in the input file!\nArchive load error: ", + archive_error, + "\nTorchScript load error: ", + scripted_error); } } diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index 76350343690f..b1ce537bafd3 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -41,7 +41,7 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) override; - virtual void loadControlNeuralNetFromFile(const InputParameters & parameters) override; + virtual void loadControlNeuralNetFromFile() override; protected: /// The log probability of control signals from the last evaluation of the controller diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 870060acbf0a..78bcf132162b 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -74,13 +74,10 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) // Fixing the RNG seed to make sure every experiment is the same. 
if (isParamValid("seed")) torch::manual_seed(getParam("seed")); - - if (parameters.isParamSetByUser("filename")) - loadControlNeuralNetFromFile(parameters); } void -LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & parameters) +LibtorchDRLControl::loadControlNeuralNetFromFile() { const auto & filename = getParam("filename"); if (getParam("torch_script_format")) @@ -95,7 +92,7 @@ LibtorchDRLControl::loadControlNeuralNetFromFile(const InputParameters & paramet std::vector num_neurons_per_layer = getParam>("num_neurons_per_layer"); std::vector activation_functions = - parameters.isParamSetByUser("activation_function") + isParamSetByUser("activation_function") ? getParam>("activation_function") : std::vector({"relu"}); diff --git a/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json b/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json index f04acdb645fe..5f2e6e84d21e 100644 --- a/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json +++ b/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json @@ -112,12 +112,23 @@ 0.3051220898124913, 0.2652967572212219 ], + "input_scaling_factors": [ + 1.0, + 1.0 + ], + "input_shift_factors": [ + 0.0, + 0.0 + ], "output_layer_.bias": [ 0.4794089532666399 ], "output_layer_.weight": [ 0.47736589074351915, 0.11757504436858923 + ], + "output_scaling_factors": [ + 1.0 ] } }, From 30b7146c28a600918533a4a51728abca432e13c3 Mon Sep 17 00:00:00 2001 From: Peter German Date: Fri, 24 Apr 2026 10:06:45 -0600 Subject: [PATCH 39/51] Remove normalization for the initialization. 
--- .../libtorch/utils/LibtorchActionDistribution.C | 17 ++++------------- .../src/libtorch/utils/LibtorchActorNeuralNet.C | 5 +---- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C index 53dc7288999b..9fad0b3a74f9 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C @@ -14,7 +14,6 @@ #include "LibtorchUtils.h" #include "MooseError.h" -#include #include #include #include "libmesh/utility.h" @@ -125,9 +124,7 @@ LibtorchGaussianActionDistribution::constructDistribution() void LibtorchGaussianActionDistribution::initialize() { - const auto mean_sizes = _mean_module->weight.sizes(); - const auto mean_max_dim_size = *std::max_element(mean_sizes.begin(), mean_sizes.end()); - torch::nn::init::orthogonal_(_mean_module->weight, 1.0 / mean_max_dim_size); + torch::nn::init::orthogonal_(_mean_module->weight); torch::nn::init::zeros_(_mean_module->bias); if (_state_independent_std) @@ -137,9 +134,7 @@ LibtorchGaussianActionDistribution::initialize() return; } - const auto std_sizes = _std_module->weight.sizes(); - const auto std_max_dim_size = *std::max_element(std_sizes.begin(), std_sizes.end()); - torch::nn::init::orthogonal_(_std_module->weight, 1.0 / std_max_dim_size); + torch::nn::init::orthogonal_(_std_module->weight); torch::nn::init::zeros_(_std_module->bias); } @@ -245,14 +240,10 @@ LibtorchBetaActionDistribution::constructDistribution() void LibtorchBetaActionDistribution::initialize() { - const auto alpha_sizes = _alpha_module->weight.sizes(); - const auto alpha_max_dim_size = *std::max_element(alpha_sizes.begin(), alpha_sizes.end()); - torch::nn::init::orthogonal_(_alpha_module->weight, 1.0 / alpha_max_dim_size); + torch::nn::init::orthogonal_(_alpha_module->weight); 
torch::nn::init::zeros_(_alpha_module->bias); - const auto beta_sizes = _beta_module->weight.sizes(); - const auto beta_max_dim_size = *std::max_element(beta_sizes.begin(), beta_sizes.end()); - torch::nn::init::orthogonal_(_beta_module->weight, 1.0 / beta_max_dim_size); + torch::nn::init::orthogonal_(_beta_module->weight); torch::nn::init::zeros_(_beta_module->bias); } diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C index c33e409dc1cf..50df0e3a7ef5 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -289,10 +289,7 @@ LibtorchActorNeuralNet::initializeNeuralNetwork() const auto & activation = _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; const Real gain = determineGain(activation); - - auto sizes = _weights[i]->weight.sizes(); - auto max_dim_size = *std::max_element(sizes.begin(), sizes.end()); - torch::nn::init::orthogonal_(_weights[i]->weight, gain / max_dim_size); + torch::nn::init::orthogonal_(_weights[i]->weight, gain); torch::nn::init::zeros_(_weights[i]->bias); } From 3007e7dee38262b3533c209e773f9940b0487612 Mon Sep 17 00:00:00 2001 From: Peter German Date: Fri, 24 Apr 2026 10:30:54 -0600 Subject: [PATCH 40/51] Add docstrings. 
--- .../controls/LibtorchNeuralNetControl.h | 27 +++--- .../utils/LibtorchArtificialNeuralNet.h | 65 ++++++++++---- .../controls/LibtorchNeuralNetControl.C | 33 +++---- .../utils/LibtorchArtificialNeuralNet.C | 34 ++++++++ .../libtorch/controls/LibtorchDRLControl.h | 11 ++- .../surrogates/LibtorchDRLControlTrainer.h | 43 ++++++++-- .../SamplerNeuralNetControlTransfer.h | 7 +- .../utils/LibtorchActionDistribution.h | 59 +++++++++++++ .../libtorch/utils/LibtorchActorNeuralNet.h | 85 ++++++++++++++++--- .../utils/LibtorchRLMiniBatchSampler.h | 24 ++++++ .../libtorch/utils/LibtorchRLPPOLoss.h | 20 +++++ .../utils/LibtorchRLTrajectoryBuffer.h | 32 +++++++ .../libtorch/utils/LibtorchRLValueEstimator.h | 24 ++++++ .../LiftDragRewardPostprocessor.h | 11 +++ .../libtorch/controls/LibtorchDRLControl.C | 4 +- .../trainers/LibtorchDRLControlTrainer.C | 50 +++++------ .../SamplerNeuralNetControlTransfer.C | 18 ++-- .../utils/LibtorchActionDistribution.C | 6 ++ .../libtorch/utils/LibtorchActorNeuralNet.C | 49 +++++++++++ .../LiftDragRewardPostprocessor.C | 19 +++-- 20 files changed, 502 insertions(+), 119 deletions(-) diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index 686164bc5d7b..e3d3dd01ed90 100644 --- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -40,7 +40,7 @@ class LibtorchNeuralNetControl : public Control /** * Get the (signal_index)-th signal of the control neural net * @param signal_index The index of the queried control signal - * @return The (signal_index)-th constol signal + * @return The requested control signal. 
*/ Real getSignal(const unsigned int signal_index) const; @@ -48,41 +48,40 @@ class LibtorchNeuralNetControl : public Control unsigned int numberOfControlSignals() const { return _control_names.size(); } /** - * Function responsible for loading the neural network for the controller. This function is used - * when copying the neural network from a main app which trains it. - * @param input_nn Reference to a neural network which will be copied into this object + * Copy a trained neural network into the controller. + * @param input_nn Neural network that should replace the currently stored controller. */ virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn); + /// Load the controller neural network from the configured file on disk. virtual void loadControlNeuralNetFromFile(); - /// Return a reference to the stored neural network + /// Return a reference to the stored neural network. const Moose::LibtorchNeuralNetBase & controlNeuralNet() const; - /// Return true if the object already has a neural netwok + /// Return true if the object already has a neural network. bool hasControlNeuralNet() const { return (_nn != NULL); }; protected: /** - * Function responsible for checking for potential user errors in the input file - * @param param_name The name of the main parameter - * @param conditional_param Vector parameter names that depend on the main parameter - * @param should_be_defined If the conditional parameters should be defined when the main - * parameter is defined + * Check one conditional-parameter rule and raise an input error if it is violated. + * @param param_name Main parameter that controls the rule. + * @param conditional_param Parameters that depend on the main parameter. + * @param should_be_defined Whether the dependent parameters should be present or absent. 
*/ void conditionalParameterError(const std::string & param_name, const std::vector & conditional_param, bool should_be_defined = true); - /// Function that updates the values of the current response + /// Refresh the current observation values from the linked postprocessors. void updateCurrentResponse(); - /// Function that prepares the input tensor for the controller neural network + /// Build the normalized input tensor passed into the controller neural network. torch::Tensor prepareInputTensor(); /// The values of the current observed postprocessor values std::vector _current_response; - /// This variable is populated if the controller needs acess to older values of the + /// This variable is populated if the controller needs access to older values of the /// observed postprocessor values std::vector> & _old_responses; diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index 12b89d6afdd3..1928a03376de 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -23,16 +23,25 @@ namespace Moose { -// A class that describes a simple feed-forward neural net. +/** + * Simple feed-forward neural net with optional affine input and output scaling. + */ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeuralNetBase { public: /** - * Construct using input parameters - * @param name Name of the neural network - * @param num_inputs The number of input neurons/parameters - * @param num_neurons_per_layer Number of neurons per hidden layer - * @param num_outputs The number of output neurons + * Build a plain feed-forward neural network. + * @param name Name of the neural network module. + * @param num_inputs Number of input neurons or parameters. + * @param num_outputs Number of output neurons. + * @param num_neurons_per_layer Hidden-layer widths. 
+ * @param activation_function Hidden-layer activation names. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param build_on_construct Whether to build the torch modules right away. + * @param input_shift_factors Optional affine input shifts. + * @param input_scaling_factors Optional affine input scales. + * @param output_scaling_factors Optional output scaling factors. */ LibtorchArtificialNeuralNet(const std::string name, const unsigned int num_inputs, @@ -47,25 +56,25 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu const std::vector & output_scaling_factors = {}); /** - * Copy construct an artificial neural network - * @param nn The neural network which needs to be copied + * Copy-construct a feed-forward neural network. + * @param nn Neural network to copy. + * @param build_on_construct Whether to rebuild the module structure during the copy. */ LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn, const bool build_on_construct = true); /** - * Add layers to the neural network - * @param layer_name The name of the layer to be added - * @param parameters A map of parameter names and the corresponding values which - * describe the neural net layer architecture + * Add one linear layer to the network. + * @param layer_name Name of the layer to add. + * @param parameters Small parameter map that describes the layer shape. */ virtual void addLayer(const std::string & layer_name, const std::unordered_map & parameters); /** - * Overriding the forward substitution function for the neural network, unfortunately - * this cannot be const since it creates a graph in the background - * @param x Input tensor for the evaluation + * Run a forward pass through the network. + * @param x Input tensor for the evaluation. + * @return Network output tensor. 
*/ virtual torch::Tensor forward(const torch::Tensor & x) override; @@ -97,24 +106,50 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu /// Update cached affine metadata vectors from the registered libtorch buffers. void synchronizeAffineFactorsFromBuffers(); + /** + * Map an activation name to the orthogonal-initialization gain we want to use. + * @param activation Activation name to look up. + * @return Gain used for orthogonal initialization. + */ Real determineGain(const std::string & activation); + /// Initialize the trainable weights and biases. virtual void initializeNeuralNetwork(); /// Store the network architecture in a json file (for debugging, visualization) void store(nlohmann::json & json) const; protected: + /** + * Normalize affine metadata and fill in defaults when needed. + * @param factors User-provided affine factors. + * @param expected_size Expected number of entries. + * @param default_value Default value used when the vector is empty. + * @param factor_name Name used in error messages. + * @param forbid_zero Whether zero entries should be rejected. + * @return Normalized affine-factor vector. + */ static std::vector normalizeAffineFactors(const std::vector & factors, unsigned int expected_size, Real default_value, const std::string & factor_name, bool forbid_zero = false); + /// Initialize the registered affine metadata buffers used by serialization. void initializeAffineBuffers(); + /** + * Apply affine preprocessing to the raw input tensor. + * @param x Raw input tensor. + * @return Preprocessed input tensor. + */ virtual torch::Tensor preprocessInput(const torch::Tensor & x) const; + /** + * Apply the configured output scaling to a network output tensor. + * @param y Raw network output tensor. + * @return Scaled output tensor. 
+ */ virtual torch::Tensor scaleOutput(const torch::Tensor & y) const; /// Name of the neural network diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 71ca64e364e2..01fa0c351211 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -23,40 +23,33 @@ LibtorchNeuralNetControl::validParams() InputParameters params = Control::validParams(); params.addClassDescription("Controls the value of multiple controllable input parameters using a " "Libtorch-based neural network."); - params.addRequiredParam>("parameters", - "The input parameter(s) to control."); + params.addRequiredParam>( + "parameters", "Controllable input parameters driven by the network."); params.addRequiredParam>( - "responses", "The responses (prostprocessors) which are used for the control."); + "responses", "Postprocessors used as the current observation vector."); params.addParam>( "response_shift_factors", - "Constants which will be used to shift the response values. This is used for the " - "manipulation of the neural net inputs for better training efficiency."); + "Optional offsets applied to the observation values before scaling."); params.addParam>( "response_scaling_factors", - "Constants which will be used to multiply the shifted response values. 
This is used for " - "the manipulation of the neural net inputs for better training efficiency."); - params.addParam("filename", - "Define if the neural net is supposed to be loaded from a file."); + "Optional multipliers applied after shifting the observation values."); + params.addParam("filename", "Checkpoint file to load for the controller network."); params.addParam("torch_script_format", false, - "If we want to load the neural net using the torch-script format."); + "Whether the checkpoint should be read as a scripted Torch module."); params.addParam( - "input_timesteps", - 1, - "Number of time steps to use in the input data, if larger than 1, " - "data from the previous timesteps will be used as well as inputs in the training."); - params.addParam>("num_neurons_per_layer", - "The number of neurons on each hidden layer."); + "input_timesteps", 1, "Number of recent timesteps to stack into each network input."); + params.addParam>( + "num_neurons_per_layer", "Hidden-layer widths used when constructing the controller."); params.addParam>( "activation_function", std::vector({"relu"}), - "The type of activation functions to use. It is either one value " - "or one value per hidden layer."); + "Activation name for each hidden layer, or one shared value for all layers."); params.addParam>( "action_scaling_factors", - "Scale factors embedded into constructed neural-network outputs so checkpointed policies " - "carry their physical-unit action scaling."); + "Per-action scaling embedded in the controller outputs so saved checkpoints stay in " + "physical units."); return params; } diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index 693ee1665940..4946ff24d7b4 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -18,6 +18,13 @@ namespace { +/** + * Try to read one tensor from a plain libtorch archive. 
+ * @param archive Archive being read. + * @param key Serialized tensor name. + * @param tensor Tensor that receives the loaded data. + * @return True when the tensor was found and loaded. + */ bool readArchiveTensor(torch::serialize::InputArchive & archive, const std::string & key, @@ -34,18 +41,31 @@ readArchiveTensor(torch::serialize::InputArchive & archive, } } +/** + * Copy a stored tensor into an existing parameter or buffer. + * @param destination Tensor owned by the current module. + * @param source Tensor read from disk. + */ void copyTensor(torch::Tensor & destination, const torch::Tensor & source) { destination.data().copy_(source.to(destination.options())); } +/// Return true for affine-metadata buffers that older checkpoints may omit. bool isOptionalArtificialNeuralNetBuffer(const std::string & key) { return key == "input_shift" || key == "input_scale" || key == "output_scale"; } +/** + * Look up one named tensor in a torch named-parameter or named-buffer list. + * @param tensors Torch named tensor list. + * @param key Tensor name to search for. + * @param tensor Tensor that receives the match. + * @return True when the requested tensor exists. + */ template bool findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch::Tensor & tensor) @@ -60,6 +80,13 @@ findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch: return false; } +/** + * Load ANN parameters and buffers from a plain libtorch archive. + * @param nn Neural network that receives the loaded state. + * @param filename Checkpoint file to read. + * @param error Human-readable error string filled on failure. + * @return True when the network was loaded successfully. 
+ */ bool loadArtificialNeuralNetStateFromArchive(Moose::LibtorchArtificialNeuralNet & nn, const std::string & filename, @@ -107,6 +134,13 @@ loadArtificialNeuralNetStateFromArchive(Moose::LibtorchArtificialNeuralNet & nn, } } +/** + * Load ANN parameters and buffers from a scripted Torch module. + * @param nn Neural network that receives the loaded state. + * @param filename Checkpoint file to read. + * @param error Human-readable error string filled on failure. + * @return True when the network was loaded successfully. + */ bool loadArtificialNeuralNetStateFromTorchScript(Moose::LibtorchArtificialNeuralNet & nn, const std::string & filename, diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index b1ce537bafd3..dade0dde5be5 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -33,14 +33,19 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl virtual void execute() override; /** - * Get the logarithmic probability of (signal_index)-th signal of the control neural net - * @param signal_index The index of the signal - * @return The logarithmic probability of the (signal_index)-th signal + * Return the log-probability of one control signal from the latest actor evaluation. + * @param signal_index Index of the queried control signal. + * @return Log-probability of the queried signal. */ Real getSignalLogProbability(const unsigned int signal_index) const; + /** + * Copy a trained actor into the controller. + * @param input_nn Actor network that should replace the currently stored controller. + */ virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) override; + /// Load the actor network from the configured checkpoint file. 
virtual void loadControlNeuralNetFromFile() override; protected: diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 42472cb929dc..9dc9ae4faaf7 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -32,32 +32,44 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase public: static InputParameters validParams(); - /// construct using input parameters + /** + * Build the PPO-based DRL trainer. + * @param parameters Input parameters for the trainer. + */ LibtorchDRLControlTrainer(const InputParameters & parameters); + /// Pull fresh rollout data from the reporters and trigger training when ready. virtual void execute() override; /** - * Function which returns the current average episodic reward. It is only updated - * at the end of every episode. + * Return the current average episodic reward. + * @return Average episodic reward over the latest training window. */ Real averageEpisodeReward() { return _average_episode_reward; } + /// Return the current episodic reward standard deviation. Real stdEpisodeReward() { return _std_episode_reward; } + /// Return per-sample mean episodic rewards from the latest update window. std::vector sampleAverageEpsiodeRewards() { return _sample_average_episode_reward; } + /// Return per-sample episodic reward standard deviations from the latest update window. std::vector sampleStdEpsiodeRewards() { return _sample_std_episode_reward; } - /// The condensed training function + /** + * Run the PPO update on a flattened on-policy batch. + * @param batch Flattened trajectory batch to train on. + */ void trainController(const LibtorchRLTrajectoryBuffer::TensorBatch & batch); + /// Return the current actor network. 
const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const { return *_control_nn; } + /// Return the trainer seed used for sampling and shuffling. unsigned int seed() const { return _seed; } protected: - /// Compute the average eposiodic reward + /// Compute the average episodic reward statistics for the latest samples. void computeEpisodeRewardStatistics(); - /// Reset data after updating the neural network + /// Reset the stored rollout data after an update. void resetData(); /// Response reporter names @@ -177,14 +189,31 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase std::unique_ptr _critic_optimizer; private: - /// Getting reporter pointers with given names + /** + * Resolve reporter names into cached pointer storage. + * @param reporter_names Reporter names to look up. + * @param pointer_storage Output vector that receives the reporter pointers. + */ void getReporterPointers(const std::vector & reporter_names, std::vector> *> & pointer_storage); + /// Pull trajectories out of the reporters and append them to the trajectory buffer. void collectTrajectoriesFromReporters(); + /** + * Figure out how many aligned transitions a raw reporter sequence contains. + * @param raw_sequence_size Number of raw time entries in the reporter sequence. + * @return Number of valid transitions after history stacking and downsampling. + */ unsigned int computeNumTransitions(std::size_t raw_sequence_size) const; + /** + * Downsample one raw reporter sequence into the aligned rollout sequence we train on. + * @param sample Raw reporter sequence. + * @param offset Starting offset used for the aligned sequence. + * @param num_entries Number of aligned entries to extract. + * @return Downsampled sequence. 
+ */ std::vector extractDownsampledSequence(const std::vector & sample, unsigned int offset, unsigned int num_entries) const; diff --git a/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h b/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h index 3e0527028af7..aa887d39cb01 100644 --- a/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h +++ b/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h @@ -22,13 +22,18 @@ class SamplerNeuralNetControlTransfer : public StochasticToolsTransfer, public S public: static InputParameters validParams(); + /** + * Build the transfer that pushes a trained controller into subapps. + * @param parameters Input parameters for the transfer. + */ SamplerNeuralNetControlTransfer(const InputParameters & parameters); + /// Execute the transfer in the standard non-batch path. virtual void execute() override; ///@{ /** - * Methods used when running in batch mode (see SamplerFullSolveMultiApp) + * Methods used when running in batch mode (see SamplerFullSolveMultiApp). */ virtual void initialSetup() override; virtual void initializeFromMultiapp() override; diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h index 7cb3dddb4199..4245f56b858f 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h @@ -27,6 +27,15 @@ namespace Moose class LibtorchActionDistribution : public torch::nn::Module { public: + /** + * Build an action-distribution module for an actor network. + * @param name Module name used for registration and serialization. + * @param num_inputs Number of actor features feeding the distribution. 
+ * @param num_outputs Number of action dimensions produced by the distribution. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param output_scaling_factors Optional per-action scaling applied in physical units. + */ LibtorchActionDistribution(const std::string & name, unsigned int num_inputs, unsigned int num_outputs, @@ -34,20 +43,47 @@ class LibtorchActionDistribution : public torch::nn::Module torch::ScalarType scalar_type = torch::kDouble, const std::vector & output_scaling_factors = {}); + /// Initialize the trainable distribution parameters. virtual void initialize() = 0; + /** + * Refresh cached distribution parameters from the latest actor features. + * @param input Feature tensor coming from the actor body. + */ virtual void reset(const torch::Tensor & input) = 0; + /** + * Draw a stochastic action sample in physical units. + * @return Sampled action tensor. + */ virtual torch::Tensor sample() const = 0; + /** + * Return the deterministic action used for evaluation. + * @return Deterministic action tensor. + */ virtual torch::Tensor deterministicAction() const = 0; + /** + * Evaluate the log-probability of an action under the current distribution. + * @param action Action tensor in physical units. + * @return Log-probability tensor for the action. + */ virtual torch::Tensor logProbability(const torch::Tensor & action) const = 0; + /** + * Compute the entropy of the current distribution. + * @return Entropy tensor. + */ virtual torch::Tensor entropy() const = 0; + /** + * Tell callers whether the distribution enforces explicit action bounds. + * @return True for bounded distributions, false for unbounded ones. + */ virtual bool isBounded() const = 0; + /// Sync cached scaling metadata from the registered buffers after loading state. 
void synchronizeScalingFactorsFromBuffer(); protected: @@ -71,6 +107,17 @@ class LibtorchActionDistribution : public torch::nn::Module class LibtorchGaussianActionDistribution : public LibtorchActionDistribution { public: + /** + * Build the Gaussian action distribution used for unbounded controls. + * @param name Module name used for registration and serialization. + * @param num_inputs Number of actor features feeding the distribution. + * @param num_outputs Number of action dimensions produced by the distribution. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param build_on_construct Whether to build the torch modules right away. + * @param output_scaling_factors Optional per-action scaling applied in physical units. + * @param state_independent_std Whether the learned std should ignore the current state. + */ LibtorchGaussianActionDistribution(const std::string & name, unsigned int num_inputs, unsigned int num_outputs, @@ -118,6 +165,18 @@ class LibtorchGaussianActionDistribution : public LibtorchActionDistribution class LibtorchBetaActionDistribution : public LibtorchActionDistribution { public: + /** + * Build the Beta action distribution used for bounded controls. + * @param name Module name used for registration and serialization. + * @param num_inputs Number of actor features feeding the distribution. + * @param num_outputs Number of action dimensions produced by the distribution. + * @param minimum_values Lower action bounds in physical units. + * @param maximum_values Upper action bounds in physical units. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param build_on_construct Whether to build the torch modules right away. + * @param output_scaling_factors Optional extra per-action scaling in physical units. 
+ */ LibtorchBetaActionDistribution(const std::string & name, unsigned int num_inputs, unsigned int num_outputs, diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h index 2572abc9702a..2adaafd44b91 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -23,11 +23,21 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet { public: /** - * Construct using input parameters - * @param name Name of the neural network - * @param num_inputs The number of input neurons/parameters - * @param num_neurons_per_layer Number of neurons per hidden layer - * @param num_outputs The number of output neurons + * Build an actor network with either a Gaussian or Beta action distribution. + * @param name Name of the neural network module. + * @param num_inputs Number of actor inputs. + * @param num_outputs Number of action outputs. + * @param num_neurons_per_layer Hidden-layer widths. + * @param activation_function Hidden-layer activation names. + * @param minimum_values Optional lower action bounds. Leave empty for Gaussian actions. + * @param maximum_values Optional upper action bounds. Leave empty for Gaussian actions. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param build_on_construct Whether to build the torch modules right away. + * @param input_shift_factors Optional affine input shifts. + * @param input_scaling_factors Optional affine input scales. + * @param output_scaling_factors Optional per-action scaling in physical units. + * @param state_independent_std Whether the Gaussian std should ignore the current state. 
*/ LibtorchActorNeuralNet(const std::string name, const unsigned int num_inputs, @@ -45,36 +55,52 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet const bool state_independent_std = true); /** - * Copy construct an artificial neural network - * @param nn The neural network which needs to be copied + * Copy-construct an actor network. + * @param nn Actor network to copy. + * @param build_on_construct Whether to rebuild the module structure during the copy. */ LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, const bool build_on_construct = true); /** - * Overriding the forward substitution function for the neural network, unfortunately - * this cannot be const since it creates a graph in the background - * @param x Input tensor for the evaluation + * Run the actor forward pass and return a sampled action. + * @param x Input tensor for the evaluation. + * @return Action tensor produced by the actor. */ virtual torch::Tensor forward(const torch::Tensor & x) override; + /** + * Evaluate the actor and either sample from it or use its deterministic action. + * @param input Input tensor for the evaluation. + * @param sampled Whether to draw a stochastic sample. + * @return Action tensor produced by the actor. + */ virtual torch::Tensor evaluate(torch::Tensor & input, bool sampled); + /** + * Sample an action from the already-reset distribution. + * @return Sampled action tensor. + */ virtual torch::Tensor sample(); - /// Construct the neural network + /// Build the hidden layers and the matching action-distribution module. virtual void constructNeuralNetwork() override; + /// Return the active action distribution as the common base type. const LibtorchActionDistribution & actionDistribution() const { return *_action_distribution; } LibtorchActionDistribution & actionDistribution() { return *_action_distribution; } + /// Return the Gaussian action distribution pointer, or nullptr for bounded actors. 
const LibtorchGaussianActionDistribution * gaussianActionDistributionPtr() const; LibtorchGaussianActionDistribution * gaussianActionDistributionPtr(); + /// Return the Gaussian action distribution reference. Errors if the actor is bounded. const LibtorchGaussianActionDistribution & gaussianActionDistribution() const; LibtorchGaussianActionDistribution & gaussianActionDistribution(); + /// Return the Beta action distribution pointer, or nullptr for Gaussian actors. const LibtorchBetaActionDistribution * betaActionDistributionPtr() const; LibtorchBetaActionDistribution * betaActionDistributionPtr(); + /// Return the Beta action distribution reference. Errors if the actor is unbounded. const LibtorchBetaActionDistribution & betaActionDistribution() const; LibtorchBetaActionDistribution & betaActionDistribution(); @@ -82,12 +108,26 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet const std::vector & minValues() const { return _minimum_values; } const std::vector & maxValues() const { return _maximum_values; } + /** + * Refresh the cached distribution parameters from a fresh input tensor. + * @param input Input tensor used to update the distribution. + */ void resetDistributionParams(torch::Tensor input); + /** + * Evaluate the log-probability of an action under the current actor state. + * @param other Action tensor in physical units. + * @return Log-probability tensor. + */ torch::Tensor logProbability(const torch::Tensor & other); + /** + * Compute the entropy of the current action distribution. + * @return Entropy tensor. + */ torch::Tensor entropy(); + /// Initialize the hidden layers and action-distribution parameters. virtual void initializeNeuralNetwork() override; protected: @@ -97,13 +137,34 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet std::shared_ptr _action_distribution; }; +/** + * Dump an actor network into JSON for reporter output and debugging. + * @param json JSON object that receives the serialized state. 
+ * @param network Actor network pointer to serialize. + */ void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); +/** + * Load an actor checkpoint written either as a state archive or TorchScript module. + * @param nn Actor network that receives the loaded state. + * @param filename Checkpoint file to read. + */ void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename); +/** + * Check whether a checkpoint comes from the older serialized-parameter layout. + * @param filename Checkpoint file to inspect. + * @return True if the file matches the legacy actor format. + */ bool isLegacyLibtorchActorArchive(const std::string & filename); +/** + * Load a checkpoint that still uses the legacy actor serialization layout. + * @param nn Actor network that receives the loaded state. + * @param filename Checkpoint file to read. + * @param action_standard_deviations Fallback std values for older Gaussian checkpoints. + */ void loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename, const std::vector & action_standard_deviations); @@ -120,7 +181,7 @@ void dataLoad(std::istream & stream, std::shared_ptr & nn, void * context); -// This is needed because the reporter which is used to ouput the neural net parameters to JSON +// This is needed because the reporter which is used to output the neural net parameters to JSON // requires a dataStore/dataLoad. 
However, these functions will be empty due to the fact that // we are only interested in the JSON output and we don't want to output everything template <> diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h index 0c543f508aa7..12d60cb2cda7 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h @@ -20,12 +20,18 @@ struct LibtorchRLMiniBatch { + /// Flattened observation rows for the mini-batch. torch::Tensor observations; + /// Action rows that match the sampled observations. torch::Tensor actions; + /// Behavior-policy log probabilities for the sampled actions. torch::Tensor old_log_probabilities; + /// Critic targets aligned with the sampled observations. torch::Tensor value_targets; + /// Advantage estimates aligned with the sampled observations. torch::Tensor advantages; + /// Return the number of rows stored in the mini-batch. std::int64_t size() const { return observations.defined() ? observations.size(0) : 0; } }; @@ -35,13 +41,31 @@ struct LibtorchRLMiniBatch class LibtorchRLMiniBatchSampler { public: + /** + * Shuffle a flattened rollout batch into PPO-sized chunks. + * @param batch Flattened rollout tensors ready for PPO updates. + * @param batch_size Preferred number of rows per mini-batch. + * @param standardize_advantage Whether to normalize the advantages inside each chunk. + * @return Vector of sampled mini-batches. + */ std::vector sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, unsigned int batch_size, bool standardize_advantage) const; private: + /** + * Sanity-check that the flattened rollout tensors all line up. + * @param batch Flattened rollout tensors to validate. 
+ */ static void validateBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch); + /** + * Slice one shuffled mini-batch out of the flattened rollout tensors. + * @param batch Flattened rollout tensors. + * @param indices Row indices assigned to this mini-batch. + * @param standardize_advantage Whether to normalize the advantages in this slice. + * @return One PPO mini-batch. + */ static LibtorchRLMiniBatch makeMiniBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, const torch::Tensor & indices, bool standardize_advantage); diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h index a0e21c72fa24..46650f154cf6 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h @@ -17,8 +17,11 @@ struct LibtorchRLPPOLossOutput { + /// Clipped actor loss for the current mini-batch. torch::Tensor actor_loss; + /// Critic regression loss for the current mini-batch. torch::Tensor critic_loss; + /// Mean action-distribution entropy for the current mini-batch. torch::Tensor entropy; }; @@ -28,13 +31,30 @@ struct LibtorchRLPPOLossOutput class LibtorchRLPPOLoss { public: + /** + * Build the PPO loss helper. + * @param clip_parameter PPO clipping width. + * @param entropy_coeff Weight applied to the entropy bonus. + */ LibtorchRLPPOLoss(Real clip_parameter, Real entropy_coeff); + /** + * Compute actor, critic, and entropy terms for one mini-batch. + * @param policy_network Actor network used for the policy term. + * @param value_network Critic network used for the value term. + * @param batch Mini-batch pulled from the on-policy trajectory buffer. + * @return The three loss components for the mini-batch. 
+ */ LibtorchRLPPOLossOutput compute(Moose::LibtorchActorNeuralNet & policy_network, Moose::LibtorchArtificialNeuralNet & value_network, const LibtorchRLMiniBatch & batch) const; private: + /** + * Collapse multi-action log-probabilities or entropies into one column tensor. + * @param tensor Action-wise tensor to reduce. + * @return Column tensor with one value per row in the mini-batch. + */ static torch::Tensor reduceActionDimension(const torch::Tensor & tensor); const Real _clip_parameter; diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h index aef781adbc1c..ffd8e88fa7cf 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h @@ -26,44 +26,76 @@ class LibtorchRLTrajectoryBuffer public: struct Trajectory { + /// Observations for each transition. std::vector> observations; + /// Next observations for each transition. std::vector> next_observations; + /// Actions applied at each transition. std::vector> actions; + /// Action log-probabilities recorded during rollout. std::vector> log_probabilities; + /// Scalar rewards for each transition. std::vector rewards; + /// Critic targets aligned with each transition. std::vector value_targets; + /// Advantage estimates aligned with each transition. std::vector advantages; }; struct TensorBatch { + /// Flattened observation matrix. torch::Tensor observations; + /// Flattened next-observation matrix. torch::Tensor next_observations; + /// Flattened action matrix. torch::Tensor actions; + /// Flattened action log-probabilities. torch::Tensor log_probabilities; + /// Flattened rewards. torch::Tensor rewards; + /// Flattened critic targets. torch::Tensor value_targets; + /// Flattened advantages. torch::Tensor advantages; + /// Return the number of transitions represented by the batch. 
std::int64_t size() const { return observations.defined() ? observations.size(0) : 0; } }; + /** + * Append one trajectory to the on-policy buffer. + * @param trajectory Trajectory to store. + */ void addTrajectory(Trajectory trajectory); + /// Clear every stored trajectory. void clear(); bool empty() const { return _trajectories.empty(); } std::size_t numTrajectories() const { return _trajectories.size(); } + /** + * Count the total number of transitions stored across every trajectory. + * @return Total transition count. + */ std::size_t numTransitions() const; std::vector & trajectories() { return _trajectories; } const std::vector & trajectories() const { return _trajectories; } + /** + * Flatten every stored trajectory into one tensor batch. + * @return Tensor batch ready for mini-batch sampling. + */ TensorBatch flatten() const; private: + /** + * Validate a trajectory before it is stored. + * @param trajectory Trajectory to validate. + */ static void validateTrajectory(const Trajectory & trajectory); std::vector _trajectories; diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h index d2bcb85f6fc2..81fd07ac2f71 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h @@ -24,19 +24,43 @@ class LibtorchRLValueEstimator public: struct Targets { + /// Generalized-advantage estimates. std::vector advantages; + /// Critic regression targets. std::vector value_targets; }; + /** + * Build the GAE helper. + * @param discount_factor Reward discount factor. + * @param lambda_factor GAE lambda factor. + */ LibtorchRLValueEstimator(Real discount_factor, Real lambda_factor); + /** + * Fill every trajectory in the buffer with value targets and advantages. + * @param buffer On-policy trajectory buffer to update. 
+ * @param value_network Critic used for target estimation. + */ void computeValueTargets(LibtorchRLTrajectoryBuffer & buffer, Moose::LibtorchArtificialNeuralNet & value_network) const; + /** + * Compute value targets and advantages for one trajectory. + * @param trajectory Trajectory to evaluate. + * @param value_network Critic used for target estimation. + * @return Advantage and value-target vectors for the trajectory. + */ Targets estimate(const LibtorchRLTrajectoryBuffer::Trajectory & trajectory, Moose::LibtorchArtificialNeuralNet & value_network) const; private: + /** + * Evaluate the critic on a batch of observations. + * @param observations Observation matrix to feed through the critic. + * @param value_network Critic used for the evaluation. + * @return One value estimate per observation row. + */ std::vector evaluate(const std::vector> & observations, Moose::LibtorchArtificialNeuralNet & value_network) const; diff --git a/modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h b/modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h index 155eb509ab19..52217ac90d19 100644 --- a/modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h +++ b/modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h @@ -16,11 +16,22 @@ class LiftDragRewardPostprocessor : public GeneralPostprocessor { public: static InputParameters validParams(); + + /** + * Build the rolling lift/drag reward postprocessor. + * @param parameters Input parameters for the postprocessor. + */ LiftDragRewardPostprocessor(const InputParameters & parameters); + /// Update the rolling lift and drag averages. virtual void execute() override; virtual void initialize() override {} using Postprocessor::getValue; + + /** + * Return the current reward value. + * @return Reward built from the rolling drag penalty and lift penalty. 
+ */ virtual Real getValue() const override; protected: diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 78bcf132162b..5de97b8e7160 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -42,9 +42,9 @@ LibtorchDRLControl::validParams() "If true, sample from the policy distribution; otherwise use the deterministic action."); params.addParam>( - "min_control_value", {}, "The minimum values of the control signal."); + "min_control_value", {}, "Optional lower bounds for each control signal."); params.addParam>( - "max_control_value", {}, "The maximum values of the control signal."); + "max_control_value", {}, "Optional upper bounds for each control signal."); params.addParam>( "action_standard_deviations", {}, diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 93d05945cdd8..952fd5eb904e 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -28,12 +28,10 @@ LibtorchDRLControlTrainer::validParams() "response", "Reporter values containing the response values from the model."); params.addParam>( "response_shift_factors", - "A shift constant which will be used to shift the response values. This is used for the " - "manipulation of the neural net inputs for better training efficiency."); + "Optional offsets applied to the observed state values before scaling."); params.addParam>( "response_scaling_factors", - "A normalization constant which will be used to divide the response values. 
This is used for " - "the manipulation of the neural net inputs for better training efficiency."); + "Optional multipliers applied after shifting the observed state values."); params.addRequiredParam>( "control", "Reporters containing the values of the controlled quantities (control signals) from the " @@ -60,34 +58,29 @@ LibtorchDRLControlTrainer::validParams() params.addRequiredParam("num_epochs", "Number of epochs for the training."); - params.addRequiredRangeCheckedParam( - "critic_learning_rate", - "0>( - "num_critic_neurons_per_layer", "Number of neurons per layer in the emulator neural net."); + params.addRequiredRangeCheckedParam("critic_learning_rate", + "0>("num_critic_neurons_per_layer", + "Hidden-layer widths for the critic network."); params.addParam>( "critic_activation_functions", std::vector({"relu"}), - "The type of activation functions to use in the emulator neural net. It is either one value " - "or one value per hidden layer."); + "Activation name for each critic hidden layer, or one shared value for all layers."); - params.addRequiredRangeCheckedParam( - "control_learning_rate", - "0("control_learning_rate", + "0>( "num_control_neurons_per_layer", "Number of neurons per layer for the control neural network."); params.addParam>( "control_activation_functions", std::vector({"relu"}), - "The type of activation functions to use in the control neural net. It " - "is either one value " - "or one value per hidden layer."); + "Activation name for each actor hidden layer, or one shared value for all layers."); params.addParam("filename_base", - "Filename used to output the neural net parameters."); + "Base filename used when writing actor and critic checkpoints."); params.addParam( "seed", 11, "Random number generator seed for stochastic optimizers."); @@ -104,8 +97,7 @@ LibtorchDRLControlTrainer::validParams() "decay_factor", 1.0, "0.0<=decay_factor<=1.0", - "Decay factor for calculating the return. 
This accounts for decreased " - "reward values from the later steps."); + "Discount factor used when building PPO return and GAE targets."); params.addRangeCheckedParam("lambda_factor", 1.0, "0.0<=lambda_factor<=1.0", "GAE lambda."); @@ -114,20 +106,18 @@ LibtorchDRLControlTrainer::validParams() params.addParam( "shift_outputs", true, - "If we would like to shift the outputs the realign the input-output pairs."); + "Whether to shift rollout outputs so observations and actions line up in time."); params.addParam( "standardize_advantage", true, "Switch to enable the shifting and normalization of the advantages in the PPO algorithm."); - params.addParam("loss_print_frequency", - 0, - "The frequency which is used to print the loss values. If 0, the " - "loss values are not printed."); - params.addParam("batch_size", 100, "Batch size"); + params.addParam( + "loss_print_frequency", 0, "Print PPO loss values every N updates. Use 0 to stay quiet."); + params.addParam("batch_size", 100, "Number of flattened samples per mini-batch."); params.addParam>( - "min_control_value", {}, "The minimum values of the control signal."); + "min_control_value", {}, "Optional lower bounds for each control signal."); params.addParam>( - "max_control_value", {}, "The maximum values of the control signal."); + "max_control_value", {}, "Optional upper bounds for each control signal."); params.addParam>( "action_standard_deviations", {}, diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C index 1ab783ff3f9b..0508de85742c 100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C @@ -25,15 +25,13 @@ SamplerNeuralNetControlTransfer::validParams() params.suppressParameter("from_multi_app"); - params.addRequiredParam("trainer_name", - "Trainer object 
that contains the neural networks." - " for different samples."); + params.addRequiredParam( + "trainer_name", "Trainer object that owns the latest controller network."); params.addRequiredParam("control_name", "Controller object name."); return params; } -SamplerNeuralNetControlTransfer::SamplerNeuralNetControlTransfer( - const InputParameters & parameters) +SamplerNeuralNetControlTransfer::SamplerNeuralNetControlTransfer(const InputParameters & parameters) : StochasticToolsTransfer(parameters), SurrogateModelInterface(this), _control_name(getParam("control_name")), @@ -53,7 +51,8 @@ SamplerNeuralNetControlTransfer::execute() const auto n = getToMultiApp()->numGlobalApps(); for (MooseIndex(n) i = 0; i < n; i++) { - // std::cout << "Do I have this app? " << i << " " << getToMultiApp()->hasLocalApp(i) << std::endl; + // std::cout << "Do I have this app? " << i << " " << getToMultiApp()->hasLocalApp(i) << + // std::endl; if (getToMultiApp()->hasLocalApp(i)) { // Get the control neural net from the trainer @@ -64,7 +63,7 @@ SamplerNeuralNetControlTransfer::execute() auto & control_warehouse = app_problem.getControlWarehouse(); std::shared_ptr control_ptr = control_warehouse.getActiveObject(_control_name); LibtorchNeuralNetControl * control_object = - dynamic_cast(control_ptr.get()); + dynamic_cast(control_ptr.get()); if (!control_object) paramError("control_name", "The given control is not a LibtorchNeuralNetrControl!"); @@ -79,7 +78,8 @@ SamplerNeuralNetControlTransfer::execute() // // We cast the parameters into a 1D vector // std::cout << "Transferring " << Moose::stringify(std::vector( // named_params[param_i].value().data_ptr(), - // named_params[param_i].value().data_ptr() + named_params[param_i].value().numel())) << std::endl; + // named_params[param_i].value().data_ptr() + + // named_params[param_i].value().numel())) << std::endl; // } } } @@ -127,7 +127,7 @@ SamplerNeuralNetControlTransfer::executeToMultiapp() auto & control_warehouse = 
app_problem.getControlWarehouse(); std::shared_ptr control_ptr = control_warehouse.getActiveObject(_control_name); LibtorchNeuralNetControl * control_object = - dynamic_cast(control_ptr.get()); + dynamic_cast(control_ptr.get()); if (!control_object) paramError("control_name", "The given control is not a LibtorchNeuralNetrControl!"); diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C index 9fad0b3a74f9..502586fde54b 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C @@ -21,6 +21,12 @@ namespace { +/** + * Fill in default action scaling and catch shape mistakes early. + * @param factors User-provided scaling factors. + * @param expected_size Number of action outputs expected by the distribution. + * @return A fully populated scaling-factor vector. + */ std::vector normalizeActionScalingFactors(const std::vector & factors, const unsigned int expected_size) { diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C index 50df0e3a7ef5..90e7e9983976 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -15,6 +15,13 @@ namespace { +/** + * Try to read one tensor from a plain libtorch archive. + * @param archive Archive being read. + * @param key Serialized tensor name. + * @param tensor Tensor that receives the loaded data. + * @return True when the tensor was found and loaded. + */ bool readArchiveTensor(torch::serialize::InputArchive & archive, const std::string & key, @@ -31,12 +38,24 @@ readArchiveTensor(torch::serialize::InputArchive & archive, } } +/** + * Copy a stored tensor into an existing parameter or buffer. 
+ * @param destination Tensor owned by the current module. + * @param source Tensor read from disk. + */ void copyTensor(torch::Tensor & destination, const torch::Tensor & source) { destination.data().copy_(source.to(destination.options())); } +/** + * Read an actor tensor, while still accepting the legacy action_head.* prefix. + * @param archive Archive being read. + * @param key Serialized tensor name expected by the current actor. + * @param tensor Tensor that receives the loaded data. + * @return True when the tensor was found and loaded. + */ bool readActorStateTensor(torch::serialize::InputArchive & archive, const std::string & key, @@ -51,6 +70,7 @@ readActorStateTensor(torch::serialize::InputArchive & archive, return false; } +/// Return true for actor buffers that older checkpoints may legitimately omit. bool isOptionalActorBuffer(const std::string & key) { @@ -58,12 +78,20 @@ isOptionalActorBuffer(const std::string & key) key == "action_head.action_scale"; } +/// Return true for actor parameters that older checkpoints may legitimately omit. bool isOptionalActorParameter(const std::string & key) { return key == "action_head.mean.bias" || key == "action_head.std.bias"; } +/** + * Look up one named tensor in a torch named-parameter or named-buffer list. + * @param tensors Torch named tensor list. + * @param key Tensor name to search for. + * @param tensor Tensor that receives the match. + * @return True when the requested tensor exists. + */ template bool findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch::Tensor & tensor) @@ -78,6 +106,13 @@ findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch: return false; } +/** + * Read one tensor from a scripted actor checkpoint, with legacy action_head.* fallback. + * @param tensors Scripted parameter or buffer list. + * @param key Serialized tensor name expected by the current actor. + * @param tensor Tensor that receives the loaded data. 
+ * @return True when the tensor was found and loaded. + */ template bool readScriptedActorStateTensor(const NamedTensorList & tensors, @@ -93,6 +128,13 @@ readScriptedActorStateTensor(const NamedTensorList & tensors, return false; } +/** + * Load actor parameters and buffers from a plain libtorch archive. + * @param nn Actor that receives the loaded state. + * @param filename Checkpoint file to read. + * @param error Human-readable error string filled on failure. + * @return True when the actor was loaded successfully. + */ bool loadActorStateFromArchive(Moose::LibtorchActorNeuralNet & nn, const std::string & filename, @@ -147,6 +189,13 @@ loadActorStateFromArchive(Moose::LibtorchActorNeuralNet & nn, } } +/** + * Load actor parameters and buffers from a scripted Torch module. + * @param nn Actor that receives the loaded state. + * @param filename Checkpoint file to read. + * @param error Human-readable error string filled on failure. + * @return True when the actor was loaded successfully. 
+ */ bool loadActorStateFromTorchScript(Moose::LibtorchActorNeuralNet & nn, const std::string & filename, diff --git a/modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C b/modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C index cf8c9a26ec6e..ed10858284c6 100644 --- a/modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C +++ b/modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C @@ -19,14 +19,21 @@ LiftDragRewardPostprocessor::validParams() { InputParameters params = GeneralPostprocessor::validParams(); - params.addRequiredParam("lift", "Lift coeff"); - params.addRequiredParam("drag", "Drag coeff"); + params.addRequiredParam( + "lift", "Postprocessor that supplies the current lift coefficient."); + params.addRequiredParam( + "drag", "Postprocessor that supplies the current drag coefficient."); - params.addParam("averaging_window", 1, "The window"); - params.addParam("coeff_1", 1.59, "Coeff 1"); - params.addParam("coeff_2", 0.2, "Coeff 2"); + params.addParam( + "averaging_window", + 1, + "Number of timesteps to include in the rolling lift and drag averages."); + params.addParam( + "coeff_1", 1.59, "Baseline reward offset before drag and lift penalties are applied."); + params.addParam("coeff_2", 0.2, "Multiplier applied to the absolute-value lift penalty."); - params.addClassDescription("Blabla."); + params.addClassDescription( + "Turns rolling lift and drag coefficients into a simple scalar reward signal."); return params; } From 2c32e39f4317b50abec8bbd74816b3418b2f45a5 Mon Sep 17 00:00:00 2001 From: Peter German Date: Fri, 24 Apr 2026 14:07:15 -0600 Subject: [PATCH 41/51] Make the random number generation consistent in DRL. 
--- .../utils/LibtorchArtificialNeuralNet.h | 2 +- .../libtorch/utils/LibtorchRandomUtils.h | 37 ++ .../utils/LibtorchArtificialNeuralNet.C | 7 +- .../src/libtorch/utils/LibtorchRandomUtils.C | 71 ++++ .../libtorch/controls/LibtorchDRLControl.h | 5 +- .../utils/LibtorchActionDistribution.h | 14 +- .../libtorch/utils/LibtorchActorNeuralNet.h | 29 +- .../utils/LibtorchRLMiniBatchSampler.h | 8 +- .../libtorch/controls/LibtorchDRLControl.C | 113 +++--- .../trainers/LibtorchDRLControlTrainer.C | 44 +- .../SamplerNeuralNetControlTransfer.C | 5 +- .../utils/LibtorchActionDistribution.C | 23 +- .../libtorch/utils/LibtorchActorNeuralNet.C | 377 +++--------------- .../utils/LibtorchRLMiniBatchSampler.C | 6 +- .../unit/src/TestLibtorchActorNeuralNet.C | 152 +++++++ .../unit/src/TestLibtorchRLCore.C | 34 ++ 16 files changed, 451 insertions(+), 476 deletions(-) create mode 100644 framework/include/libtorch/utils/LibtorchRandomUtils.h create mode 100644 framework/src/libtorch/utils/LibtorchRandomUtils.C diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index 1928a03376de..6d5bc88c4b22 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -114,7 +114,7 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu Real determineGain(const std::string & activation); /// Initialize the trainable weights and biases. 
- virtual void initializeNeuralNetwork(); + virtual void initializeNeuralNetwork(c10::optional generator = c10::nullopt); /// Store the network architecture in a json file (for debugging, visualization) void store(nlohmann::json & json) const; diff --git a/framework/include/libtorch/utils/LibtorchRandomUtils.h b/framework/include/libtorch/utils/LibtorchRandomUtils.h new file mode 100644 index 000000000000..2cb664198528 --- /dev/null +++ b/framework/include/libtorch/utils/LibtorchRandomUtils.h @@ -0,0 +1,37 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include + +#include "MooseTypes.h" + +namespace Moose +{ + +/// Create an owned CPU generator using libtorch's default seed behavior. +at::Generator makeLibtorchCPUGenerator(); + +/// Create an owned CPU generator with an explicit seed. +at::Generator makeLibtorchCPUGenerator(uint64_t seed); + +/** + * Fill a tensor with a (semi) orthogonal matrix using the provided generator. + * This mirrors torch::nn::init::orthogonal_, but avoids the ambient default RNG. 
+ */ +void orthogonalInitializeTensor(torch::Tensor & tensor, + Real gain = 1.0, + c10::optional generator = c10::nullopt); + +} // namespace Moose + +#endif diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index 4946ff24d7b4..ead957d43260 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -10,6 +10,7 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include "LibtorchArtificialNeuralNet.h" +#include "LibtorchRandomUtils.h" #include "MooseError.h" #include "LibtorchUtils.h" @@ -278,18 +279,18 @@ LibtorchArtificialNeuralNet::determineGain(const std::string & activation) } void -LibtorchArtificialNeuralNet::initializeNeuralNetwork() +LibtorchArtificialNeuralNet::initializeNeuralNetwork(const c10::optional generator) { for (unsigned int i = 0; i < numHiddenLayers(); ++i) { const auto & activation = _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; const Real gain = determineGain(activation); - torch::nn::init::orthogonal_(_weights[i]->weight, gain); + Moose::orthogonalInitializeTensor(_weights[i]->weight, gain, generator); torch::nn::init::zeros_(_weights[i]->bias); } - torch::nn::init::orthogonal_(_weights.back()->weight); + Moose::orthogonalInitializeTensor(_weights.back()->weight, 1.0, generator); torch::nn::init::zeros_(_weights.back()->bias); } diff --git a/framework/src/libtorch/utils/LibtorchRandomUtils.C b/framework/src/libtorch/utils/LibtorchRandomUtils.C new file mode 100644 index 000000000000..f1c18b349dc1 --- /dev/null +++ b/framework/src/libtorch/utils/LibtorchRandomUtils.C @@ -0,0 +1,71 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* 
https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRandomUtils.h" + +#include +#include +#include + +#include "MooseError.h" + +namespace Moose +{ + +at::Generator +makeLibtorchCPUGenerator() +{ + return at::detail::createCPUGenerator(); +} + +at::Generator +makeLibtorchCPUGenerator(const uint64_t seed) +{ + return at::detail::createCPUGenerator(seed); +} + +void +orthogonalInitializeTensor(torch::Tensor & tensor, + const Real gain, + const c10::optional generator) +{ + if (tensor.ndimension() < 2) + mooseError("Only tensors with 2 or more dimensions are supported for orthogonal " + "initialization."); + + if (!tensor.numel()) + return; + + torch::NoGradGuard no_grad; + + const auto rows = tensor.size(0); + const auto cols = tensor.numel() / rows; + auto flattened = torch::empty({rows, cols}, tensor.options()); + at::normal_out(flattened, 0.0, 1.0, {rows, cols}, generator); + + if (rows < cols) + flattened = flattened.transpose(0, 1); + + auto qr = at::linalg_qr(flattened, "reduced"); + auto q = std::get<0>(qr); + const auto phases = torch::diag(std::get<1>(qr), 0).sign(); + q = q * phases; + + if (rows < cols) + q = q.transpose(0, 1); + + tensor.view_as(q).copy_(q); + tensor.mul_(gain); +} + +} // namespace Moose + +#endif diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index dade0dde5be5..11c1f9e4bcd6 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -48,6 +48,9 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl /// Load the actor network from the configured checkpoint file. virtual void loadControlNeuralNetFromFile() override; + /// Reset the owned policy-sampling generator to a known seed. 
+ void setPolicySampleSeed(uint64_t seed); + protected: /// The log probability of control signals from the last evaluation of the controller std::vector _current_control_signal_log_probabilities; @@ -56,12 +59,12 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl std::vector _current_smoothed_signal; std::shared_ptr _actor_nn; + at::Generator _policy_generator; unsigned int _call_counter; const unsigned int _num_steps_in_period; const Real _smoother; const bool _stochastic; - }; #endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h index 4245f56b858f..119e9f7be58f 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h @@ -44,7 +44,7 @@ class LibtorchActionDistribution : public torch::nn::Module const std::vector & output_scaling_factors = {}); /// Initialize the trainable distribution parameters. - virtual void initialize() = 0; + virtual void initialize(c10::optional generator = c10::nullopt) = 0; /** * Refresh cached distribution parameters from the latest actor features. @@ -56,7 +56,7 @@ class LibtorchActionDistribution : public torch::nn::Module * Draw a stochastic action sample in physical units. * @return Sampled action tensor. */ - virtual torch::Tensor sample() const = 0; + virtual torch::Tensor sample(c10::optional generator = c10::nullopt) const = 0; /** * Return the deterministic action used for evaluation. 
@@ -127,11 +127,12 @@ class LibtorchGaussianActionDistribution : public LibtorchActionDistribution const std::vector & output_scaling_factors = {}, bool state_independent_std = true); - virtual void initialize() override; + virtual void initialize(c10::optional generator = c10::nullopt) override; virtual void reset(const torch::Tensor & input) override; - virtual torch::Tensor sample() const override; + virtual torch::Tensor + sample(c10::optional generator = c10::nullopt) const override; virtual torch::Tensor deterministicAction() const override; @@ -187,11 +188,12 @@ class LibtorchBetaActionDistribution : public LibtorchActionDistribution bool build_on_construct = true, const std::vector & output_scaling_factors = {}); - virtual void initialize() override; + virtual void initialize(c10::optional generator = c10::nullopt) override; virtual void reset(const torch::Tensor & input) override; - virtual torch::Tensor sample() const override; + virtual torch::Tensor + sample(c10::optional generator = c10::nullopt) const override; virtual torch::Tensor deterministicAction() const override; diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h index 2adaafd44b91..9613c56af151 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -12,7 +12,6 @@ #pragma once #include -#include #include "LibtorchActionDistribution.h" #include "LibtorchArtificialNeuralNet.h" @@ -75,13 +74,15 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet * @param sampled Whether to draw a stochastic sample. * @return Action tensor produced by the actor. 
*/ - virtual torch::Tensor evaluate(torch::Tensor & input, bool sampled); + virtual torch::Tensor evaluate(torch::Tensor & input, + bool sampled, + c10::optional generator = c10::nullopt); /** * Sample an action from the already-reset distribution. * @return Sampled action tensor. */ - virtual torch::Tensor sample(); + virtual torch::Tensor sample(c10::optional generator = c10::nullopt); /// Build the hidden layers and the matching action-distribution module. virtual void constructNeuralNetwork() override; @@ -128,7 +129,8 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet torch::Tensor entropy(); /// Initialize the hidden layers and action-distribution parameters. - virtual void initializeNeuralNetwork() override; + virtual void + initializeNeuralNetwork(c10::optional generator = c10::nullopt) override; protected: const std::vector _minimum_values; @@ -145,30 +147,13 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); /** - * Load an actor checkpoint written either as a state archive or TorchScript module. + * Load an actor checkpoint written as a native libtorch state archive. * @param nn Actor network that receives the loaded state. * @param filename Checkpoint file to read. */ void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename); -/** - * Check whether a checkpoint comes from the older serialized-parameter layout. - * @param filename Checkpoint file to inspect. - * @return True if the file matches the legacy actor format. - */ -bool isLegacyLibtorchActorArchive(const std::string & filename); - -/** - * Load a checkpoint that still uses the legacy actor serialization layout. - * @param nn Actor network that receives the loaded state. - * @param filename Checkpoint file to read. - * @param action_standard_deviations Fallback std values for older Gaussian checkpoints. 
- */ -void loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, - const std::string & filename, - const std::vector & action_standard_deviations); - } template <> diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h index 12d60cb2cda7..af3c290923d1 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h @@ -48,9 +48,11 @@ class LibtorchRLMiniBatchSampler * @param standardize_advantage Whether to normalize the advantages inside each chunk. * @return Vector of sampled mini-batches. */ - std::vector sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, - unsigned int batch_size, - bool standardize_advantage) const; + std::vector + sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, + unsigned int batch_size, + bool standardize_advantage, + c10::optional generator = c10::nullopt) const; private: /** diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 5de97b8e7160..0fedd805cf31 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -10,7 +10,7 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include "LibtorchDRLControl.h" -#include "TorchScriptModule.h" +#include "LibtorchRandomUtils.h" #include "Transient.h" #include "LibtorchUtils.h" @@ -23,6 +23,7 @@ LibtorchDRLControl::validParams() params.addClassDescription( "Sets the value of multiple 'Real' input parameters and postprocessors based on a Deep " "Reinforcement Learning (DRL) neural network trained using a PPO algorithm."); + params.suppressParameter("torch_script_format"); params.addParam("seed", "Seed for the random number generator."); @@ -45,11 +46,6 @@ 
LibtorchDRLControl::validParams() "min_control_value", {}, "Optional lower bounds for each control signal."); params.addParam>( "max_control_value", {}, "Optional upper bounds for each control signal."); - params.addParam>( - "action_standard_deviations", - {}, - "Deprecated compatibility parameter. Actor policies now learn their own action " - "distribution widths."); params.addParam( "state_independent_std", true, @@ -64,6 +60,7 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) _current_control_signal_log_probabilities(std::vector(_control_names.size(), 0.0)), _previous_control_signal(std::vector(_control_names.size(), 0.0)), _current_smoothed_signal(std::vector(_control_names.size(), 0.0)), + _policy_generator(Moose::makeLibtorchCPUGenerator()), _call_counter(0), _num_steps_in_period(parameters.isParamSetByUser("num_steps_in_period") ? getParam("num_steps_in_period") @@ -71,72 +68,48 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) _smoother(getParam("smoother")), _stochastic(getParam("stochastic")) { - // Fixing the RNG seed to make sure every experiment is the same. if (isParamValid("seed")) - torch::manual_seed(getParam("seed")); + setPolicySampleSeed(getParam("seed")); } void LibtorchDRLControl::loadControlNeuralNetFromFile() { const auto & filename = getParam("filename"); - if (getParam("torch_script_format")) - { - _actor_nn.reset(); - _nn = std::make_shared(filename); - } - else - { - unsigned int num_inputs = _response_names.size() * _input_timesteps; - unsigned int num_outputs = _control_names.size(); - std::vector num_neurons_per_layer = - getParam>("num_neurons_per_layer"); - std::vector activation_functions = - isParamSetByUser("activation_function") - ? 
getParam>("activation_function") - : std::vector({"relu"}); - - const std::vector & minimum_values = getParam>("min_control_value"); - const std::vector & maximum_values = getParam>("max_control_value"); - const auto input_shift_factors = - _observation_history.expandFeatureFactors(_response_shift_factors); - const auto input_scaling_factors = - _observation_history.expandFeatureFactors(_response_scaling_factors); - - auto nn = - std::make_shared(filename, - num_inputs, - num_outputs, - num_neurons_per_layer, - activation_functions, - minimum_values, - maximum_values, - torch::kCPU, - torch::kDouble, - true, - input_shift_factors, - input_scaling_factors, - _action_scaling_factors, - getParam("state_independent_std")); - - try - { - if (Moose::isLegacyLibtorchActorArchive(filename)) - Moose::loadLegacyLibtorchActorNeuralNetState( - *nn, filename, getParam>("action_standard_deviations")); - else - Moose::loadLibtorchActorNeuralNetState(*nn, filename); - } - catch (const c10::Error & e) - { - mooseError("The requested pytorch parameter file could not be loaded for the control neural " - "net.\n", - e.msg()); - } - - _actor_nn = std::make_shared(*nn); - _nn = _actor_nn; - } + unsigned int num_inputs = _response_names.size() * _input_timesteps; + unsigned int num_outputs = _control_names.size(); + std::vector num_neurons_per_layer = + getParam>("num_neurons_per_layer"); + std::vector activation_functions = + isParamSetByUser("activation_function") + ? 
getParam>("activation_function") + : std::vector({"relu"}); + + const std::vector & minimum_values = getParam>("min_control_value"); + const std::vector & maximum_values = getParam>("max_control_value"); + const auto input_shift_factors = + _observation_history.expandFeatureFactors(_response_shift_factors); + const auto input_scaling_factors = + _observation_history.expandFeatureFactors(_response_scaling_factors); + + _actor_nn = + std::make_shared(filename, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_functions, + minimum_values, + maximum_values, + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors, + _action_scaling_factors, + getParam("state_independent_std")); + + Moose::loadLibtorchActorNeuralNetState(*_actor_nn, filename); + _nn = _actor_nn; } void @@ -165,7 +138,7 @@ LibtorchDRLControl::execute() if (_actor_nn) { - action = _actor_nn->evaluate(input_tensor, _stochastic); + action = _actor_nn->evaluate(input_tensor, _stochastic, _policy_generator); if (_stochastic) { @@ -215,8 +188,14 @@ LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNe const auto * check = dynamic_cast(&input_nn); if (!check) mooseError("This needs to be a LibtorchActorNeuralNet!"); - _nn = std::make_shared(*check); _actor_nn = std::make_shared(*check); + _nn = _actor_nn; +} + +void +LibtorchDRLControl::setPolicySampleSeed(const uint64_t seed) +{ + _policy_generator.set_current_seed(seed); } Real diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 952fd5eb904e..23626fce7b50 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -10,6 +10,7 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include "LibtorchDRLControlTrainer.h" +#include "LibtorchRandomUtils.h" #include #include @@ -118,11 
+119,6 @@ LibtorchDRLControlTrainer::validParams() "min_control_value", {}, "Optional lower bounds for each control signal."); params.addParam>( "max_control_value", {}, "Optional upper bounds for each control signal."); - params.addParam>( - "action_standard_deviations", - {}, - "Deprecated compatibility parameter. Actor policies now learn their own action " - "distribution widths."); params.addParam( "state_independent_std", true, @@ -209,10 +205,6 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par getReporterPointers(_action_names, _action_value_pointers); getReporterPointers(_log_probability_names, _log_probability_value_pointers); - // Fixing the RNG seed to make sure every experiment is the same. - // Otherwise sampling / stochastic gradient descent would be different. - torch::manual_seed(_seed); - bool filename_valid = isParamValid("filename_base"); const auto input_shift_factors = _observation_history.expandFeatureFactors(_state_shift_factors); const auto input_scaling_factors = @@ -238,22 +230,8 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par // We read parameters for the control neural net if it is requested if (_read_from_file) { - try - { - if (Moose::isLegacyLibtorchActorArchive(_control_nn->name())) - Moose::loadLegacyLibtorchActorNeuralNetState( - *_control_nn, - _control_nn->name(), - getParam>("action_standard_deviations")); - else - Moose::loadLibtorchActorNeuralNetState(*_control_nn, _control_nn->name()); - _console << "Loaded requested .pt file." << std::endl; - } - catch (const c10::Error & e) - { - mooseError("The requested pytorch file could not be loaded for the control neural net.\n", - e.msg()); - } + Moose::loadLibtorchActorNeuralNetState(*_control_nn, _control_nn->name()); + _console << "Loaded requested .pt file." 
<< std::endl; } else if (filename_valid) torch::save(_control_nn, _control_nn->name()); @@ -293,8 +271,10 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par else if (filename_valid) torch::save(_critic_nn, _critic_nn->name()); - _control_nn->initializeNeuralNetwork(); - _critic_nn->initializeNeuralNetwork(); + _control_nn->initializeNeuralNetwork( + Moose::makeLibtorchCPUGenerator(static_cast(_seed))); + _critic_nn->initializeNeuralNetwork( + Moose::makeLibtorchCPUGenerator(static_cast(_seed) + 1)); } void @@ -386,15 +366,13 @@ LibtorchDRLControlTrainer::trainController(const LibtorchRLTrajectoryBuffer::Ten // fetch the local threads which are available. if (processor_id() == 0) { - // Reset the mini-batch RNG for each outer training step so optimizer shuffling remains - // independent of how rollout sampling happened to be partitioned across MPI ranks. - torch::manual_seed(static_cast(_seed) + - static_cast(_fe_problem.timeStep())); + auto shuffle_generator = Moose::makeLibtorchCPUGenerator( + static_cast(_seed) + static_cast(_fe_problem.timeStep())); for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch) { - const auto mini_batches = - _sampler.sample(batch, getParam("batch_size"), _standardize_advantage); + const auto mini_batches = _sampler.sample( + batch, getParam("batch_size"), _standardize_advantage, shuffle_generator); bool printed_losses = false; for (const auto & mini_batch : mini_batches) { diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C index 0508de85742c..a81c9f12daaf 100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C @@ -10,6 +10,7 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include "SamplerNeuralNetControlTransfer.h" +#include "LibtorchDRLControl.h" #include 
"LibtorchNeuralNetControl.h" registerMooseObject("StochasticToolsApp", SamplerNeuralNetControlTransfer); @@ -117,8 +118,6 @@ SamplerNeuralNetControlTransfer::executeToMultiapp() static_cast(_global_index) + static_cast(_sampler_ptr->getNumberOfRows()) * static_cast(_fe_problem.timeStep()); - torch::manual_seed(sample_seed); - // Get the control neural net from the trainer const Moose::LibtorchArtificialNeuralNet & trainer_nn = _trainer.controlNeuralNet(); @@ -134,6 +133,8 @@ SamplerNeuralNetControlTransfer::executeToMultiapp() // Copy and the neural net and execute it to get the initial values control_object->loadControlNeuralNet(trainer_nn); + if (auto * drl_control = dynamic_cast(control_object)) + drl_control->setPolicySampleSeed(sample_seed); control_object->execute(); } } diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C index 502586fde54b..992b907ec7f5 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C @@ -11,6 +11,7 @@ #include "LibtorchActionDistribution.h" +#include "LibtorchRandomUtils.h" #include "LibtorchUtils.h" #include "MooseError.h" @@ -128,9 +129,9 @@ LibtorchGaussianActionDistribution::constructDistribution() } void -LibtorchGaussianActionDistribution::initialize() +LibtorchGaussianActionDistribution::initialize(const c10::optional generator) { - torch::nn::init::orthogonal_(_mean_module->weight); + Moose::orthogonalInitializeTensor(_mean_module->weight, 1.0, generator); torch::nn::init::zeros_(_mean_module->bias); if (_state_independent_std) @@ -140,7 +141,7 @@ LibtorchGaussianActionDistribution::initialize() return; } - torch::nn::init::orthogonal_(_std_module->weight); + Moose::orthogonalInitializeTensor(_std_module->weight, 1.0, generator); torch::nn::init::zeros_(_std_module->bias); } @@ -165,9 +166,9 @@ 
LibtorchGaussianActionDistribution::reset(const torch::Tensor & input) } torch::Tensor -LibtorchGaussianActionDistribution::sample() const +LibtorchGaussianActionDistribution::sample(const c10::optional generator) const { - return at::normal(_mean, _std_tensor) * actionScaleTensor(); + return at::normal(_mean, _std_tensor, generator) * actionScaleTensor(); } torch::Tensor @@ -244,12 +245,12 @@ LibtorchBetaActionDistribution::constructDistribution() } void -LibtorchBetaActionDistribution::initialize() +LibtorchBetaActionDistribution::initialize(const c10::optional generator) { - torch::nn::init::orthogonal_(_alpha_module->weight); + Moose::orthogonalInitializeTensor(_alpha_module->weight, 1.0, generator); torch::nn::init::zeros_(_alpha_module->bias); - torch::nn::init::orthogonal_(_beta_module->weight); + Moose::orthogonalInitializeTensor(_beta_module->weight, 1.0, generator); torch::nn::init::zeros_(_beta_module->bias); } @@ -268,10 +269,10 @@ LibtorchBetaActionDistribution::reset(const torch::Tensor & input) } torch::Tensor -LibtorchBetaActionDistribution::sample() const +LibtorchBetaActionDistribution::sample(const c10::optional generator) const { - const auto alpha_sample = at::_standard_gamma(_alpha_tensor); - const auto beta_sample = at::_standard_gamma(_beta_tensor); + const auto alpha_sample = at::_standard_gamma(_alpha_tensor, generator); + const auto beta_sample = at::_standard_gamma(_beta_tensor, generator); const auto sampled = alpha_sample / (alpha_sample + beta_sample); return (_min_tensor + (_max_tensor - _min_tensor) * sampled) * actionScaleTensor(); } diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C index 90e7e9983976..75371eef0db0 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -10,244 +10,49 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include 
"LibtorchActorNeuralNet.h" +#include "LibtorchRandomUtils.h" #include "MooseError.h" +#include "libmesh/utility.h" namespace { -/** - * Try to read one tensor from a plain libtorch archive. - * @param archive Archive being read. - * @param key Serialized tensor name. - * @param tensor Tensor that receives the loaded data. - * @return True when the tensor was found and loaded. - */ -bool -readArchiveTensor(torch::serialize::InputArchive & archive, - const std::string & key, - torch::Tensor & tensor) -{ - try - { - archive.read(key, tensor); - return true; - } - catch (const c10::Error &) - { - return false; - } -} - -/** - * Copy a stored tensor into an existing parameter or buffer. - * @param destination Tensor owned by the current module. - * @param source Tensor read from disk. - */ -void -copyTensor(torch::Tensor & destination, const torch::Tensor & source) -{ - destination.data().copy_(source.to(destination.options())); -} - -/** - * Read an actor tensor, while still accepting the legacy action_head.* prefix. - * @param archive Archive being read. - * @param key Serialized tensor name expected by the current actor. - * @param tensor Tensor that receives the loaded data. - * @return True when the tensor was found and loaded. - */ -bool -readActorStateTensor(torch::serialize::InputArchive & archive, - const std::string & key, - torch::Tensor & tensor) -{ - if (readArchiveTensor(archive, key, tensor)) - return true; - - if (key.rfind("action_head.", 0) == 0) - return readArchiveTensor(archive, key.substr(std::string("action_head.").size()), tensor); - - return false; -} - -/// Return true for actor buffers that older checkpoints may legitimately omit. -bool -isOptionalActorBuffer(const std::string & key) -{ - return key == "input_shift" || key == "input_scale" || key == "output_scale" || - key == "action_head.action_scale"; -} - -/// Return true for actor parameters that older checkpoints may legitimately omit. 
-bool -isOptionalActorParameter(const std::string & key) -{ - return key == "action_head.mean.bias" || key == "action_head.std.bias"; -} - -/** - * Look up one named tensor in a torch named-parameter or named-buffer list. - * @param tensors Torch named tensor list. - * @param key Tensor name to search for. - * @param tensor Tensor that receives the match. - * @return True when the requested tensor exists. - */ -template -bool -findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch::Tensor & tensor) -{ - for (const auto & entry : tensors) - if (entry.name == key) - { - tensor = entry.value; - return true; - } - - return false; -} - -/** - * Read one tensor from a scripted actor checkpoint, with legacy action_head.* fallback. - * @param tensors Scripted parameter or buffer list. - * @param key Serialized tensor name expected by the current actor. - * @param tensor Tensor that receives the loaded data. - * @return True when the tensor was found and loaded. - */ template -bool -readScriptedActorStateTensor(const NamedTensorList & tensors, - const std::string & key, - torch::Tensor & tensor) +auto +captureTensorShapes(const NamedTensorList & tensors) { - if (findNamedTensor(tensors, key, tensor)) - return true; - - if (key.rfind("action_head.", 0) == 0) - return findNamedTensor(tensors, key.substr(std::string("action_head.").size()), tensor); - - return false; -} - -/** - * Load actor parameters and buffers from a plain libtorch archive. - * @param nn Actor that receives the loaded state. - * @param filename Checkpoint file to read. - * @param error Human-readable error string filled on failure. - * @return True when the actor was loaded successfully. 
- */ -bool -loadActorStateFromArchive(Moose::LibtorchActorNeuralNet & nn, - const std::string & filename, - std::string & error) -{ - try - { - torch::serialize::InputArchive archive; - archive.load_from(filename); + std::vector>> shapes; + shapes.reserve(tensors.size()); - for (auto & parameter : nn.named_parameters()) - { - torch::Tensor stored_tensor; - if (!readActorStateTensor(archive, parameter.key(), stored_tensor)) - { - if (isOptionalActorParameter(parameter.key())) - { - parameter.value().data().zero_(); - continue; - } - - error = "Missing serialized parameter: " + parameter.key(); - return false; - } - - copyTensor(parameter.value(), stored_tensor); - } - - for (auto & buffer : nn.named_buffers()) - { - torch::Tensor stored_tensor; - if (!readActorStateTensor(archive, buffer.key(), stored_tensor)) - { - if (isOptionalActorBuffer(buffer.key())) - continue; - - error = "Missing serialized buffer: " + buffer.key(); - return false; - } - - copyTensor(buffer.value(), stored_tensor); - } + for (const auto & tensor : tensors) + shapes.emplace_back( + tensor.key(), + std::vector(tensor.value().sizes().begin(), tensor.value().sizes().end())); - nn.synchronizeAffineFactorsFromBuffers(); - nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); - return true; - } - catch (const c10::Error & e) - { - error = e.msg(); - return false; - } + return shapes; } -/** - * Load actor parameters and buffers from a scripted Torch module. - * @param nn Actor that receives the loaded state. - * @param filename Checkpoint file to read. - * @param error Human-readable error string filled on failure. - * @return True when the actor was loaded successfully. 
- */ -bool -loadActorStateFromTorchScript(Moose::LibtorchActorNeuralNet & nn, - const std::string & filename, - std::string & error) +template +void +verifyTensorShapes(const NamedTensorList & tensors, + const std::vector>> & expected, + const char * tensor_kind) { - try - { - const auto scripted = torch::jit::load(filename); - const auto scripted_parameters = scripted.named_parameters(); - const auto scripted_buffers = scripted.named_buffers(); - - for (auto & parameter : nn.named_parameters()) - { - torch::Tensor stored_tensor; - if (!readScriptedActorStateTensor(scripted_parameters, parameter.key(), stored_tensor)) - { - if (isOptionalActorParameter(parameter.key())) - { - parameter.value().data().zero_(); - continue; - } - - error = "Missing scripted parameter: " + parameter.key(); - return false; - } - - copyTensor(parameter.value(), stored_tensor); - } - - for (auto & buffer : nn.named_buffers()) - { - torch::Tensor stored_tensor; - if (!readScriptedActorStateTensor(scripted_buffers, buffer.key(), stored_tensor)) - { - if (isOptionalActorBuffer(buffer.key())) - continue; - - error = "Missing scripted buffer: " + buffer.key(); - return false; - } - - copyTensor(buffer.value(), stored_tensor); - } + if (tensors.size() != expected.size()) + mooseError("The loaded DRL actor ", tensor_kind, " count does not match the generated schema."); - nn.synchronizeAffineFactorsFromBuffers(); - nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); - return true; - } - catch (const c10::Error & e) + for (const auto tensor_i : make_range(tensors.size())) { - error = e.msg(); - return false; + const auto actual_shape = std::vector(tensors[tensor_i].value().sizes().begin(), + tensors[tensor_i].value().sizes().end()); + + if (tensors[tensor_i].key() != expected[tensor_i].first || + actual_shape != expected[tensor_i].second) + mooseError("The loaded DRL actor ", + tensor_kind, + " '", + tensors[tensor_i].key(), + "' does not match the generated schema."); } } @@ -331,18 
+136,18 @@ LibtorchActorNeuralNet::LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralN } void -LibtorchActorNeuralNet::initializeNeuralNetwork() +LibtorchActorNeuralNet::initializeNeuralNetwork(const c10::optional generator) { for (unsigned int i = 0; i < numHiddenLayers(); ++i) { const auto & activation = _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; const Real gain = determineGain(activation); - torch::nn::init::orthogonal_(_weights[i]->weight, gain); + Moose::orthogonalInitializeTensor(_weights[i]->weight, gain, generator); torch::nn::init::zeros_(_weights[i]->bias); } - _action_distribution->initialize(); + _action_distribution->initialize(generator); } void @@ -488,7 +293,9 @@ LibtorchActorNeuralNet::forward(const torch::Tensor & x) } torch::Tensor -LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) +LibtorchActorNeuralNet::evaluate(torch::Tensor & x, + const bool sampled, + const c10::optional generator) { torch::Tensor output = forward(x); @@ -496,15 +303,15 @@ LibtorchActorNeuralNet::evaluate(torch::Tensor & x, bool sampled) resetDistributionParams(output); if (sampled) - return sample(); + return sample(generator); return _action_distribution->deterministicAction(); } torch::Tensor -LibtorchActorNeuralNet::sample() +LibtorchActorNeuralNet::sample(const c10::optional generator) { - return _action_distribution->sample(); + return _action_distribution->sample(generator); } torch::Tensor @@ -515,108 +322,28 @@ LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename) -{ - std::string archive_error; - if (loadActorStateFromArchive(nn, filename, archive_error)) - return; - - std::string torchscript_error; - if (loadActorStateFromTorchScript(nn, filename, torchscript_error)) - return; - - mooseError("The requested pytorch parameter file could not be loaded. 
This can either be " - "the result of the file not existing or a misalignment in the generated " - "container and the data in the file. Make sure the dimensions of the generated " - "neural net are the same as the dimensions of the parameters in the input file!\n" - "InputArchive load failed with: ", - archive_error, - "\nTorchScript load failed with: ", - torchscript_error); -} - -bool -isLegacyLibtorchActorArchive(const std::string & filename) { try { - const auto scripted = torch::jit::load(filename); - const auto parameters = scripted.named_parameters(); + const auto expected_parameters = captureTensorShapes(nn.named_parameters()); + const auto expected_buffers = captureTensorShapes(nn.named_buffers()); - torch::Tensor ignored; - return findNamedTensor(parameters, "output_layer_.weight", ignored) && - !findNamedTensor(parameters, "action_head.mean.weight", ignored); - } - catch (const c10::Error &) - { - return false; - } -} - -void -loadLegacyLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, - const std::string & filename, - const std::vector & action_standard_deviations) -{ - if (nn.actionDistribution().isBounded()) - mooseError("Legacy deterministic DRL checkpoints are only supported for unbounded actors."); - - const auto legacy_std = action_standard_deviations.empty() - ? 
std::vector(nn.numOutputs(), 1e-12) - : action_standard_deviations; - - if (legacy_std.size() != nn.numOutputs()) - mooseError("The number of action_standard_deviations entries must match the number of action " - "outputs when loading a legacy deterministic DRL checkpoint."); - - for (const auto std_value : legacy_std) - if (!(std_value > 0.0)) - mooseError("Legacy action_standard_deviations entries must be strictly positive."); + torch::serialize::InputArchive archive; + archive.load_from(filename); + nn.load(archive); - const auto scripted = torch::jit::load(filename); - const auto legacy_parameters = scripted.named_parameters(); + verifyTensorShapes(nn.named_parameters(), expected_parameters, "parameter"); + verifyTensorShapes(nn.named_buffers(), expected_buffers, "buffer"); - for (auto & parameter : nn.named_parameters()) + nn.synchronizeAffineFactorsFromBuffers(); + nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); + } + catch (const c10::Error & e) { - const auto & key = parameter.key(); - torch::Tensor stored_tensor; - - if (key == "action_head.mean.weight") - { - if (!findNamedTensor(legacy_parameters, "output_layer_.weight", stored_tensor)) - mooseError("Legacy deterministic DRL checkpoint is missing output_layer_.weight."); - copyTensor(parameter.value(), stored_tensor); - continue; - } - - if (key == "action_head.mean.bias") - { - if (!findNamedTensor(legacy_parameters, "output_layer_.bias", stored_tensor)) - mooseError("Legacy deterministic DRL checkpoint is missing output_layer_.bias."); - copyTensor(parameter.value(), stored_tensor); - continue; - } - - if (key == "action_head.std.weight") - { - parameter.value().data().zero_(); - continue; - } - - if (key == "action_head.std.bias") - { - auto log_std = torch::log(torch::tensor(legacy_std, parameter.value().options())); - copyTensor(parameter.value(), log_std); - continue; - } - - if (!findNamedTensor(legacy_parameters, key, stored_tensor)) - mooseError("Legacy deterministic DRL 
checkpoint is missing serialized parameter: ", key); - - copyTensor(parameter.value(), stored_tensor); + mooseError("The requested DRL actor checkpoint could not be loaded as a native libtorch " + "archive. Make sure the file exists and matches the generated actor schema.\n", + e.msg()); } - - nn.synchronizeAffineFactorsFromBuffers(); - nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); } } diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C index bebec8f139ac..8bf6ec7381ca 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C @@ -18,7 +18,8 @@ std::vector LibtorchRLMiniBatchSampler::sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, const unsigned int batch_size, - const bool standardize_advantage) const + const bool standardize_advantage, + const c10::optional generator) const { std::vector mini_batches; @@ -28,7 +29,8 @@ LibtorchRLMiniBatchSampler::sample(const LibtorchRLTrajectoryBuffer::TensorBatch validateBatch(batch); const auto effective_batch_size = std::max(1, batch_size); - auto permutation = torch::randperm(batch.size(), torch::TensorOptions().dtype(torch::kLong)); + auto permutation = + at::randperm(batch.size(), generator, torch::TensorOptions().dtype(torch::kLong)); for (std::int64_t batch_begin = 0; batch_begin < batch.size(); batch_begin += effective_batch_size) diff --git a/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C index 23a53d615e9b..a581518ea4d4 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C +++ b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C @@ -11,6 +11,7 @@ #include "gtest/gtest.h" #include "LibtorchActorNeuralNet.h" +#include "LibtorchRandomUtils.h" #include 
"MooseUnitUtils.h" #include @@ -191,6 +192,85 @@ TEST(LibtorchActorNeuralNetTest, gaussianActorCanUseStateDependentStdWhenRequest EXPECT_GT(second_std, first_std); } +TEST(LibtorchActorNeuralNetTest, explicitGeneratorKeepsGaussianSamplingStableAcrossCopies) +{ + TestableLibtorchActorNeuralNet original("test_gaussian", + 1, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {0.0}, + {1.0}, + {1.0}); + + original.gaussianActionDistribution().meanModule()->weight.data().fill_(0.75); + original.gaussianActionDistribution().meanModule()->bias.data().fill_(1.25); + original.gaussianActionDistribution().stdModule()->weight.data().fill_(0.0); + original.gaussianActionDistribution().stdModule()->bias.data().fill_(std::log(0.5)); + + TestableLibtorchActorNeuralNet copied(original); + + auto original_input = torch::tensor({{2.0}}, at::kDouble); + auto copied_input = torch::tensor({{2.0}}, at::kDouble); + auto original_generator = Moose::makeLibtorchCPUGenerator(12345); + auto copied_generator = Moose::makeLibtorchCPUGenerator(12345); + + const auto original_action = original.evaluate(original_input, true, original_generator); + const auto copied_action = copied.evaluate(copied_input, true, copied_generator); + + EXPECT_TRUE(torch::allclose(original_action, copied_action, /* rtol = */ 0.0, /* atol = */ 0.0)); +} + +TEST(LibtorchActorNeuralNetTest, explicitGeneratorMakesInitializationIndependentOfConstructionOrder) +{ + TestableLibtorchActorNeuralNet first("first_actor", + 2, + 1, + {3}, + {"relu"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {0.0, 0.0}, + {1.0, 1.0}, + {1.0}); + TestableLibtorchActorNeuralNet second("second_actor", + 2, + 1, + {3}, + {"relu"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {0.0, 0.0}, + {1.0, 1.0}, + {1.0}); + + first.initializeNeuralNetwork(Moose::makeLibtorchCPUGenerator(2468)); + second.initializeNeuralNetwork(Moose::makeLibtorchCPUGenerator(2468)); + + const auto first_parameters = 
first.named_parameters(); + const auto second_parameters = second.named_parameters(); + ASSERT_EQ(first_parameters.size(), second_parameters.size()); + for (const auto i : index_range(first_parameters)) + { + EXPECT_EQ(first_parameters[i].key(), second_parameters[i].key()); + EXPECT_TRUE(torch::allclose(first_parameters[i].value(), + second_parameters[i].value(), + /* rtol = */ 0.0, + /* atol = */ 0.0)); + } +} + TEST(LibtorchActorNeuralNetTest, loadActorStateAcceptsTorchSaveArchive) { TestableLibtorchActorNeuralNet saved("saved_actor", @@ -267,4 +347,76 @@ TEST(LibtorchActorNeuralNetTest, loadActorStateAcceptsTorchSaveArchive) /*atol=*/0.0)); } +TEST(LibtorchActorNeuralNetTest, loadActorStateRejectsHiddenLayerMismatch) +{ + TestableLibtorchActorNeuralNet saved("saved_actor", + 2, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + Moose::UnitUtils::TempFile archive; + torch::save(std::make_shared(saved), archive.path().string()); + + TestableLibtorchActorNeuralNet restored("restored_actor", + 2, + 1, + {3}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + EXPECT_ANY_THROW(Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string())); +} + +TEST(LibtorchActorNeuralNetTest, loadActorStateRejectsBoundednessMismatch) +{ + TestableLibtorchActorNeuralNet saved("saved_actor", + 1, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0}, + {2.0}, + {1.0}); + + Moose::UnitUtils::TempFile archive; + torch::save(std::make_shared(saved), archive.path().string()); + + TestableLibtorchActorNeuralNet restored("restored_actor", + 1, + 1, + {2}, + {"linear"}, + {-2.0}, + {2.0}, + torch::kCPU, + torch::kDouble, + true, + {1.0}, + {2.0}, + {1.0}); + + EXPECT_ANY_THROW(Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string())); +} + #endif diff --git 
a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C index 6d5605f8b69d..c5b4c0bb892f 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C +++ b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C @@ -14,6 +14,7 @@ #include "LibtorchActorNeuralNet.h" #include "LibtorchArtificialNeuralNet.h" #include "LibtorchObservationHistory.h" +#include "LibtorchRandomUtils.h" #include "LibtorchRLMiniBatchSampler.h" #include "LibtorchRLPPOLoss.h" #include "LibtorchRLTrajectoryBuffer.h" @@ -126,6 +127,39 @@ TEST(LibtorchRLCoreTest, MiniBatchSamplerStandardizesAdvantagesPerBatch) } } +TEST(LibtorchRLCoreTest, MiniBatchSamplerUsesExplicitGeneratorForDeterministicShuffling) +{ + LibtorchRLTrajectoryBuffer::TensorBatch batch; + batch.observations = + torch::tensor({{0.0}, {1.0}, {2.0}, {3.0}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.actions = + torch::tensor({{0.1}, {0.2}, {0.3}, {0.4}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.log_probabilities = + torch::tensor({{-1.0}, {-1.1}, {-1.2}, {-1.3}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.value_targets = + torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.advantages = + torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble)); + + LibtorchRLMiniBatchSampler sampler; + const auto first_batches = sampler.sample(batch, 2, false, Moose::makeLibtorchCPUGenerator(9876)); + const auto second_batches = + sampler.sample(batch, 2, false, Moose::makeLibtorchCPUGenerator(9876)); + + ASSERT_EQ(first_batches.size(), second_batches.size()); + for (const auto i : index_range(first_batches)) + { + EXPECT_TRUE(torch::allclose(first_batches[i].observations, + second_batches[i].observations, + /* rtol = */ 0.0, + /* atol = */ 0.0)); + EXPECT_TRUE(torch::allclose(first_batches[i].actions, + second_batches[i].actions, + /* rtol = */ 0.0, + /* atol = */ 
0.0)); + } +} + } // namespace #endif From 0af111c7643beeb551931ec34a2d8f3f39f2908f Mon Sep 17 00:00:00 2001 From: Peter German Date: Fri, 24 Apr 2026 15:25:03 -0600 Subject: [PATCH 42/51] Restrict sampler controller transfer. --- .../libtorch_drl_control_trainer.i | 2 +- ...Transfer.h => SamplerDRLControlTransfer.h} | 4 +- ...Transfer.C => SamplerDRLControlTransfer.C} | 43 ++++++++---------- .../gold/parameter_read.csv | 20 ++++---- .../libtorch_drl_control.i | 7 +-- .../libtorch_drl_control/mynet_control.net | Bin 6348 -> 8633 bytes .../libtorch_nn_transfer/control.net_best | Bin 0 -> 8633 bytes .../libtorch_drl_control_sub.i | 1 - .../libtorch_drl_control_trainer.i | 1 - 9 files changed, 35 insertions(+), 43 deletions(-) rename modules/stochastic_tools/include/libtorch/transfers/{SamplerNeuralNetControlTransfer.h => SamplerDRLControlTransfer.h} (89%) rename modules/stochastic_tools/src/libtorch/transfers/{SamplerNeuralNetControlTransfer.C => SamplerDRLControlTransfer.C} (74%) create mode 100644 modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/control.net_best diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index 42d6f52a8ea5..e4585c805b2d 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -19,7 +19,7 @@ [Transfers] [nn_transfer] - type = SamplerNeuralNetControlTransfer + type = SamplerDRLControlTransfer to_multi_app = runner trainer_name = nn_trainer control_name = src_control diff --git a/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h b/modules/stochastic_tools/include/libtorch/transfers/SamplerDRLControlTransfer.h similarity index 89% rename from 
modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h rename to modules/stochastic_tools/include/libtorch/transfers/SamplerDRLControlTransfer.h index aa887d39cb01..0b97a480c159 100644 --- a/modules/stochastic_tools/include/libtorch/transfers/SamplerNeuralNetControlTransfer.h +++ b/modules/stochastic_tools/include/libtorch/transfers/SamplerDRLControlTransfer.h @@ -17,7 +17,7 @@ #include "StochasticToolsTransfer.h" #include "SurrogateModelInterface.h" -class SamplerNeuralNetControlTransfer : public StochasticToolsTransfer, public SurrogateModelInterface +class SamplerDRLControlTransfer : public StochasticToolsTransfer, public SurrogateModelInterface { public: static InputParameters validParams(); @@ -26,7 +26,7 @@ class SamplerNeuralNetControlTransfer : public StochasticToolsTransfer, public S * Build the transfer that pushes a trained controller into subapps. * @param parameters Input parameters for the transfer. */ - SamplerNeuralNetControlTransfer(const InputParameters & parameters); + SamplerDRLControlTransfer(const InputParameters & parameters); /// Execute the transfer in the standard non-batch path. 
virtual void execute() override; diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C similarity index 74% rename from modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C rename to modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C index a81c9f12daaf..b288d79f0e1a 100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerNeuralNetControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C @@ -9,20 +9,20 @@ #ifdef MOOSE_LIBTORCH_ENABLED -#include "SamplerNeuralNetControlTransfer.h" +#include "SamplerDRLControlTransfer.h" #include "LibtorchDRLControl.h" -#include "LibtorchNeuralNetControl.h" -registerMooseObject("StochasticToolsApp", SamplerNeuralNetControlTransfer); +registerMooseObject("StochasticToolsApp", SamplerDRLControlTransfer); InputParameters -SamplerNeuralNetControlTransfer::validParams() +SamplerDRLControlTransfer::validParams() { InputParameters params = StochasticToolsTransfer::validParams(); params += SurrogateModelInterface::validParams(); - params.addClassDescription("Copies a neural network from a trainer object on the main app to a " - "LibtorchNeuralNetControl object on the subapp."); + params.addClassDescription( + "Copies a DRL actor from a trainer object on the main app to a LibtorchDRLControl object " + "on the subapp."); params.suppressParameter("from_multi_app"); @@ -32,7 +32,7 @@ SamplerNeuralNetControlTransfer::validParams() return params; } -SamplerNeuralNetControlTransfer::SamplerNeuralNetControlTransfer(const InputParameters & parameters) +SamplerDRLControlTransfer::SamplerDRLControlTransfer(const InputParameters & parameters) : StochasticToolsTransfer(parameters), SurrogateModelInterface(this), _control_name(getParam("control_name")), @@ -42,12 +42,12 @@ 
SamplerNeuralNetControlTransfer::SamplerNeuralNetControlTransfer(const InputPara } void -SamplerNeuralNetControlTransfer::initialSetup() +SamplerDRLControlTransfer::initialSetup() { } void -SamplerNeuralNetControlTransfer::execute() +SamplerDRLControlTransfer::execute() { const auto n = getToMultiApp()->numGlobalApps(); for (MooseIndex(n) i = 0; i < n; i++) @@ -63,11 +63,10 @@ SamplerNeuralNetControlTransfer::execute() FEProblemBase & app_problem = _multi_app->appProblemBase(i); auto & control_warehouse = app_problem.getControlWarehouse(); std::shared_ptr control_ptr = control_warehouse.getActiveObject(_control_name); - LibtorchNeuralNetControl * control_object = - dynamic_cast(control_ptr.get()); + LibtorchDRLControl * control_object = dynamic_cast(control_ptr.get()); if (!control_object) - paramError("control_name", "The given control is not a LibtorchNeuralNetrControl!"); + paramError("control_name", "The given control is not a LibtorchDRLControl!"); // Copy and the neural net and execute it to get the initial values control_object->loadControlNeuralNet(trainer_nn); @@ -87,27 +86,27 @@ SamplerNeuralNetControlTransfer::execute() } void -SamplerNeuralNetControlTransfer::initializeFromMultiapp() +SamplerDRLControlTransfer::initializeFromMultiapp() { } void -SamplerNeuralNetControlTransfer::executeFromMultiapp() +SamplerDRLControlTransfer::executeFromMultiapp() { } void -SamplerNeuralNetControlTransfer::finalizeFromMultiapp() +SamplerDRLControlTransfer::finalizeFromMultiapp() { } void -SamplerNeuralNetControlTransfer::initializeToMultiapp() +SamplerDRLControlTransfer::initializeToMultiapp() { } void -SamplerNeuralNetControlTransfer::executeToMultiapp() +SamplerDRLControlTransfer::executeToMultiapp() { if (getToMultiApp()->hasLocalApp(_app_index)) { @@ -125,22 +124,20 @@ SamplerNeuralNetControlTransfer::executeToMultiapp() FEProblemBase & app_problem = _multi_app->appProblemBase(_app_index); auto & control_warehouse = app_problem.getControlWarehouse(); 
std::shared_ptr control_ptr = control_warehouse.getActiveObject(_control_name); - LibtorchNeuralNetControl * control_object = - dynamic_cast(control_ptr.get()); + LibtorchDRLControl * control_object = dynamic_cast(control_ptr.get()); if (!control_object) - paramError("control_name", "The given control is not a LibtorchNeuralNetrControl!"); + paramError("control_name", "The given control is not a LibtorchDRLControl!"); // Copy and the neural net and execute it to get the initial values control_object->loadControlNeuralNet(trainer_nn); - if (auto * drl_control = dynamic_cast(control_object)) - drl_control->setPolicySampleSeed(sample_seed); + control_object->setPolicySampleSeed(sample_seed); control_object->execute(); } } void -SamplerNeuralNetControlTransfer::finalizeToMultiapp() +SamplerDRLControlTransfer::finalizeToMultiapp() { } diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv index 1df81f626a79..cd7202f5163b 100644 --- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv +++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv @@ -1,12 +1,12 @@ time,center_temp,center_temp_tend,env_temp,left_flux,log_prob_left_flux 0,300,300,273,0,0 -1800,300,271.57673855843,273.98104693845,-18.334498394569,-4.6349479692145 -3600,271.57673855843,272.46057412724,274.9578928833,-18.93250347727,-4.5957497134515 -5400,272.46057412724,272.899016984,275.92635483024,-22.95326764291,-3.9922233679219 -7200,272.899016984,269.81794459597,276.88228567654,-53.578809923663,-4.7726707362022 -9000,269.81794459597,279.64597914382,277.82159197955,13.875634892825,-6.0670584579465 -10800,279.64597914382,276.42458720301,278.74025148548,-17.573135974458,-4.0196727079328 -12600,276.42458720301,280.14763817482,279.63433035329,3.9077421760795,-5.2736274640494 
-14400,280.14763817482,272.95496522133,280.5,-57.241609291066,-4.7661592246168 -16200,272.95496522133,276.2597915218,281.33355349529,-38.457322485066,-3.9868025342074 -18000,276.2597915218,278.57505613194,282.13142143513,-26.956170196589,-3.929639370556 +1800,300,283.69849346609,273.98104693845,73.630502862132,-5.8011541981082 +3600,283.69849346609,300.67435328161,274.9578928833,195.06575617849,-7.4224257273223 +5400,300.67435328161,267.4643690803,275.92635483024,-64.308701931811,-5.7737848258126 +7200,267.4643690803,259.55941422837,276.88228567654,-131.38405718619,-6.3770948269327 +9000,259.55941422837,272.14524323322,277.82159197955,-42.985660495371,-5.6608246378745 +10800,272.14524323322,275.68007219983,278.74025148548,-23.188639562158,-5.5654529493672 +12600,275.68007219983,282.82714752336,279.63433035329,24.239841099254,-5.5594090393934 +14400,282.82714752336,286.84577029599,280.5,48.132997254754,-5.6461974402188 +16200,286.84577029599,287.23729606816,281.33355349529,44.765462438375,-5.6305436912643 +18000,287.23729606816,269.39326109815,282.13142143513,-96.664605550218,-5.9868706945221 diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i index e4621ac75239..e36c774cf4e6 100644 --- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i +++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i @@ -98,12 +98,10 @@ input_timesteps = 2 response_scaling_factors = '0.03 0.03' response_shift_factors = '270 270' - action_standard_deviations = '0.1' - action_scaling_factors = 200 + action_scaling_factors = 100 filename = 'mynet_control.net' - torch_script_format = false - num_neurons_per_layer = '16 6' + num_neurons_per_layer = '4 2' activation_function = 'relu' execute_on = 'TIMESTEP_BEGIN' @@ -116,7 +114,6 @@ input_timesteps = 2 response_scaling_factors = 
'0.03 0.03' response_shift_factors = '270 270' - action_standard_deviations = '0.1' action_scaling_factors = 100 execute_on = 'TIMESTEP_BEGIN' diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net index 66f9c6f3bd3ee5c7b2be5997dea2d464434960af..ea50e03c46efaac6866c25d0c5e288916ed32344 100644 GIT binary patch literal 8633 zcmeHMYj6|S6<$exMcBwM92*B)F@eZn^p+)&7#S5rTmccb0mT`zvyv8TiI8-qm9aZ@ zYtnI`oe4}yrX7+r<7t{HosuabeGnK>O8SS8Olt}>nIx@4NJv6UFf=U_8tA>Nm9%$d z$LG)>lXy|R42^L^*syJydybvb241W{H-WE``JTEZI&vXM|gd(Q3_cNk-=U__i|Y9GSB$d7CXjy}u& z^|dRuj{QHl`cK`9apd*r7wsQYKIlEQFq2hzddwy$+`DA|G)j7th*Cx zLnD_yk^k+;|LY&p*8btN|H$8ylZu8)XbX*8_KbKu%?`V^;C%!s;Bia)*K#ZScg3t_ z`+mP?9IgM@;XhsC;(7Xz^~MbRx6Z`>-=5$957%y%<8@TsT`~Rjt03;dnBP1nvgFRpyhyQ-9D^x&w^oLg5elTM*YlY7D&9@JT)0(nLt!d zMrcor3HV5s4n{)}azHN!QkBYG3Sm(=lj^5|ECG?P2*o@Bx+n3i92B`FaFP}jyJhg~ z4aY!>N}YttDS;o+ZwdQ-uQwD3&|a1a1*7u&BR)Dp`&KYsRu0NqyV-7Vn^2>d z3eZ6AhZ6iUe}X{=X89AA%SD#`2?G`Gaws4aV>vH53$rl`dZ2(7Bv4yr!h$5Jjki$c zo((Pd8J~|1k^yRnj*vP~osOxROon$W73>etq&5}h8ZOEu5WbCO`u!}Z#i$Kn4y3Mx z)OCouhzm;*)UuPOzTFKg2g1WpQBaQ|ji7;ddoG7Wl2fS0m?u-R9^8>bvMwcg9+K?G zb(%mEWGjO@n-QBdL#NV@%WDUBLVy$kS~vhEZ$6ZVHXg;TI_ak=AGj-rHhoIl0xq0r zJ$Y=DThE19Xw2Yl$c@%hg}Bi!#K6f@VcYJr>_`uq#@161ncDa74e8qaW_TtluUTJxl zZhV>q5A6rTV;_9ed7`f3=o{XD?^QZ3jM@V){JQ4NpHuh#=48*!4RI#+%atu#`q$}B zta(oJY9orus`}%P?1drdhUqSQAu*q*%N`ZA4`$hZlFRYcQ^aRP>aK?jj{CdUEbk>Z zcJ zMYG*b!b$7^McfH~8U@8WxMhA}@@U5vXb+9-4lO1+vN!}6oWI#5vmv+e)+roI7TD9A zy#Xp3RVHp+s>G>vsa&Zf$uJe6w$dyeiISwUL-}A!BFnb4DO*s^gy&7%JWp)%=F}wg zRbrlUsj`>5vB7W9{Pf>wWk-sou_z1z8rcJ3A{G@A2;o{U%rhik^7Yq!z5a=^00rVJ zxc>StZvCgY+FOK`0-q3sU!|{kr4p3{@b6^)A~Om*gPJd&l}dQ0&N&N zj=?m7My^!SoNp>YF%@p5#$2i7OP)UrU)=ur-@bpKKnNH8D{}uZe!c#Q`v(fd|I7N% zA!-D8Y8fh5qc&`C96VS-6p@~qZBB178 zD@5rCLj{;`QK-#AGCqfWQ2_zb8q5}>QL8nZ^z8<-S*z3PtvZ9rXfkQd+IFo)uh*J& zW`hMOB(0}+tDrhGTuuoJR_e`%@Erx}sNe_w9z={4zF)wxVQ2=!>cm*#I|CehS&Y?- 
zvBJJPjz#?~jMpH>3VZ4})&=8&VU1#}u-}bi4~VfQF;>{y#<4EgW59UZ#n|kz8WUs9 zVyv*IjPusROBUmWdj;Pc!oDz$9TQ`%V(fZBFIRzWs`_FPuTCol3mdXHcq{{)v9p9N z5!6jiANBAO6?%+PTY@^>HW9WC8q!es>Cf^sI-`jq_CWZ+J|8pl5$v)hMP!&s-$}#< zg%7@qF@uL;>uqX-Q$F>J4GWc;FvG7@6l8ct+Tpg!Q#*Q4D?mHkHU${Yu3h+)Ac{NO zTzOKUCxiU8&#o@WaCYt4_snPcQ=kGjf9_WUVOaV9pLeZR}<^45-5 zePRk>O*99)q&~wa1QnLpgs`x|GhpxSX_b5_B?b#Ck}8b za?4L$#JHR#C2c&WNwy?~p2vSmo=Lt*eGN5H;Q?{z{S~qCPV}1~(0eMEvl!J>#60+e I<|pR<7Xpg4Q2+n{ literal 6348 zcmeHLc~p~E7XJu)s%0o2{p2VBI1i!sWr7=U5HCb14M$2311eJiW(Qh@pQ(e zD%Pr~Y=V}$Ac#kKfb58}L&XK`RO_g8+^Qowo%dc8k`T1mKYHez2Hv^3U+(+e?Y;Z% z`@(p}CKP31LG}CCQ^TmG2~v?#5G9i;y9aqVV`mVDOSUNPVSg))C8+}aAU(Lt|oGPlNYkwQW zY7fL82Z$7J_3Xfu3g1vio>$m40-FxuhE~Nz80-fGKhHk^M?V|SYAmh?-4-L+Vt-F? z#*b4~`sMeKF>mhT+VDoubHc3T`}s{^>rRV^)HRKuEz_^<{Mbw2*6wTbn8np#)r#FE zlVjU~>E8v}iIZ}{^zhM#1SQR&HaxE`*6}io*s%G)sg~pL7h~gq`@iH8I~?c>n5x;C z#n`@L2e)=isog0paNWI{g`2ZsKI_5qA@P;)=G}=y9^7jNjPdtBi}TnB6EE4X zO|EH%GtL?(wiLrs@t!1(>xxI^s!pJ?skOjAo_p0}12Dq`#ll4D!|qv;4Pw z0XOI6i-$jN0=vH2#ECAt3_oXkWjGD%fY%k*XcK-N4C9$wtaiEaE5$&FXoS6DJxw;W zp1eYWCu*&y-i7rWzx?zde)oTJ>cJw0>%-}*ny|m>*;Q>YFrXo5R8}R}=eqvz!t4Tg z{9u7M%_s*auZMn^aOVn^<6e7u6$U?<-JK9n2$GxL8T0Xp2C$?uWwV`iE|`?FyrE-? 
zCZ4CZ6_B`9+MR>I_$X`fH|@a0|H&N7=*s~6|GlMYlNu{VW6=J4|33ed+nz0AAIyj6 z4{h`AoF2Y!2Z^Wn@mb>ksPA^eyG`%?>XcgwOymm^ z+|FJgILy!*LW^Qbh#cKohI#H!^@OU6j}~IS_%UB2-x#r-$Xk@}!}8p_j+%8DvFZrs zsZmAw_;+qkN(JFN<8m-hwSIUE^J<^WYVv&1%v8d=rk=u-yJE|EfaMnD7Ln(cQ!Zk@ z_*QHwrs|F1gr~UIL-FqvyOit`?gZaj!hcbIipcFfN$jR3WT%kl|DH+Ys49zXnCGTt zXzD;iQYkb)eW#=$g#-3iwoSdiFdfgQ#!Qk4&G zxu}hIgk=yt1rJWcEiEaW#p*P;A^gE=({*X^SbI$AcH2t0BTKqEXlW(U-`@Bors|&C zWgz}h1be!5J-C@Y@2&BUd%;(w&JXQ#E8wG}&S!lbNqnfZ9}a;BGe5MwvOF8STUFrU zePI{O+L*K~o?i-+S3GsTnMLX?_e@DG5W27J?nAQo?axONbHRGgrAb3QGTwua@j)JQY&D$Bq3)%f$OQjd zz^R&7T14t~&)Q4d-&nt&uKuK!n!*k5?IM09vX8doMgot4{c@mu4V(k+ai2>@ z8Y2-2JbR}z$aFI_eVHgWMy#ZbqlgA~fl3)GQ7{E^QKTwX5-m`Qqzai_5bw>TP1tO{ zIhvtBlEN4fZK|2Vd|#%Dl!#_)o-%1Oz7Z1gq|Nzwy+*~UXbT=!Ig7UBQEZ+y&qT@Q znedF18>6CR5{W2E87q@2n4$7$kz5r0er%MINi*F(QGPEnqY<|IFJcC=Dm%~8;abLsg!Xl<%^`zj<2xJyVtsR3|Qw&|K)|U z5m=cK(u-~Ev(8D+IzM_OnqY*ib0!mXtaDLCsv>EYJ3jH}E2p-|QMZE!Ug`FzgYMr# zyHHc~R(g~yTIBI^-3i>|5<-H52EF8UI-`|!fz0(>Ym@rK(1tFl>)`{PHj~y&ZQvsS}NX z_qiJt@H+3^qtQl+5sDikbD;bLou+e9XdB@o03<-(;RrAV$22v|X^ zg|3=n|1@!Sa1PPNCYAp;DlL;4yGe0;y4h z4~lkDS+5PxAC^C_Kl3N(x2SPY{!jAf{b&CA=MM+vkICN%>YaY2LaI;-rAmdSwHt1@ z6sQ?)q)IRKunfm74ITp*q~ZO}lz3G7P?HoA{D$X;4-=QehJ7(}U%bFT+Kuqn-gjc! 
zZ#V4m4>})1Z^-l!wBHZtB7`AL1`xdT5wveOdI+eGG=Si(kDz_$&_$?6jjjO%AAJPv zTZ1lw8a2KK5PbC!wC@MH2>8li0KrcmLA&pA(8&YeAOc=`8jQXUO}l$>(ZL9QOW%rq z{hGc(+RcTV4j6aHYr*<~iwZL0%y;EmCxNjDL?AutI4 zCVu1&RuMj$e!Cy%tOn_a?#ZXGXyANEtp}+HmuP+Xf%CD><_+Kn&WCoD9ifwWciaAo zwCcCM#o7gPxDL?ven2lubQsUfjPpX&*?3&1S-|&X|94{{FEg z@Efz?2bbX3+l6^P8aXtA1Bpi_Kkp#@G&$_nMg3Iz9sm2Wmwxt>>#tvkx*q6z>VEc; z?aJ}%=jQu9h}Jz)X8!QEmu&MN1*op6Ioq~3PYgTOd~7?t{goo-#D(b7YtPkJd^#JY zUYVS}>kk)gr;feU6aMb|(f+ZcE2^r`*~ZpC_xjPUIorCxX#@Y{G28i5*S@&e{)z3K z7mmyiOkc4r=iWLvH+nLP{eEQN>f__!Gai}zE+G9jJL~{h+Ws>?`(9 zTQ=+0^lUdXe);#dmA~oje*XP`MyvMDJUI9H@#qg9{%L*jnOWNtBYop{-hCxndFJY@ zZ~kob?A$L}j#i(wv40-@Y~x=(7X6=D+xzb>(1u4QeG32E7xEwb+vhy}_cE}BM<#n_ zGM*NPU7zzl0tz^8X#ZMoWdCCJlU=hmI8DvG^{E)o&@olb68!I2y8k%N%Zj-V8rj9K z-%{#!GVTUQjFy}OFI~XcfBBL9`j=8Bg$_uVt-0!mb7${=8MTSv)dlb49rpHIi6#HX zRUVoa0wIr&rqw$F-U!dC5!L*_nAW`lv~m7mM4-byZl8eiJmNeZ9T7M_tfoWkV1(nn zw7~kqfe<}xR3nvE>s|)1FwA6zSd<@wsJ8?ngFL${_N^Kfxbt9=9u>Ot;p_=UP?57h z*ol@oDXlXPKH}eb!b2WUfah6{zy!9pHrD~nmki))iCl)G1f1s~`2 zvVNLpM%fT;Ko!ZD>gafQ4>0~Ao~8APC|8P6E&}-XS#HQDpelmegjNG}8BkYa>H;w= z1)yb8qQ1!ul>;@%F=13gAkC;&a{F5%5=%~?>LP=wl8xx*43Z5A$@N&WkJQDV=O9QQOOoTrwB<0 zYL#T^MF0^J1B3)$Ct<;hj2SeU6Pip4O)5Ye^vEoUYFk)TyG)dbFd&Ph%9^UGBQl78 z&?X&DT_fAsmb+j(;(>3ORZ!BV*0i-$dm?pt0$#Q?er?dY;HblHTtW^Td+d+uSM?=_ zF5kKJ-EEr>o<|p&Mm}3tG5GX*_CGy(z2oY?`nw0_j_hw|3c_GUV(PECTSRg**Dt8SqIPUG)vALJtv#s}TdRK4H zwtKqi-p%&yTM^}k|57M#0;!yol)(3Mf{qSyp8Fuz$5J@w7kDlVc{N`cs3LlzGDH$;sy31XAjSW!eDH|rlO3veL5hU;(q^45QW ztG!WPDM$%H{#Ejt*JyA_0Ds5x7oAernRHV5tkJ-oI^$Gxd@4>QN2I0uXz42eR=!mfBXJ{10i4ZugU$x{LT8u?jJZ1|1aymnyQrLI)7MT z{6aX^<|tL>vT;?jSeYJZUA_lD4&#wNixDZ&g5-aa_li1@*Y?~4*?`@M<7xWki-gYH6eXORGSc?)X?Q@?;D{UhTzqT?b!%NZ*+bT=#_(3fP?XYcfFq~ey{3$^Zci3E6Qs5_p?6t3|$jNYe z?dkW-wd^Tyft$Vd{Hmd9oJB<~5~d-4 uV**_-eTtuHsY!hmH&NjL9DaXA?YRa2rYQWL%H=G?H5FA4KX`s@?tcN#jh%1+ literal 0 HcmV?d00001 diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i 
b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i index 0352b4cea3a1..86eef63598d9 100644 --- a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i @@ -127,7 +127,6 @@ input_timesteps = 2 response_scaling_factors = '0.03 0.03' response_shift_factors = '270 270' - action_standard_deviations = '0.1' action_scaling_factors = 100 execute_on = 'TIMESTEP_BEGIN' diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i index 8aadb3626c2b..bac8c7ff4687 100644 --- a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i @@ -57,7 +57,6 @@ response_scaling_factors = '0.03 0.03' response_shift_factors = '270 270' action_scaling_factors = 100 - action_standard_deviations = '0.1' read_from_file = false shift_outputs = false From b2e8646bdabe8b7b643a1273c372b139063e2650 Mon Sep 17 00:00:00 2001 From: Peter German Date: Fri, 24 Apr 2026 16:30:22 -0600 Subject: [PATCH 43/51] Rename response to observation, make sure we can recover a DRL control based run. 
--- .../controls/LibtorchNeuralNetControl.h | 28 +++--- .../utils/LibtorchObservationHistory.h | 24 +++--- .../controls/LibtorchNeuralNetControl.C | 79 ++++++++--------- .../utils/LibtorchObservationHistory.C | 82 +++++++++--------- .../libtorch_drl_control_sub.i | 6 +- .../libtorch_drl_control_trainer.i | 6 +- .../libtorch/controls/LibtorchDRLControl.h | 21 ++++- .../surrogates/LibtorchDRLControlTrainer.h | 10 +-- .../libtorch/controls/LibtorchDRLControl.C | 85 +++++++++++++++---- .../trainers/LibtorchDRLControlTrainer.C | 26 +++--- .../libtorch_drl_control.i | 12 +-- .../tests/controls/libtorch_drl_control/tests | 25 ++++++ .../libtorch_drl_control_sub.i | 6 +- .../libtorch_drl_control_trainer.i | 6 +- .../libtorch_nn_control/read_control.i | 2 +- 15 files changed, 255 insertions(+), 163 deletions(-) diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index e3d3dd01ed90..5c37d4da7e2b 100644 --- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -60,7 +60,7 @@ class LibtorchNeuralNetControl : public Control const Moose::LibtorchNeuralNetBase & controlNeuralNet() const; /// Return true if the object already has a neural network. - bool hasControlNeuralNet() const { return (_nn != NULL); }; + bool hasControlNeuralNet() const { return _nn != nullptr; }; protected: /** @@ -74,37 +74,37 @@ class LibtorchNeuralNetControl : public Control bool should_be_defined = true); /// Refresh the current observation values from the linked postprocessors. - void updateCurrentResponse(); + void updateCurrentObservation(); /// Build the normalized input tensor passed into the controller neural network. 
torch::Tensor prepareInputTensor(); /// The values of the current observed postprocessor values - std::vector _current_response; + std::vector _current_observation; /// This variable is populated if the controller needs access to older values of the /// observed postprocessor values - std::vector> & _old_responses; + std::vector> & _old_observations; /// The names of the controllable parameters const std::vector & _control_names; - /// The control signals from the last evaluation of the controller - std::vector _current_control_signals; + /// The control signals from the last evaluation of the controller, saved for recover/restart. + std::vector & _current_control_signals; /// Names of the postprocessors which contain the observations of the system - const std::vector & _response_names; + const std::vector & _observation_names; - /// Links to the current response postprocessor values. This is necessary so that we can check + /// Links to the current observation postprocessor values. This is necessary so that we can check /// if the postprocessors exist. - std::vector _response_values; + std::vector _observation_values; /// Number of timesteps to use as input data from the reporters (this influences how many past - /// results are used, e.g. the size of _old_responses) + /// results are used, e.g. 
the size of _old_observations) const unsigned int _input_timesteps; - /// Shifting constants for the responses - const std::vector _response_shift_factors; - /// Scaling constants (multipliers) for the responses - const std::vector _response_scaling_factors; + /// Shifting constants for the observations + const std::vector _observation_shift_factors; + /// Scaling constants (multipliers) for the observations + const std::vector _observation_scaling_factors; /// Multipliers for the actions const std::vector _action_scaling_factors; diff --git a/framework/include/libtorch/utils/LibtorchObservationHistory.h b/framework/include/libtorch/utils/LibtorchObservationHistory.h index d4ba6970bb46..8f5add0340d8 100644 --- a/framework/include/libtorch/utils/LibtorchObservationHistory.h +++ b/framework/include/libtorch/utils/LibtorchObservationHistory.h @@ -28,32 +28,32 @@ class LibtorchObservationHistory unsigned int inputTimesteps() const { return _input_timesteps; } - std::vector normalize(const std::vector & response) const; + std::vector normalize(const std::vector & observation) const; - void normalizeInPlace(std::vector & response) const; + void normalizeInPlace(std::vector & observation) const; - void normalizeTrajectoryInPlace(std::vector> & response_trajectories) const; + void normalizeTrajectoryInPlace(std::vector> & observation_trajectories) const; - void initializeHistory(const std::vector & normalized_response, - std::vector> & old_responses) const; + void initializeHistory(const std::vector & normalized_observation, + std::vector> & old_observations) const; - void advanceHistory(const std::vector & normalized_response, - std::vector> & old_responses) const; + void advanceHistory(const std::vector & normalized_observation, + std::vector> & old_observations) const; std::vector expandFeatureFactors(const std::vector & feature_factors) const; - std::vector stackCurrentObservation( - const std::vector & normalized_response, - const std::vector> & old_responses) const; 
+ std::vector + stackCurrentObservation(const std::vector & normalized_observation, + const std::vector> & old_observations) const; std::vector stackTrajectoryObservation( - const std::vector> & normalized_response_trajectories, + const std::vector> & normalized_observation_trajectories, unsigned int time_index) const; private: void validateFeatureCount(std::size_t feature_count) const; void validateTrajectoryShape( - const std::vector> & normalized_response_trajectories) const; + const std::vector> & normalized_observation_trajectories) const; const unsigned int _input_timesteps; const std::vector _shift_factors; diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 01fa0c351211..1e0b56bab535 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -26,12 +26,12 @@ LibtorchNeuralNetControl::validParams() params.addRequiredParam>( "parameters", "Controllable input parameters driven by the network."); params.addRequiredParam>( - "responses", "Postprocessors used as the current observation vector."); + "observations", "Postprocessors used as the current observation vector."); params.addParam>( - "response_shift_factors", + "observation_shift_factors", "Optional offsets applied to the observation values before scaling."); params.addParam>( - "response_scaling_factors", + "observation_scaling_factors", "Optional multipliers applied after shifting the observation values."); params.addParam("filename", "Checkpoint file to load for the controller network."); params.addParam("torch_script_format", @@ -56,21 +56,22 @@ LibtorchNeuralNetControl::validParams() LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & parameters) : Control(parameters), - _old_responses(declareRestartableData>>("old_responses")), + _old_observations(declareRestartableData>>("old_observations")), 
_control_names(getParam>("parameters")), - _current_control_signals(std::vector(_control_names.size(), 0.0)), - _response_names(getParam>("responses")), + _current_control_signals(declareRestartableData>( + "current_control_signals", std::vector(_control_names.size(), 0.0))), + _observation_names(getParam>("observations")), _input_timesteps(getParam("input_timesteps")), - _response_shift_factors(isParamValid("response_shift_factors") - ? getParam>("response_shift_factors") - : std::vector(_response_names.size(), 0.0)), - _response_scaling_factors(isParamValid("response_scaling_factors") - ? getParam>("response_scaling_factors") - : std::vector(_response_names.size(), 1.0)), + _observation_shift_factors(isParamValid("observation_shift_factors") + ? getParam>("observation_shift_factors") + : std::vector(_observation_names.size(), 0.0)), + _observation_scaling_factors(isParamValid("observation_scaling_factors") + ? getParam>("observation_scaling_factors") + : std::vector(_observation_names.size(), 1.0)), _action_scaling_factors(isParamValid("action_scaling_factors") ? 
getParam>("action_scaling_factors") : std::vector(_control_names.size(), 1.0)), - _observation_history(_input_timesteps, _response_shift_factors, _response_scaling_factors) + _observation_history(_input_timesteps, _observation_shift_factors, _observation_scaling_factors) { // We first check if the input parameters make sense and throw errors if different parameter // combinations are not allowed @@ -78,14 +79,14 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param {"num_neurons_per_layer", "activation_function"}, !getParam("torch_script_format")); - if (_response_names.size() != _response_shift_factors.size()) - paramError("response_shift_factors", - "The number of shift factors is not the same as the number of responses!"); + if (_observation_names.size() != _observation_shift_factors.size()) + paramError("observation_shift_factors", + "The number of shift factors is not the same as the number of observations!"); - if (_response_names.size() != _response_scaling_factors.size()) - paramError( - "response_scaling_factors", - "The number of normalization coefficients is not the same as the number of responses!"); + if (_observation_names.size() != _observation_scaling_factors.size()) + paramError("observation_scaling_factors", + "The number of normalization coefficients is not the same as the number of " + "observations!"); if (_control_names.size() != _action_scaling_factors.size()) paramError("action_scaling_factors", @@ -94,8 +95,8 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param // We link to the postprocessor values so that we can fetch them any time. This also raises // errors if we don't have the postprocessors requested in the input. 
- for (unsigned int resp_i = 0; resp_i < _response_names.size(); ++resp_i) - _response_values.push_back(&getPostprocessorValueByName(_response_names[resp_i])); + for (unsigned int obs_i = 0; obs_i < _observation_names.size(); ++obs_i) + _observation_values.push_back(&getPostprocessorValueByName(_observation_names[obs_i])); } void @@ -115,7 +116,7 @@ LibtorchNeuralNetControl::loadControlNeuralNetFromFile() _nn = std::make_shared(filename); else { - unsigned int num_inputs = _response_names.size() * _input_timesteps; + unsigned int num_inputs = _observation_names.size() * _input_timesteps; unsigned int num_outputs = _control_names.size(); std::vector num_neurons_per_layer = getParam>("num_neurons_per_layer"); @@ -124,9 +125,9 @@ LibtorchNeuralNetControl::loadControlNeuralNetFromFile() ? getParam>("activation_function") : std::vector({"relu"}); const auto input_shift_factors = - _observation_history.expandFeatureFactors(_response_shift_factors); + _observation_history.expandFeatureFactors(_observation_shift_factors); const auto input_scaling_factors = - _observation_history.expandFeatureFactors(_response_scaling_factors); + _observation_history.expandFeatureFactors(_observation_scaling_factors); auto nn = std::make_shared(filename, num_inputs, num_outputs, @@ -165,12 +166,12 @@ LibtorchNeuralNetControl::execute() { const unsigned int n_controls = _control_names.size(); - // Fetch current reporter values and populate _current_response - updateCurrentResponse(); + // Fetch current observation values from the linked postprocessors. 
+ updateCurrentObservation(); // If this is the first timestep, we fill up the old values with the initial value - if (_old_responses.empty()) - _observation_history.initializeHistory(_current_response, _old_responses); + if (_old_observations.empty()) + _observation_history.initializeHistory(_current_observation, _old_observations); // Organize the old an current solution into a tensor so we can evaluate the neural net torch::Tensor input_tensor = prepareInputTensor(); @@ -187,8 +188,8 @@ LibtorchNeuralNetControl::execute() // We add the curent solution to the old solutions and move everything in there one step // backward - if (_old_responses.size()) - _observation_history.advanceHistory(_current_response, _old_responses); + if (_old_observations.size()) + _observation_history.advanceHistory(_current_observation, _old_observations); } } @@ -219,15 +220,14 @@ LibtorchNeuralNetControl::conditionalParameterError( } void -LibtorchNeuralNetControl::updateCurrentResponse() +LibtorchNeuralNetControl::updateCurrentObservation() { - // Gather the current response values from the reporters - std::vector raw_response; - raw_response.reserve(_response_names.size()); - for (const auto & resp_i : index_range(_response_names)) - raw_response.push_back(*_response_values[resp_i]); + std::vector raw_observation; + raw_observation.reserve(_observation_names.size()); + for (const auto & obs_i : index_range(_observation_names)) + raw_observation.push_back(*_observation_values[obs_i]); - _current_response = raw_response; + _current_observation = raw_observation; } void @@ -239,7 +239,8 @@ LibtorchNeuralNetControl::loadControlNeuralNet(const Moose::LibtorchArtificialNe torch::Tensor LibtorchNeuralNetControl::prepareInputTensor() { - auto raw_input = _observation_history.stackCurrentObservation(_current_response, _old_responses); + auto raw_input = + _observation_history.stackCurrentObservation(_current_observation, _old_observations); torch::Tensor input_tensor; 
LibtorchUtils::vectorToTensor(raw_input, input_tensor); diff --git a/framework/src/libtorch/utils/LibtorchObservationHistory.C b/framework/src/libtorch/utils/LibtorchObservationHistory.C index 782e9b65208b..42175597b876 100644 --- a/framework/src/libtorch/utils/LibtorchObservationHistory.C +++ b/framework/src/libtorch/utils/LibtorchObservationHistory.C @@ -16,10 +16,9 @@ #include #include "libmesh/utility.h" -LibtorchObservationHistory::LibtorchObservationHistory( - const unsigned int input_timesteps, - const std::vector & shift_factors, - const std::vector & scaling_factors) +LibtorchObservationHistory::LibtorchObservationHistory(const unsigned int input_timesteps, + const std::vector & shift_factors, + const std::vector & scaling_factors) : _input_timesteps(input_timesteps), _shift_factors(shift_factors), _scaling_factors(scaling_factors.empty() ? std::vector(shift_factors.size(), 1.0) @@ -38,69 +37,70 @@ LibtorchObservationHistory::validateFeatureCount(const std::size_t feature_count void LibtorchObservationHistory::validateTrajectoryShape( - const std::vector> & normalized_response_trajectories) const + const std::vector> & normalized_observation_trajectories) const { - if (normalized_response_trajectories.empty()) + if (normalized_observation_trajectories.empty()) return; - validateFeatureCount(normalized_response_trajectories.size()); + validateFeatureCount(normalized_observation_trajectories.size()); - const auto trajectory_size = normalized_response_trajectories.front().size(); - for (const auto & trajectory : normalized_response_trajectories) + const auto trajectory_size = normalized_observation_trajectories.front().size(); + for (const auto & trajectory : normalized_observation_trajectories) if (trajectory.size() != trajectory_size) mooseError("Observation trajectories must all have the same number of timesteps."); } std::vector -LibtorchObservationHistory::normalize(const std::vector & response) const +LibtorchObservationHistory::normalize(const 
std::vector & observation) const { - auto normalized = response; + auto normalized = observation; normalizeInPlace(normalized); return normalized; } void -LibtorchObservationHistory::normalizeInPlace(std::vector & response) const +LibtorchObservationHistory::normalizeInPlace(std::vector & observation) const { - validateFeatureCount(response.size()); + validateFeatureCount(observation.size()); if (_shift_factors.empty()) return; - for (const auto i : make_range(response.size())) - response[i] = (response[i] - _shift_factors[i]) * _scaling_factors[i]; + for (const auto i : make_range(observation.size())) + observation[i] = (observation[i] - _shift_factors[i]) * _scaling_factors[i]; } void LibtorchObservationHistory::normalizeTrajectoryInPlace( - std::vector> & response_trajectories) const + std::vector> & observation_trajectories) const { - validateTrajectoryShape(response_trajectories); + validateTrajectoryShape(observation_trajectories); if (_shift_factors.empty()) return; - for (const auto feature_i : make_range(response_trajectories.size())) - for (auto & value : response_trajectories[feature_i]) + for (const auto feature_i : make_range(observation_trajectories.size())) + for (auto & value : observation_trajectories[feature_i]) value = (value - _shift_factors[feature_i]) * _scaling_factors[feature_i]; } void -LibtorchObservationHistory::initializeHistory(const std::vector & normalized_response, - std::vector> & old_responses) const +LibtorchObservationHistory::initializeHistory( + const std::vector & normalized_observation, + std::vector> & old_observations) const { - old_responses.assign(_input_timesteps > 0 ? _input_timesteps - 1 : 0, normalized_response); + old_observations.assign(_input_timesteps > 0 ? 
_input_timesteps - 1 : 0, normalized_observation); } void -LibtorchObservationHistory::advanceHistory(const std::vector & normalized_response, - std::vector> & old_responses) const +LibtorchObservationHistory::advanceHistory(const std::vector & normalized_observation, + std::vector> & old_observations) const { - if (old_responses.empty()) + if (old_observations.empty()) return; - std::rotate(old_responses.rbegin(), old_responses.rbegin() + 1, old_responses.rend()); - old_responses[0] = normalized_response; + std::rotate(old_observations.rbegin(), old_observations.rbegin() + 1, old_observations.rend()); + old_observations[0] = normalized_observation; } std::vector @@ -123,21 +123,21 @@ LibtorchObservationHistory::expandFeatureFactors(const std::vector & featu std::vector LibtorchObservationHistory::stackCurrentObservation( - const std::vector & normalized_response, - const std::vector> & old_responses) const + const std::vector & normalized_observation, + const std::vector> & old_observations) const { - validateFeatureCount(normalized_response.size()); + validateFeatureCount(normalized_observation.size()); std::vector stacked; - stacked.reserve(normalized_response.size() * _input_timesteps); + stacked.reserve(normalized_observation.size() * _input_timesteps); - stacked.insert(stacked.end(), normalized_response.begin(), normalized_response.end()); + stacked.insert(stacked.end(), normalized_observation.begin(), normalized_observation.end()); for (const auto history_i : make_range(_input_timesteps > 0 ? _input_timesteps - 1 : 0)) { const auto & history_entry = - history_i < old_responses.size() ? old_responses[history_i] : normalized_response; - if (history_entry.size() != normalized_response.size()) + history_i < old_observations.size() ? 
old_observations[history_i] : normalized_observation; + if (history_entry.size() != normalized_observation.size()) mooseError("Observation history entries must have the same feature size as the current " "observation."); stacked.insert(stacked.end(), history_entry.begin(), history_entry.end()); @@ -148,26 +148,26 @@ LibtorchObservationHistory::stackCurrentObservation( std::vector LibtorchObservationHistory::stackTrajectoryObservation( - const std::vector> & normalized_response_trajectories, + const std::vector> & normalized_observation_trajectories, const unsigned int time_index) const { - validateTrajectoryShape(normalized_response_trajectories); + validateTrajectoryShape(normalized_observation_trajectories); - if (normalized_response_trajectories.empty()) + if (normalized_observation_trajectories.empty()) return {}; - const auto trajectory_size = normalized_response_trajectories.front().size(); + const auto trajectory_size = normalized_observation_trajectories.front().size(); if (time_index >= trajectory_size) mooseError("Requested observation time index is out of range."); std::vector stacked; - stacked.reserve(normalized_response_trajectories.size() * _input_timesteps); + stacked.reserve(normalized_observation_trajectories.size() * _input_timesteps); for (const auto lag : make_range(_input_timesteps)) { const auto source_index = time_index > lag ? 
time_index - lag : 0; - for (const auto feature_i : make_range(normalized_response_trajectories.size())) - stacked.push_back(normalized_response_trajectories[feature_i][source_index]); + for (const auto feature_i : make_range(normalized_observation_trajectories.size())) + stacked.push_back(normalized_observation_trajectories[feature_i][source_index]); } return stacked; diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i index 3a1f03dd8229..f99771fcaef6 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i @@ -121,12 +121,12 @@ air_effective_k = 0.5 # W/(m K) [src_control] type = LibtorchDRLControl parameters = "BCs/top_flux/value" - responses = 'center_temp_tend' + observations = 'center_temp_tend' # keep consistent with LibtorchDRLControlTrainer input_timesteps = 1 - response_scaling_factors = '0.03' - response_shift_factors = '290' + observation_scaling_factors = '0.03' + observation_shift_factors = '290' action_scaling_factors = 20 stochastic = true diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index e4585c805b2d..e09ee3eb98c5 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -37,7 +37,7 @@ [Trainers] [nn_trainer] type = LibtorchDRLControlTrainer - response = 'storage/r_transfer:T_reporter:center_temp_tend:value' + observation = 'storage/r_transfer:T_reporter:center_temp_tend:value' control = 'storage/r_transfer:T_reporter:top_flux:value' log_probability = 'storage/r_transfer:T_reporter:log_prob_top_flux:value' reward = 
'storage/r_transfer:T_reporter:reward:value' @@ -60,8 +60,8 @@ # keep consistent with LibtorchNeuralNetControl input_timesteps = 1 - response_scaling_factors = '0.03' - response_shift_factors = '290' + observation_scaling_factors = '0.03' + observation_shift_factors = '290' action_scaling_factors = 20 standardize_advantage = true diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index 11c1f9e4bcd6..d3a800eb96fc 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -11,6 +11,8 @@ #pragma once +#include + #include "LibtorchActorNeuralNet.h" #include "LibtorchNeuralNetControl.h" @@ -29,6 +31,9 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl /// Construct using input parameters LibtorchDRLControl(const InputParameters & parameters); + /// Restore any restartable controller state after base setup completes. + virtual void initialSetup() override; + /// We compute the actions in this function together with the corresponding logarithmic probabilities. 
virtual void execute() override; @@ -53,18 +58,26 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl protected: /// The log probability of control signals from the last evaluation of the controller - std::vector _current_control_signal_log_probabilities; + std::vector & _current_control_signal_log_probabilities; - std::vector _previous_control_signal; - std::vector _current_smoothed_signal; + std::vector & _previous_control_signal; + std::vector & _current_smoothed_signal; std::shared_ptr _actor_nn; at::Generator _policy_generator; + std::vector & _policy_generator_state; - unsigned int _call_counter; + unsigned int & _call_counter; const unsigned int _num_steps_in_period; const Real _smoother; const bool _stochastic; + +private: + /// Restore the owned libtorch generator state from restartable storage. + void restorePolicyGeneratorState(); + + /// Mirror the owned libtorch generator state into restartable storage. + void savePolicyGeneratorState(); }; #endif diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 9dc9ae4faaf7..98fa12c87aa3 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -72,17 +72,17 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Reset the stored rollout data after an update. 
void resetData(); - /// Response reporter names + /// Observation reporter names const std::vector _state_names; - /// Pointers to the current values of the responses - /// We can have multiple responses, multiple samples, multiple timesteps + /// Pointers to the current values of the observations + /// We can have multiple observations, multiple samples, multiple timesteps std::vector> *> _state_value_pointers; - /// Shifting constants for the responses + /// Shifting constants for the observations const std::vector _state_shift_factors; - /// Scaling constants for the responses + /// Scaling constants for the observations const std::vector _state_scaling_factors; /// Control reporter names diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 0fedd805cf31..4c59d63303ca 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -14,6 +14,8 @@ #include "Transient.h" #include "LibtorchUtils.h" +#include + registerMooseObject("StochasticToolsApp", LibtorchDRLControl); InputParameters @@ -57,11 +59,16 @@ LibtorchDRLControl::validParams() LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) : LibtorchNeuralNetControl(parameters), - _current_control_signal_log_probabilities(std::vector(_control_names.size(), 0.0)), - _previous_control_signal(std::vector(_control_names.size(), 0.0)), - _current_smoothed_signal(std::vector(_control_names.size(), 0.0)), + _current_control_signal_log_probabilities(declareRestartableData>( + "current_control_signal_log_probabilities", std::vector(_control_names.size(), 0.0))), + _previous_control_signal(declareRestartableData>( + "previous_control_signal", std::vector(_control_names.size(), 0.0))), + _current_smoothed_signal(declareRestartableData>( + "current_smoothed_signal", std::vector(_control_names.size(), 0.0))), 
_policy_generator(Moose::makeLibtorchCPUGenerator()), - _call_counter(0), + _policy_generator_state(declareRestartableData>( + "policy_generator_state", std::vector())), + _call_counter(declareRestartableData("call_counter", 0)), _num_steps_in_period(parameters.isParamSetByUser("num_steps_in_period") ? getParam("num_steps_in_period") : getParam("num_stems_in_period")), @@ -70,13 +77,23 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) { if (isParamValid("seed")) setPolicySampleSeed(getParam("seed")); + + savePolicyGeneratorState(); +} + +void +LibtorchDRLControl::initialSetup() +{ + LibtorchNeuralNetControl::initialSetup(); + restorePolicyGeneratorState(); + savePolicyGeneratorState(); } void LibtorchDRLControl::loadControlNeuralNetFromFile() { const auto & filename = getParam("filename"); - unsigned int num_inputs = _response_names.size() * _input_timesteps; + unsigned int num_inputs = _observation_names.size() * _input_timesteps; unsigned int num_outputs = _control_names.size(); std::vector num_neurons_per_layer = getParam>("num_neurons_per_layer"); @@ -88,9 +105,9 @@ LibtorchDRLControl::loadControlNeuralNetFromFile() const std::vector & minimum_values = getParam>("min_control_value"); const std::vector & maximum_values = getParam>("max_control_value"); const auto input_shift_factors = - _observation_history.expandFeatureFactors(_response_shift_factors); + _observation_history.expandFeatureFactors(_observation_shift_factors); const auto input_scaling_factors = - _observation_history.expandFeatureFactors(_response_scaling_factors); + _observation_history.expandFeatureFactors(_observation_scaling_factors); _actor_nn = std::make_shared(filename, @@ -124,12 +141,12 @@ LibtorchDRLControl::execute() const unsigned int n_controls = _control_names.size(); const unsigned int num_old_timesteps = _input_timesteps - 1; - // Fill a vector with the current values of the responses. 
- updateCurrentResponse(); + // Fill a vector with the current observation values. + updateCurrentObservation(); - // Seed the response history with the initial response when the control first runs. - if (_old_responses.empty()) - _old_responses.assign(num_old_timesteps, _current_response); + // Seed the observation history with the initial observation when the control first runs. + if (_old_observations.empty()) + _old_observations.assign(num_old_timesteps, _current_observation); if (_call_counter % _num_steps_in_period == 0) { @@ -139,6 +156,7 @@ LibtorchDRLControl::execute() if (_actor_nn) { action = _actor_nn->evaluate(input_tensor, _stochastic, _policy_generator); + savePolicyGeneratorState(); if (_stochastic) { @@ -173,10 +191,11 @@ LibtorchDRLControl::execute() setControllableValueByName(_control_names[control_i], _current_smoothed_signal[control_i]); - if (_old_responses.size()) + if (_old_observations.size()) { - std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); - _old_responses[0] = _current_response; + std::rotate( + _old_observations.rbegin(), _old_observations.rbegin() + 1, _old_observations.rend()); + _old_observations[0] = _current_observation; } _call_counter++; @@ -195,7 +214,41 @@ LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNe void LibtorchDRLControl::setPolicySampleSeed(const uint64_t seed) { - _policy_generator.set_current_seed(seed); + { + std::lock_guard lock(_policy_generator.mutex()); + _policy_generator.set_current_seed(seed); + } + savePolicyGeneratorState(); +} + +void +LibtorchDRLControl::restorePolicyGeneratorState() +{ + if (!_stochastic || _policy_generator_state.empty()) + return; + + auto state_tensor = + torch::from_blob(_policy_generator_state.data(), + {static_cast(_policy_generator_state.size())}, + torch::TensorOptions().dtype(torch::kUInt8).device(torch::kCPU)) + .clone(); + std::lock_guard lock(_policy_generator.mutex()); + 
_policy_generator.set_state(state_tensor); +} + +void +LibtorchDRLControl::savePolicyGeneratorState() +{ + if (!_stochastic) + { + _policy_generator_state.clear(); + return; + } + + std::lock_guard lock(_policy_generator.mutex()); + const auto state_tensor = _policy_generator.get_state().contiguous(); + const auto * data = state_tensor.data_ptr(); + _policy_generator_state.assign(data, data + static_cast(state_tensor.numel())); } Real diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 23626fce7b50..ef720902e92d 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -26,12 +26,12 @@ LibtorchDRLControlTrainer::validParams() "Trains a neural network controller using fixed-horizon PPO on top of the libtorch RL core."); params.addRequiredParam>( - "response", "Reporter values containing the response values from the model."); + "observation", "Reporter values containing the observation values from the model."); params.addParam>( - "response_shift_factors", + "observation_shift_factors", "Optional offsets applied to the observed state values before scaling."); params.addParam>( - "response_scaling_factors", + "observation_scaling_factors", "Optional multipliers applied after shifting the observed state values."); params.addRequiredParam>( "control", @@ -136,12 +136,12 @@ LibtorchDRLControlTrainer::validParams() LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & parameters) : SurrogateTrainerBase(parameters), - _state_names(getParam>("response")), - _state_shift_factors(isParamValid("response_shift_factors") - ? getParam>("response_shift_factors") + _state_names(getParam>("observation")), + _state_shift_factors(isParamValid("observation_shift_factors") + ? 
getParam>("observation_shift_factors") : std::vector(_state_names.size(), 0.0)), - _state_scaling_factors(isParamValid("response_scaling_factors") - ? getParam>("response_scaling_factors") + _state_scaling_factors(isParamValid("observation_scaling_factors") + ? getParam>("observation_scaling_factors") : std::vector(_state_names.size(), 1.0)), _action_names(getParam>("control")), _action_scaling_factors(isParamValid("action_scaling_factors") @@ -182,13 +182,13 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _ppo_loss(_clip_param, _entropy_coeff) { if (_state_names.size() != _state_shift_factors.size()) - paramError("response_shift_factors", - "The number of shift factors is not the same as the number of responses!"); + paramError("observation_shift_factors", + "The number of shift factors is not the same as the number of observations!"); if (_state_names.size() != _state_scaling_factors.size()) - paramError( - "response_scaling_factors", - "The number of normalization coefficients is not the same as the number of responses!"); + paramError("observation_scaling_factors", + "The number of normalization coefficients is not the same as the number of " + "observations!"); if (_action_names.size() != _log_probability_names.size()) paramError("log_probability", diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i index e36c774cf4e6..e85f3d7fc132 100644 --- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i +++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i @@ -93,11 +93,11 @@ seed = 11 type = LibtorchDRLControl parameters = "BCs/left_flux/value" - responses = 'center_temp env_temp' + observations = 'center_temp env_temp' input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '270 270' 
+ observation_scaling_factors = '0.03 0.03' + observation_shift_factors = '270 270' action_scaling_factors = 100 filename = 'mynet_control.net' @@ -109,11 +109,11 @@ [src_control_empty] type = LibtorchDRLControl parameters = "BCs/left_flux/value" - responses = 'center_temp env_temp' + observations = 'center_temp env_temp' input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '270 270' + observation_scaling_factors = '0.03 0.03' + observation_shift_factors = '270 270' action_scaling_factors = 100 execute_on = 'TIMESTEP_BEGIN' diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests index 551b53122de5..7180f45852b5 100644 --- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests +++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests @@ -10,6 +10,31 @@ "it to control a transient process." capabilities = 'libtorch' [] + [read-parameters-half-transient] + type = RunApp + input = libtorch_drl_control.i + cli_args = "Outputs/file_base='parameter_read' Outputs/checkpoint=true " + "--test-checkpoint-half-transient" + recover = false + restep = false + prereq = read-parameters + requirement = "The system shall be able to recover a seeded DRL controller from a checkpointed " + "transient." + capabilities = 'libtorch' + [] + [read-parameters-recover] + type = CSVDiff + input = libtorch_drl_control.i + csvdiff = parameter_read.csv + cli_args = "Outputs/file_base='parameter_read' --recover parameter_read_cp/LATEST" + recover = false + restep = false + delete_output_before_running = false + prereq = read-parameters-half-transient + requirement = "The system shall reproduce the same transient control history after recovering " + "a seeded DRL controller from a checkpoint." 
+ capabilities = 'libtorch' + [] [without-nn] type = CSVDiff input = libtorch_drl_control.i diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i index 86eef63598d9..0efff026d328 100644 --- a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i @@ -121,12 +121,12 @@ [src_control] type = LibtorchDRLControl parameters = "BCs/left_flux/value" - responses = 'center_temp env_temp' + observations = 'center_temp env_temp' # keep consistent with LibtorchDRLControlTrainer input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '270 270' + observation_scaling_factors = '0.03 0.03' + observation_shift_factors = '270 270' action_scaling_factors = 100 execute_on = 'TIMESTEP_BEGIN' diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i index bac8c7ff4687..f13f07fa8565 100644 --- a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i @@ -35,7 +35,7 @@ [Trainers] [nn_trainer] type = LibtorchDRLControlTrainer - response = 'storage/r_transfer:T_reporter:center_temp_tend:value storage/r_transfer:T_reporter:env_temp:value' + observation = 'storage/r_transfer:T_reporter:center_temp_tend:value storage/r_transfer:T_reporter:env_temp:value' control = 'storage/r_transfer:T_reporter:left_flux:value' log_probability = 'storage/r_transfer:T_reporter:log_prob_left_flux:value' reward = 'storage/r_transfer:T_reporter:reward:value' @@ -54,8 +54,8 @@ # keep consistent with 
LibtorchNeuralNetControl input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '270 270' + observation_scaling_factors = '0.03 0.03' + observation_shift_factors = '270 270' action_scaling_factors = 100 read_from_file = false diff --git a/test/tests/controls/libtorch_nn_control/read_control.i b/test/tests/controls/libtorch_nn_control/read_control.i index 5c8aff04a390..ac9a711a9ac6 100644 --- a/test/tests/controls/libtorch_nn_control/read_control.i +++ b/test/tests/controls/libtorch_nn_control/read_control.i @@ -97,7 +97,7 @@ cp = 1.0 [src_control] type = LibtorchNeuralNetControl parameters = "Kernels/anti_source/value" - responses = 'T_max' + observations = 'T_max' execute_on = 'TIMESTEP_BEGIN' [] [] From c295c2fc71260eed86d647e7669afdb84c7cd068 Mon Sep 17 00:00:00 2001 From: Peter German Date: Sat, 25 Apr 2026 10:56:59 -0600 Subject: [PATCH 44/51] Add more docstrings. --- .../controls/LibtorchNeuralNetControl.h | 23 ++++++++++--------- .../libtorch/controls/LibtorchDRLControl.h | 11 +++------ 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index 5c37d4da7e2b..f7641fe8a7c6 100644 --- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -40,7 +40,7 @@ class LibtorchNeuralNetControl : public Control /** * Get the (signal_index)-th signal of the control neural net * @param signal_index The index of the queried control signal - * @return The requested control signal. + * @return The (signal_index)-th control signal */ Real getSignal(const unsigned int signal_index) const; @@ -48,26 +48,27 @@ class LibtorchNeuralNetControl : public Control unsigned int numberOfControlSignals() const { return _control_names.size(); } /** - * Copy a trained neural network into the controller.
- * @param input_nn Neural network that should replace the currently stored controller. + * Function responsible for loading the neural network for the controller. This function is used + * when copying the neural network from a main app which trains it. + * @param input_nn Reference to a neural network which will be copied into this object */ virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn); - /// Load the controller neural network from the configured file on disk. virtual void loadControlNeuralNetFromFile(); - /// Return a reference to the stored neural network. + /// Return a reference to the stored neural network const Moose::LibtorchNeuralNetBase & controlNeuralNet() const; - /// Return true if the object already has a neural network. + /// Return true if the object already has a neural netwok bool hasControlNeuralNet() const { return _nn != nullptr; }; protected: /** - * Check one conditional-parameter rule and raise an input error if it is violated. - * @param param_name Main parameter that controls the rule. - * @param conditional_param Parameters that depend on the main parameter. - * @param should_be_defined Whether the dependent parameters should be present or absent. + * Function responsible for checking for potential user errors in the input file + * @param param_name The name of the main parameter + * @param conditional_param Vector parameter names that depend on the main parameter + * @param should_be_defined If the conditional parameters should be defined when the main + * parameter is defined */ void conditionalParameterError(const std::string & param_name, const std::vector & conditional_param, @@ -76,7 +77,7 @@ class LibtorchNeuralNetControl : public Control /// Refresh the current observation values from the linked postprocessors. void updateCurrentObservation(); - /// Build the normalized input tensor passed into the controller neural network. 
+ /// Function that prepares the input tensor for the controller neural network torch::Tensor prepareInputTensor(); /// The values of the current observed postprocessor values diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index d3a800eb96fc..c70d9eb20c76 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -38,19 +38,14 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl virtual void execute() override; /** - * Return the log-probability of one control signal from the latest actor evaluation. - * @param signal_index Index of the queried control signal. - * @return Log-probability of the queried signal. + * Get the logarithmic probability of (signal_index)-th signal of the control neural net + * @param signal_index The index of the signal + * @return The logarithmic probability of the (signal_index)-th signal */ Real getSignalLogProbability(const unsigned int signal_index) const; - /** - * Copy a trained actor into the controller. - * @param input_nn Actor network that should replace the currently stored controller. - */ virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) override; - /// Load the actor network from the configured checkpoint file. virtual void loadControlNeuralNetFromFile() override; /// Reset the owned policy-sampling generator to a known seed. From 199386e017cabcdfd8e550976794e11dbf4ff166 Mon Sep 17 00:00:00 2001 From: Peter German Date: Sat, 25 Apr 2026 14:58:24 -0600 Subject: [PATCH 45/51] Simplify observationhistory. 
--- .../controls/LibtorchNeuralNetControl.h | 2 +- .../utils/LibtorchArtificialNeuralNet.h | 20 ++- .../utils/LibtorchObservationHistory.h | 66 ++++++---- .../controls/LibtorchNeuralNetControl.C | 22 ++-- .../utils/LibtorchArtificialNeuralNet.C | 26 ++-- .../utils/LibtorchObservationHistory.C | 118 ++++++------------ .../surrogates/LibtorchDRLControlTrainer.h | 1 + .../libtorch/controls/LibtorchDRLControl.C | 4 +- .../trainers/LibtorchDRLControlTrainer.C | 24 ++-- .../unit/src/TestLibtorchRLCore.C | 56 +++++++-- 10 files changed, 177 insertions(+), 162 deletions(-) diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index f7641fe8a7c6..0a91a97159f1 100644 --- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -109,7 +109,7 @@ class LibtorchNeuralNetControl : public Control /// Multipliers for the actions const std::vector _action_scaling_factors; - /// Shared observation normalization and history stacking helper + /// Shared observation history stacking and factor-expansion helper const LibtorchObservationHistory _observation_history; /// Pointer to the neural net object which is supposed to be used to control diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index 6d5bc88c4b22..9c9d2461f20b 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -74,7 +74,6 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu /** * Run a forward pass through the network. * @param x Input tensor for the evaluation. - * @return Network output tensor. 
*/ virtual torch::Tensor forward(const torch::Tensor & x) override; @@ -109,7 +108,6 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu /** * Map an activation name to the orthogonal-initialization gain we want to use. * @param activation Activation name to look up. - * @return Gain used for orthogonal initialization. */ Real determineGain(const std::string & activation); @@ -121,19 +119,16 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu protected: /** - * Normalize affine metadata and fill in defaults when needed. + * Set affine metadata by either accepting the user values or filling defaults. * @param factors User-provided affine factors. * @param expected_size Expected number of entries. * @param default_value Default value used when the vector is empty. * @param factor_name Name used in error messages. - * @param forbid_zero Whether zero entries should be rejected. - * @return Normalized affine-factor vector. */ - static std::vector normalizeAffineFactors(const std::vector & factors, - unsigned int expected_size, - Real default_value, - const std::string & factor_name, - bool forbid_zero = false); + static std::vector setAffineFactors(const std::vector & factors, + unsigned int expected_size, + Real default_value, + const std::string & factor_name); /// Initialize the registered affine metadata buffers used by serialization. void initializeAffineBuffers(); @@ -141,14 +136,12 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu /** * Apply affine preprocessing to the raw input tensor. * @param x Raw input tensor. - * @return Preprocessed input tensor. */ virtual torch::Tensor preprocessInput(const torch::Tensor & x) const; /** * Apply the configured output scaling to a network output tensor. * @param y Raw network output tensor. - * @return Scaled output tensor. 
*/ virtual torch::Tensor scaleOutput(const torch::Tensor & y) const; @@ -176,8 +169,11 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu std::vector _input_scaling_factors; /// Multiplicative scaling applied after the network output is formed std::vector _output_scaling_factors; + /// Registered libtorch buffer holding the affine input shifts torch::Tensor _input_shift_tensor; + /// Registered libtorch buffer holding the affine input scaling factors torch::Tensor _input_scale_tensor; + /// Registered libtorch buffer holding the output scaling factors torch::Tensor _output_scale_tensor; }; diff --git a/framework/include/libtorch/utils/LibtorchObservationHistory.h b/framework/include/libtorch/utils/LibtorchObservationHistory.h index 8f5add0340d8..c3f5206e99d5 100644 --- a/framework/include/libtorch/utils/LibtorchObservationHistory.h +++ b/framework/include/libtorch/utils/LibtorchObservationHistory.h @@ -16,48 +16,68 @@ #include /** - * Shared observation normalization and history stacking logic for libtorch-based controls and + * Shared observation history stacking and factor-expansion logic for libtorch-based controls and * trainers. */ class LibtorchObservationHistory { public: - LibtorchObservationHistory(unsigned int input_timesteps, - const std::vector & shift_factors = {}, - const std::vector & scaling_factors = {}); + /** + * Build an observation-history helper for libtorch inputs. + * @param input_timesteps Number of timesteps to stack into each flattened input. + */ + LibtorchObservationHistory(unsigned int input_timesteps); + /// Return the number of timesteps stacked into each flattened input. 
unsigned int inputTimesteps() const { return _input_timesteps; } - std::vector normalize(const std::vector & observation) const; - - void normalizeInPlace(std::vector & observation) const; - - void normalizeTrajectoryInPlace(std::vector> & observation_trajectories) const; - - void initializeHistory(const std::vector & normalized_observation, + /** + * Fill the history buffer with copies of the current observation. + * @param observation Current observation. + * @param old_observations History buffer that stores previous observations. + */ + void initializeHistory(const std::vector & observation, std::vector> & old_observations) const; - void advanceHistory(const std::vector & normalized_observation, + /** + * Advance the history buffer by inserting the latest observation. + * @param observation Current observation. + * @param old_observations History buffer ordered from newest to oldest. + */ + void advanceHistory(const std::vector & observation, std::vector> & old_observations) const; - std::vector expandFeatureFactors(const std::vector & feature_factors) const; - + /** + * Repeat per-observation-entry factors across all stacked timesteps. + * @param observation_factors Per-entry factors for one observation vector. + */ + std::vector expandObservationFactors(const std::vector & observation_factors) const; + + /** + * Flatten the current observation together with its stored history. + * @param observation Current observation. + * @param old_observations History buffer ordered from newest to oldest. + */ std::vector - stackCurrentObservation(const std::vector & normalized_observation, + stackCurrentObservation(const std::vector & observation, const std::vector> & old_observations) const; - std::vector stackTrajectoryObservation( - const std::vector> & normalized_observation_trajectories, - unsigned int time_index) const; + /** + * Flatten one time slice of observation-component trajectories with causal history. 
+ * @param observation_trajectories Observation trajectories indexed as [component][time]. + * @param time_index Time index to stack. + */ + std::vector + stackTrajectoryObservation(const std::vector> & observation_trajectories, + unsigned int time_index) const; private: - void validateFeatureCount(std::size_t feature_count) const; - void validateTrajectoryShape( - const std::vector> & normalized_observation_trajectories) const; + /// Check that all observation-component trajectories have a consistent shape. + void + validateTrajectoryShape(const std::vector> & observation_trajectories) const; + /// Number of timesteps stacked into each flattened observation. const unsigned int _input_timesteps; - const std::vector _shift_factors; - const std::vector _scaling_factors; }; #endif diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 1e0b56bab535..31ebd467a5ed 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -29,16 +29,21 @@ LibtorchNeuralNetControl::validParams() "observations", "Postprocessors used as the current observation vector."); params.addParam>( "observation_shift_factors", + {}, "Optional offsets applied to the observation values before scaling."); params.addParam>( "observation_scaling_factors", + {}, "Optional multipliers applied after shifting the observation values."); params.addParam("filename", "Checkpoint file to load for the controller network."); params.addParam("torch_script_format", false, "Whether the checkpoint should be read as a scripted Torch module."); - params.addParam( - "input_timesteps", 1, "Number of recent timesteps to stack into each network input."); + params.addRangeCheckedParam( + "input_timesteps", + 1, + "1<=input_timesteps", + "Number of recent timesteps to stack into each network input."); params.addParam>( "num_neurons_per_layer", "Hidden-layer widths 
used when constructing the controller."); params.addParam>( @@ -48,6 +53,7 @@ LibtorchNeuralNetControl::validParams() params.addParam>( "action_scaling_factors", + {}, "Per-action scaling embedded in the controller outputs so saved checkpoints stay in " "physical units."); @@ -62,16 +68,16 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param "current_control_signals", std::vector(_control_names.size(), 0.0))), _observation_names(getParam>("observations")), _input_timesteps(getParam("input_timesteps")), - _observation_shift_factors(isParamValid("observation_shift_factors") + _observation_shift_factors(isParamSetByUser("observation_shift_factors") ? getParam>("observation_shift_factors") : std::vector(_observation_names.size(), 0.0)), - _observation_scaling_factors(isParamValid("observation_scaling_factors") + _observation_scaling_factors(isParamSetByUser("observation_scaling_factors") ? getParam>("observation_scaling_factors") : std::vector(_observation_names.size(), 1.0)), - _action_scaling_factors(isParamValid("action_scaling_factors") + _action_scaling_factors(isParamSetByUser("action_scaling_factors") ? getParam>("action_scaling_factors") : std::vector(_control_names.size(), 1.0)), - _observation_history(_input_timesteps, _observation_shift_factors, _observation_scaling_factors) + _observation_history(_input_timesteps) { // We first check if the input parameters make sense and throw errors if different parameter // combinations are not allowed @@ -125,9 +131,9 @@ LibtorchNeuralNetControl::loadControlNeuralNetFromFile() ? 
getParam>("activation_function") : std::vector({"relu"}); const auto input_shift_factors = - _observation_history.expandFeatureFactors(_observation_shift_factors); + _observation_history.expandObservationFactors(_observation_shift_factors); const auto input_scaling_factors = - _observation_history.expandFeatureFactors(_observation_scaling_factors); + _observation_history.expandObservationFactors(_observation_scaling_factors); auto nn = std::make_shared(filename, num_inputs, num_outputs, diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index ead957d43260..b0bbe31634b4 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -215,11 +215,11 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( _device_type(device_type), _data_type(data_type), _input_shift_factors( - normalizeAffineFactors(input_shift_factors, num_inputs, 0.0, "input_shift_factors")), + setAffineFactors(input_shift_factors, num_inputs, 0.0, "input_shift_factors")), _input_scaling_factors( - normalizeAffineFactors(input_scaling_factors, num_inputs, 1.0, "input_scaling_factors")), + setAffineFactors(input_scaling_factors, num_inputs, 1.0, "input_scaling_factors")), _output_scaling_factors( - normalizeAffineFactors(output_scaling_factors, num_outputs, 1.0, "output_scaling_factors")) + setAffineFactors(output_scaling_factors, num_outputs, 1.0, "output_scaling_factors")) { _activation_function = activation_function; initializeAffineBuffers(); @@ -295,24 +295,18 @@ LibtorchArtificialNeuralNet::initializeNeuralNetwork(const c10::optional -LibtorchArtificialNeuralNet::normalizeAffineFactors(const std::vector & factors, - const unsigned int expected_size, - const Real default_value, - const std::string & factor_name, - const bool forbid_zero) +LibtorchArtificialNeuralNet::setAffineFactors(const std::vector & factors, + const unsigned int 
expected_size, + const Real default_value, + const std::string & factor_name) { - const auto normalized = + const auto resolved_factors = factors.empty() ? std::vector(expected_size, default_value) : factors; - if (normalized.size() != expected_size) + if (resolved_factors.size() != expected_size) mooseError("The number of ", factor_name, " entries must match ", expected_size, "."); - if (forbid_zero) - for (const auto factor : normalized) - if (std::abs(factor) == 0.0) - mooseError("The ", factor_name, " entries must be non-zero."); - - return normalized; + return resolved_factors; } void diff --git a/framework/src/libtorch/utils/LibtorchObservationHistory.C b/framework/src/libtorch/utils/LibtorchObservationHistory.C index 42175597b876..8e989d808d5c 100644 --- a/framework/src/libtorch/utils/LibtorchObservationHistory.C +++ b/framework/src/libtorch/utils/LibtorchObservationHistory.C @@ -16,106 +16,58 @@ #include #include "libmesh/utility.h" -LibtorchObservationHistory::LibtorchObservationHistory(const unsigned int input_timesteps, - const std::vector & shift_factors, - const std::vector & scaling_factors) - : _input_timesteps(input_timesteps), - _shift_factors(shift_factors), - _scaling_factors(scaling_factors.empty() ? 
std::vector(shift_factors.size(), 1.0) - : scaling_factors) +LibtorchObservationHistory::LibtorchObservationHistory(const unsigned int input_timesteps) + : _input_timesteps(input_timesteps) { - if (!_shift_factors.empty() && _shift_factors.size() != _scaling_factors.size()) - mooseError("Observation shift and scaling factors must have the same size."); -} - -void -LibtorchObservationHistory::validateFeatureCount(const std::size_t feature_count) const -{ - if (!_shift_factors.empty() && feature_count != _shift_factors.size()) - mooseError("Observation feature count does not match the configured normalization factors."); + if (_input_timesteps == 0) + mooseError("Observation history requires at least one input timestep."); } void LibtorchObservationHistory::validateTrajectoryShape( - const std::vector> & normalized_observation_trajectories) const + const std::vector> & observation_trajectories) const { - if (normalized_observation_trajectories.empty()) + if (observation_trajectories.empty()) return; - validateFeatureCount(normalized_observation_trajectories.size()); - - const auto trajectory_size = normalized_observation_trajectories.front().size(); - for (const auto & trajectory : normalized_observation_trajectories) + const auto trajectory_size = observation_trajectories.front().size(); + for (const auto & trajectory : observation_trajectories) if (trajectory.size() != trajectory_size) mooseError("Observation trajectories must all have the same number of timesteps."); } -std::vector -LibtorchObservationHistory::normalize(const std::vector & observation) const -{ - auto normalized = observation; - normalizeInPlace(normalized); - return normalized; -} - -void -LibtorchObservationHistory::normalizeInPlace(std::vector & observation) const -{ - validateFeatureCount(observation.size()); - - if (_shift_factors.empty()) - return; - - for (const auto i : make_range(observation.size())) - observation[i] = (observation[i] - _shift_factors[i]) * _scaling_factors[i]; -} - -void 
-LibtorchObservationHistory::normalizeTrajectoryInPlace( - std::vector> & observation_trajectories) const -{ - validateTrajectoryShape(observation_trajectories); - - if (_shift_factors.empty()) - return; - - for (const auto feature_i : make_range(observation_trajectories.size())) - for (auto & value : observation_trajectories[feature_i]) - value = (value - _shift_factors[feature_i]) * _scaling_factors[feature_i]; -} - void LibtorchObservationHistory::initializeHistory( - const std::vector & normalized_observation, - std::vector> & old_observations) const + const std::vector & observation, std::vector> & old_observations) const { - old_observations.assign(_input_timesteps > 0 ? _input_timesteps - 1 : 0, normalized_observation); + old_observations.assign(_input_timesteps - 1, observation); } void -LibtorchObservationHistory::advanceHistory(const std::vector & normalized_observation, +LibtorchObservationHistory::advanceHistory(const std::vector & observation, std::vector> & old_observations) const { if (old_observations.empty()) return; std::rotate(old_observations.rbegin(), old_observations.rbegin() + 1, old_observations.rend()); - old_observations[0] = normalized_observation; + old_observations[0] = observation; } std::vector -LibtorchObservationHistory::expandFeatureFactors(const std::vector & feature_factors) const +LibtorchObservationHistory::expandObservationFactors( + const std::vector & observation_factors) const { - if (feature_factors.empty()) + if (observation_factors.empty()) return {}; std::vector expanded; - expanded.reserve(feature_factors.size() * _input_timesteps); + expanded.reserve(observation_factors.size() * _input_timesteps); for (const auto lag : make_range(_input_timesteps)) { libmesh_ignore(lag); - expanded.insert(expanded.end(), feature_factors.begin(), feature_factors.end()); + expanded.insert(expanded.end(), observation_factors.begin(), observation_factors.end()); } return expanded; @@ -123,22 +75,26 @@ 
LibtorchObservationHistory::expandFeatureFactors(const std::vector & featu std::vector LibtorchObservationHistory::stackCurrentObservation( - const std::vector & normalized_observation, + const std::vector & observation, const std::vector> & old_observations) const { - validateFeatureCount(normalized_observation.size()); + const auto expected_history_size = _input_timesteps - 1; + if (old_observations.size() != expected_history_size) + mooseError("Observation history must contain ", + expected_history_size, + " stored entries, but got ", + old_observations.size(), + "."); std::vector stacked; - stacked.reserve(normalized_observation.size() * _input_timesteps); + stacked.reserve(observation.size() * _input_timesteps); - stacked.insert(stacked.end(), normalized_observation.begin(), normalized_observation.end()); + stacked.insert(stacked.end(), observation.begin(), observation.end()); - for (const auto history_i : make_range(_input_timesteps > 0 ? _input_timesteps - 1 : 0)) + for (const auto & history_entry : old_observations) { - const auto & history_entry = - history_i < old_observations.size() ? 
old_observations[history_i] : normalized_observation; - if (history_entry.size() != normalized_observation.size()) - mooseError("Observation history entries must have the same feature size as the current " + if (history_entry.size() != observation.size()) + mooseError("Observation history entries must have the same size as the current " "observation."); stacked.insert(stacked.end(), history_entry.begin(), history_entry.end()); } @@ -148,26 +104,26 @@ LibtorchObservationHistory::stackCurrentObservation( std::vector LibtorchObservationHistory::stackTrajectoryObservation( - const std::vector> & normalized_observation_trajectories, + const std::vector> & observation_trajectories, const unsigned int time_index) const { - validateTrajectoryShape(normalized_observation_trajectories); + validateTrajectoryShape(observation_trajectories); - if (normalized_observation_trajectories.empty()) + if (observation_trajectories.empty()) return {}; - const auto trajectory_size = normalized_observation_trajectories.front().size(); + const auto trajectory_size = observation_trajectories.front().size(); if (time_index >= trajectory_size) mooseError("Requested observation time index is out of range."); std::vector stacked; - stacked.reserve(normalized_observation_trajectories.size() * _input_timesteps); + stacked.reserve(observation_trajectories.size() * _input_timesteps); for (const auto lag : make_range(_input_timesteps)) { const auto source_index = time_index > lag ? 
time_index - lag : 0; - for (const auto feature_i : make_range(normalized_observation_trajectories.size())) - stacked.push_back(normalized_observation_trajectories[feature_i][source_index]); + for (const auto component_i : make_range(observation_trajectories.size())) + stacked.push_back(observation_trajectories[component_i][source_index]); } return stacked; diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 98fa12c87aa3..7f901b536daf 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -223,6 +223,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase unsigned int _timestep_window; + /// Shared observation history stacking and factor-expansion helper const LibtorchObservationHistory _observation_history; LibtorchRLTrajectoryBuffer _trajectory_buffer; const LibtorchRLMiniBatchSampler _sampler; diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 4c59d63303ca..24043fd6c4a3 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -105,9 +105,9 @@ LibtorchDRLControl::loadControlNeuralNetFromFile() const std::vector & minimum_values = getParam>("min_control_value"); const std::vector & maximum_values = getParam>("max_control_value"); const auto input_shift_factors = - _observation_history.expandFeatureFactors(_observation_shift_factors); + _observation_history.expandObservationFactors(_observation_shift_factors); const auto input_scaling_factors = - _observation_history.expandFeatureFactors(_observation_scaling_factors); + _observation_history.expandObservationFactors(_observation_scaling_factors); 
_actor_nn = std::make_shared(filename, diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index ef720902e92d..6ecee63a2c8a 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -29,9 +29,11 @@ LibtorchDRLControlTrainer::validParams() "observation", "Reporter values containing the observation values from the model."); params.addParam>( "observation_shift_factors", + {}, "Optional offsets applied to the observed state values before scaling."); params.addParam>( "observation_scaling_factors", + {}, "Optional multipliers applied after shifting the observed state values."); params.addRequiredParam>( "control", @@ -39,6 +41,7 @@ LibtorchDRLControlTrainer::validParams() "model simulations."); params.addParam>( "action_scaling_factors", + {}, "Scale factors embedded into the trained policy outputs so transferred and checkpointed " "controllers operate in physical units."); params.addRequiredParam>( @@ -137,14 +140,14 @@ LibtorchDRLControlTrainer::validParams() LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & parameters) : SurrogateTrainerBase(parameters), _state_names(getParam>("observation")), - _state_shift_factors(isParamValid("observation_shift_factors") + _state_shift_factors(isParamSetByUser("observation_shift_factors") ? getParam>("observation_shift_factors") : std::vector(_state_names.size(), 0.0)), - _state_scaling_factors(isParamValid("observation_scaling_factors") + _state_scaling_factors(isParamSetByUser("observation_scaling_factors") ? getParam>("observation_scaling_factors") : std::vector(_state_names.size(), 1.0)), _action_names(getParam>("control")), - _action_scaling_factors(isParamValid("action_scaling_factors") + _action_scaling_factors(isParamSetByUser("action_scaling_factors") ? 
getParam>("action_scaling_factors") : std::vector(_action_names.size(), 1.0)), _log_probability_names(getParam>("log_probability")), @@ -177,7 +180,7 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _entropy_coeff(getParam("entropy_coeff")), _update_counter(_update_frequency), _timestep_window(getParam("timestep_window")), - _observation_history(_input_timesteps, _state_shift_factors, _state_scaling_factors), + _observation_history(_input_timesteps), _value_estimator(_decay_factor, _lambda_factor), _ppo_loss(_clip_param, _entropy_coeff) { @@ -206,9 +209,10 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par getReporterPointers(_log_probability_names, _log_probability_value_pointers); bool filename_valid = isParamValid("filename_base"); - const auto input_shift_factors = _observation_history.expandFeatureFactors(_state_shift_factors); + const auto input_shift_factors = + _observation_history.expandObservationFactors(_state_shift_factors); const auto input_scaling_factors = - _observation_history.expandFeatureFactors(_state_scaling_factors); + _observation_history.expandObservationFactors(_state_scaling_factors); // Initializing the control neural net so that the control can grab it right away _control_nn = std::make_shared( @@ -435,9 +439,9 @@ LibtorchDRLControlTrainer::collectTrajectoriesFromReporters() if (!num_transitions) continue; - std::vector> normalized_responses(_state_names.size()); + std::vector> observation_trajectories(_state_names.size()); for (const auto state_i : index_range(_state_value_pointers)) - normalized_responses[state_i] = extractDownsampledSequence( + observation_trajectories[state_i] = extractDownsampledSequence( (*_state_value_pointers[state_i])[sample_i], 0, num_transitions + 1); LibtorchRLTrajectoryBuffer::Trajectory trajectory; @@ -454,9 +458,9 @@ LibtorchDRLControlTrainer::collectTrajectoriesFromReporters() for (const auto step_i : make_range(num_transitions)) { 
trajectory.observations.push_back( - _observation_history.stackTrajectoryObservation(normalized_responses, step_i)); + _observation_history.stackTrajectoryObservation(observation_trajectories, step_i)); trajectory.next_observations.push_back( - _observation_history.stackTrajectoryObservation(normalized_responses, step_i + 1)); + _observation_history.stackTrajectoryObservation(observation_trajectories, step_i + 1)); } for (const auto action_i : index_range(_action_value_pointers)) diff --git a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C index c5b4c0bb892f..735553b60628 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C +++ b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C @@ -27,23 +27,61 @@ namespace TEST(LibtorchRLCoreTest, ObservationHistoryStacksCurrentAndTrajectoryData) { - LibtorchObservationHistory history(3, {10.0, -2.0}, {0.5, 2.0}); + LibtorchObservationHistory history(3); - const auto normalized = history.normalize({16.0, -1.0}); - EXPECT_DOUBLE_EQ(normalized[0], 3.0); - EXPECT_DOUBLE_EQ(normalized[1], 2.0); + const auto expanded_observation_factors = history.expandObservationFactors({0.5, 2.0}); + EXPECT_EQ(expanded_observation_factors, std::vector({0.5, 2.0, 0.5, 2.0, 0.5, 2.0})); - std::vector> old_responses; - history.initializeHistory({1.0, 6.0}, old_responses); + std::vector> old_observations; + history.initializeHistory({1.0, 6.0}, old_observations); - const auto stacked_current = history.stackCurrentObservation(normalized, old_responses); + const auto stacked_current = history.stackCurrentObservation({3.0, 2.0}, old_observations); EXPECT_EQ(stacked_current, std::vector({3.0, 2.0, 1.0, 6.0, 1.0, 6.0})); - std::vector> trajectories = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; - const auto stacked_trajectory = history.stackTrajectoryObservation(trajectories, 2); + std::vector> observation_trajectories = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; + const auto 
stacked_trajectory = history.stackTrajectoryObservation(observation_trajectories, 2); EXPECT_EQ(stacked_trajectory, std::vector({3.0, 6.0, 2.0, 5.0, 1.0, 4.0})); } +TEST(LibtorchRLCoreTest, ObservationHistoryRejectsMalformedStoredHistory) +{ + LibtorchObservationHistory history(3); + + EXPECT_THROW( + { + try + { + history.stackCurrentObservation({3.0, 2.0}, {{1.0, 6.0}}); + } + catch (const std::exception & e) + { + EXPECT_EQ(std::string(e.what()), + "Observation history must contain 2 stored entries, but got 1."); + throw; + } + }, + std::exception); +} + +TEST(LibtorchRLCoreTest, ObservationHistoryRejectsZeroTimesteps) +{ + EXPECT_THROW( + { + try + { + LibtorchObservationHistory history(0); + static_cast(history); + } + catch (const std::exception & e) + { + EXPECT_EQ(std::string(e.what()), + "Observation history requires at least one input timestep."); + throw; + } + }, + std::exception); +} + TEST(LibtorchRLCoreTest, ValueEstimatorComputesGAETargets) { Moose::LibtorchArtificialNeuralNet value_network("value", 1, 1, {}, {"linear"}); From 5458d59630b5aaf2ce40c185bf700bf73627ae2a Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 28 Apr 2026 08:15:57 -0600 Subject: [PATCH 46/51] Rename some object to make more sense. 
--- .../controls/LibtorchNeuralNetControl.h | 7 +++++-- .../utils/LibtorchArtificialNeuralNet.h | 5 ++++- ...y.h => LibtorchObservationHistoryHelper.h} | 4 ++-- .../controls/LibtorchNeuralNetControl.C | 9 +++------ ...y.C => LibtorchObservationHistoryHelper.C} | 19 ++++++++++--------- .../surrogates/LibtorchDRLControlTrainer.h | 4 ++-- .../unit/src/TestLibtorchRLCore.C | 8 ++++---- 7 files changed, 30 insertions(+), 26 deletions(-) rename framework/include/libtorch/utils/{LibtorchObservationHistory.h => LibtorchObservationHistoryHelper.h} (96%) rename framework/src/libtorch/utils/{LibtorchObservationHistory.C => LibtorchObservationHistoryHelper.C} (86%) diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index 0a91a97159f1..db05d7d8d254 100644 --- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -12,7 +12,7 @@ #pragma once #include "LibtorchArtificialNeuralNet.h" -#include "LibtorchObservationHistory.h" +#include "LibtorchObservationHistoryHelper.h" #include "Control.h" /** @@ -54,6 +54,9 @@ class LibtorchNeuralNetControl : public Control */ virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn); + /** + * Load the controller neural network from the configured checkpoint file. + */ virtual void loadControlNeuralNetFromFile(); /// Return a reference to the stored neural network @@ -110,7 +113,7 @@ class LibtorchNeuralNetControl : public Control const std::vector _action_scaling_factors; /// Shared observation history stacking and factor-expansion helper - const LibtorchObservationHistory _observation_history; + const LibtorchObservationHistoryHelper _observation_history; /// Pointer to the neural net object which is supposed to be used to control /// the parameter values. 
The controller owns this object, but it can be read diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index 9c9d2461f20b..2239d1e07ed7 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -111,7 +111,10 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu */ Real determineGain(const std::string & activation); - /// Initialize the trainable weights and biases. + /** + * Initialize the trainable weights and biases. + * @param generator Optional torch random-number generator used for reproducible initialization. + */ virtual void initializeNeuralNetwork(c10::optional generator = c10::nullopt); /// Store the network architecture in a json file (for debugging, visualization) diff --git a/framework/include/libtorch/utils/LibtorchObservationHistory.h b/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h similarity index 96% rename from framework/include/libtorch/utils/LibtorchObservationHistory.h rename to framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h index c3f5206e99d5..1a7d1b7090ac 100644 --- a/framework/include/libtorch/utils/LibtorchObservationHistory.h +++ b/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h @@ -19,14 +19,14 @@ * Shared observation history stacking and factor-expansion logic for libtorch-based controls and * trainers. */ -class LibtorchObservationHistory +class LibtorchObservationHistoryHelper { public: /** * Build an observation-history helper for libtorch inputs. * @param input_timesteps Number of timesteps to stack into each flattened input. */ - LibtorchObservationHistory(unsigned int input_timesteps); + LibtorchObservationHistoryHelper(unsigned int input_timesteps); /// Return the number of timesteps stacked into each flattened input. 
unsigned int inputTimesteps() const { return _input_timesteps; } diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 31ebd467a5ed..9f3c8a39ce2f 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -228,12 +228,9 @@ LibtorchNeuralNetControl::conditionalParameterError( void LibtorchNeuralNetControl::updateCurrentObservation() { - std::vector raw_observation; - raw_observation.reserve(_observation_names.size()); - for (const auto & obs_i : index_range(_observation_names)) - raw_observation.push_back(*_observation_values[obs_i]); - - _current_observation = raw_observation; + _current_observation.resize(_observation_names.size()); + for (const auto obs_i : index_range(_observation_names)) + _current_observation[obs_i] = *_observation_values[obs_i]; } void diff --git a/framework/src/libtorch/utils/LibtorchObservationHistory.C b/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C similarity index 86% rename from framework/src/libtorch/utils/LibtorchObservationHistory.C rename to framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C index 8e989d808d5c..21d4cf5bbc11 100644 --- a/framework/src/libtorch/utils/LibtorchObservationHistory.C +++ b/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C @@ -9,14 +9,15 @@ #ifdef MOOSE_LIBTORCH_ENABLED -#include "LibtorchObservationHistory.h" +#include "LibtorchObservationHistoryHelper.h" #include "MooseError.h" #include #include "libmesh/utility.h" -LibtorchObservationHistory::LibtorchObservationHistory(const unsigned int input_timesteps) +LibtorchObservationHistoryHelper::LibtorchObservationHistoryHelper( + const unsigned int input_timesteps) : _input_timesteps(input_timesteps) { if (_input_timesteps == 0) @@ -24,7 +25,7 @@ LibtorchObservationHistory::LibtorchObservationHistory(const unsigned int input_ } void 
-LibtorchObservationHistory::validateTrajectoryShape( +LibtorchObservationHistoryHelper::validateTrajectoryShape( const std::vector> & observation_trajectories) const { if (observation_trajectories.empty()) @@ -37,15 +38,15 @@ LibtorchObservationHistory::validateTrajectoryShape( } void -LibtorchObservationHistory::initializeHistory( +LibtorchObservationHistoryHelper::initializeHistory( const std::vector & observation, std::vector> & old_observations) const { old_observations.assign(_input_timesteps - 1, observation); } void -LibtorchObservationHistory::advanceHistory(const std::vector & observation, - std::vector> & old_observations) const +LibtorchObservationHistoryHelper::advanceHistory( + const std::vector & observation, std::vector> & old_observations) const { if (old_observations.empty()) return; @@ -55,7 +56,7 @@ LibtorchObservationHistory::advanceHistory(const std::vector & observation } std::vector -LibtorchObservationHistory::expandObservationFactors( +LibtorchObservationHistoryHelper::expandObservationFactors( const std::vector & observation_factors) const { if (observation_factors.empty()) @@ -74,7 +75,7 @@ LibtorchObservationHistory::expandObservationFactors( } std::vector -LibtorchObservationHistory::stackCurrentObservation( +LibtorchObservationHistoryHelper::stackCurrentObservation( const std::vector & observation, const std::vector> & old_observations) const { @@ -103,7 +104,7 @@ LibtorchObservationHistory::stackCurrentObservation( } std::vector -LibtorchObservationHistory::stackTrajectoryObservation( +LibtorchObservationHistoryHelper::stackTrajectoryObservation( const std::vector> & observation_trajectories, const unsigned int time_index) const { diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 7f901b536daf..6031fc9de0cb 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ 
b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -13,7 +13,7 @@ #include #include "LibtorchActorNeuralNet.h" -#include "LibtorchObservationHistory.h" +#include "LibtorchObservationHistoryHelper.h" #include "LibtorchRLMiniBatchSampler.h" #include "LibtorchRLPPOLoss.h" #include "LibtorchRLTrajectoryBuffer.h" @@ -224,7 +224,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase unsigned int _timestep_window; /// Shared observation history stacking and factor-expansion helper - const LibtorchObservationHistory _observation_history; + const LibtorchObservationHistoryHelper _observation_history; LibtorchRLTrajectoryBuffer _trajectory_buffer; const LibtorchRLMiniBatchSampler _sampler; const LibtorchRLValueEstimator _value_estimator; diff --git a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C index 735553b60628..7ac8c35b3892 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C +++ b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C @@ -13,7 +13,7 @@ #include "LibtorchActorNeuralNet.h" #include "LibtorchArtificialNeuralNet.h" -#include "LibtorchObservationHistory.h" +#include "LibtorchObservationHistoryHelper.h" #include "LibtorchRandomUtils.h" #include "LibtorchRLMiniBatchSampler.h" #include "LibtorchRLPPOLoss.h" @@ -27,7 +27,7 @@ namespace TEST(LibtorchRLCoreTest, ObservationHistoryStacksCurrentAndTrajectoryData) { - LibtorchObservationHistory history(3); + LibtorchObservationHistoryHelper history(3); const auto expanded_observation_factors = history.expandObservationFactors({0.5, 2.0}); EXPECT_EQ(expanded_observation_factors, std::vector({0.5, 2.0, 0.5, 2.0, 0.5, 2.0})); @@ -45,7 +45,7 @@ TEST(LibtorchRLCoreTest, ObservationHistoryStacksCurrentAndTrajectoryData) TEST(LibtorchRLCoreTest, ObservationHistoryRejectsMalformedStoredHistory) { - LibtorchObservationHistory history(3); + LibtorchObservationHistoryHelper history(3); EXPECT_THROW( { 
@@ -69,7 +69,7 @@ TEST(LibtorchRLCoreTest, ObservationHistoryRejectsZeroTimesteps) { try { - LibtorchObservationHistory history(0); + LibtorchObservationHistoryHelper history(0); static_cast(history); } catch (const std::exception & e) From 1dad4e1d49be6fc69fe2ddbbe2fb3d317efcc66e Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 28 Apr 2026 13:39:16 -0600 Subject: [PATCH 47/51] Remove LiftDragRewardPostprocessor from drl-mods --- .../LiftDragRewardPostprocessor.h | 53 ------------- .../LiftDragRewardPostprocessor.C | 78 ------------------- 2 files changed, 131 deletions(-) delete mode 100644 modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h delete mode 100644 modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C diff --git a/modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h b/modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h deleted file mode 100644 index 52217ac90d19..000000000000 --- a/modules/stochastic_tools/include/postprocessors/LiftDragRewardPostprocessor.h +++ /dev/null @@ -1,53 +0,0 @@ -//* This file is part of the MOOSE framework -//* https://www.mooseframework.org -//* -//* All rights reserved, see COPYRIGHT for full restrictions -//* https://github.com/idaholab/moose/blob/master/COPYRIGHT -//* -//* Licensed under LGPL 2.1, please see LICENSE for details -//* https://www.gnu.org/licenses/lgpl-2.1.html - -#pragma once - -// MOOSE includes -#include "GeneralPostprocessor.h" - -class LiftDragRewardPostprocessor : public GeneralPostprocessor -{ -public: - static InputParameters validParams(); - - /** - * Build the rolling lift/drag reward postprocessor. - * @param parameters Input parameters for the postprocessor. - */ - LiftDragRewardPostprocessor(const InputParameters & parameters); - - /// Update the rolling lift and drag averages. 
- virtual void execute() override; - virtual void initialize() override {} - using Postprocessor::getValue; - - /** - * Return the current reward value. - * @return Reward built from the rolling drag penalty and lift penalty. - */ - virtual Real getValue() const override; - -protected: - const PostprocessorValue & _lift; - const PostprocessorValue & _drag; - - const unsigned int _averaging_window; - - const Real _coeff_1; - const Real _coeff_2; - - Real _avg_lift; - Real _avg_drag; - - std::vector _lift_history; - std::vector _drag_history; - - unsigned int _replace_counter; -}; diff --git a/modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C b/modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C deleted file mode 100644 index ed10858284c6..000000000000 --- a/modules/stochastic_tools/src/postprocessors/LiftDragRewardPostprocessor.C +++ /dev/null @@ -1,78 +0,0 @@ -//* This file is part of the MOOSE framework -//* https://www.mooseframework.org -//* -//* All rights reserved, see COPYRIGHT for full restrictions -//* https://github.com/idaholab/moose/blob/master/COPYRIGHT -//* -//* Licensed under LGPL 2.1, please see LICENSE for details -//* https://www.gnu.org/licenses/lgpl-2.1.html - -#include "LiftDragRewardPostprocessor.h" - -#include -#include - -registerMooseObject("StochasticToolsApp", LiftDragRewardPostprocessor); - -InputParameters -LiftDragRewardPostprocessor::validParams() -{ - InputParameters params = GeneralPostprocessor::validParams(); - - params.addRequiredParam( - "lift", "Postprocessor that supplies the current lift coefficient."); - params.addRequiredParam( - "drag", "Postprocessor that supplies the current drag coefficient."); - - params.addParam( - "averaging_window", - 1, - "Number of timesteps to include in the rolling lift and drag averages."); - params.addParam( - "coeff_1", 1.59, "Baseline reward offset before drag and lift penalties are applied."); - params.addParam("coeff_2", 0.2, "Multiplier applied 
to the absolute-value lift penalty."); - - params.addClassDescription( - "Turns rolling lift and drag coefficients into a simple scalar reward signal."); - - return params; -} - -LiftDragRewardPostprocessor::LiftDragRewardPostprocessor(const InputParameters & parameters) - : GeneralPostprocessor(parameters), - _lift(getPostprocessorValue("lift")), - _drag(getPostprocessorValue("drag")), - _averaging_window(getParam("averaging_window")), - _coeff_1(getParam("coeff_1")), - _coeff_2(getParam("coeff_2")), - _avg_lift(0.0), - _avg_drag(0.0), - _lift_history(std::vector(_averaging_window, 0.0)), - _drag_history(std::vector(_averaging_window, 0.0)) -{ -} - -Real -LiftDragRewardPostprocessor::getValue() const -{ - return _coeff_1 - _avg_drag - _coeff_2 * std::abs(_avg_lift); -} - -void -LiftDragRewardPostprocessor::execute() -{ - auto rolling_index = _t_step % _averaging_window; - _lift_history[rolling_index] = _lift; - _drag_history[rolling_index] = _drag; - - if (!rolling_index) - { - const auto normalization = _t_step ? _averaging_window : 1; - _avg_lift = std::reduce(_lift_history.begin(), _lift_history.end()) / normalization; - _avg_drag = std::reduce(_drag_history.begin(), _drag_history.end()) / normalization; - _lift_history = std::vector(_averaging_window, 0.0); - _drag_history = std::vector(_averaging_window, 0.0); - } - - _replace_counter++; -} From e17d7f8b061080e1b8b242e9a85d3d8e958fa19f Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 28 Apr 2026 13:44:45 -0600 Subject: [PATCH 48/51] More docstrings, cleanup remove testing PPs. 
--- .../utils/LibtorchObservationHistoryHelper.h | 10 +-- .../libtorch/utils/LibtorchRandomUtils.h | 8 ++- .../utils/LibtorchObservationHistoryHelper.C | 22 +++---- ...arFVAdvectionDiffusionFunctorDirichletBC.C | 2 - .../FunctionValuePostprocessor.C | 3 - framework/src/postprocessors/PointValue.C | 4 -- .../libtorch_drl_control_sub.i | 1 - .../libtorch/controls/LibtorchDRLControl.h | 15 +++++ .../libtorch/reporters/DRLRewardReporter.h | 3 + .../surrogates/LibtorchDRLControlTrainer.h | 19 +++++- .../utils/LibtorchActionDistribution.h | 65 +++++++++++++++---- .../libtorch/utils/LibtorchActorNeuralNet.h | 56 ++++++++++++---- .../utils/LibtorchRLMiniBatchSampler.h | 5 +- .../libtorch/utils/LibtorchRLPPOLoss.h | 4 +- .../libtorch/utils/LibtorchRLValueEstimator.h | 8 ++- .../libtorch/controls/LibtorchDRLControl.C | 10 ++- .../trainers/LibtorchDRLControlTrainer.C | 8 +-- .../transfers/SamplerDRLControlTransfer.C | 4 +- .../unit/src/TestLibtorchRLCore.C | 4 +- 19 files changed, 176 insertions(+), 75 deletions(-) diff --git a/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h b/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h index 1a7d1b7090ac..97a19521a2db 100644 --- a/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h +++ b/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h @@ -64,17 +64,19 @@ class LibtorchObservationHistoryHelper /** * Flatten one time slice of observation-component trajectories with causal history. - * @param observation_trajectories Observation trajectories indexed as [component][time]. + * This uses [component][time] because the trainer receives reporter data one observation + * component at a time, so keeping that layout avoids building an extra transposed + * [time][component] container before stacking. + * @param component_trajectories Observation trajectories indexed as [component][time]. * @param time_index Time index to stack. 
*/ std::vector - stackTrajectoryObservation(const std::vector> & observation_trajectories, + stackTrajectoryObservation(const std::vector> & component_trajectories, unsigned int time_index) const; private: /// Check that all observation-component trajectories have a consistent shape. - void - validateTrajectoryShape(const std::vector> & observation_trajectories) const; + void validateTrajectoryShape(const std::vector> & component_trajectories) const; /// Number of timesteps stacked into each flattened observation. const unsigned int _input_timesteps; diff --git a/framework/include/libtorch/utils/LibtorchRandomUtils.h b/framework/include/libtorch/utils/LibtorchRandomUtils.h index 2cb664198528..9ac469c57ced 100644 --- a/framework/include/libtorch/utils/LibtorchRandomUtils.h +++ b/framework/include/libtorch/utils/LibtorchRandomUtils.h @@ -21,12 +21,18 @@ namespace Moose /// Create an owned CPU generator using libtorch's default seed behavior. at::Generator makeLibtorchCPUGenerator(); -/// Create an owned CPU generator with an explicit seed. +/** + * Create an owned CPU generator with an explicit seed. + * @param seed Seed value passed to the libtorch CPU generator. + */ at::Generator makeLibtorchCPUGenerator(uint64_t seed); /** * Fill a tensor with a (semi) orthogonal matrix using the provided generator. * This mirrors torch::nn::init::orthogonal_, but avoids the ambient default RNG. + * @param tensor Tensor to initialize in place. + * @param gain Scaling factor applied after the orthogonal initialization. + * @param generator Optional torch random-number generator used to sample the initialization. 
*/ void orthogonalInitializeTensor(torch::Tensor & tensor, Real gain = 1.0, diff --git a/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C b/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C index 21d4cf5bbc11..a98d931d71e1 100644 --- a/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C +++ b/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C @@ -26,13 +26,13 @@ LibtorchObservationHistoryHelper::LibtorchObservationHistoryHelper( void LibtorchObservationHistoryHelper::validateTrajectoryShape( - const std::vector> & observation_trajectories) const + const std::vector> & component_trajectories) const { - if (observation_trajectories.empty()) + if (component_trajectories.empty()) return; - const auto trajectory_size = observation_trajectories.front().size(); - for (const auto & trajectory : observation_trajectories) + const auto trajectory_size = component_trajectories.front().size(); + for (const auto & trajectory : component_trajectories) if (trajectory.size() != trajectory_size) mooseError("Observation trajectories must all have the same number of timesteps."); } @@ -105,26 +105,26 @@ LibtorchObservationHistoryHelper::stackCurrentObservation( std::vector LibtorchObservationHistoryHelper::stackTrajectoryObservation( - const std::vector> & observation_trajectories, + const std::vector> & component_trajectories, const unsigned int time_index) const { - validateTrajectoryShape(observation_trajectories); + validateTrajectoryShape(component_trajectories); - if (observation_trajectories.empty()) + if (component_trajectories.empty()) return {}; - const auto trajectory_size = observation_trajectories.front().size(); + const auto trajectory_size = component_trajectories.front().size(); if (time_index >= trajectory_size) mooseError("Requested observation time index is out of range."); std::vector stacked; - stacked.reserve(observation_trajectories.size() * _input_timesteps); + stacked.reserve(component_trajectories.size() * 
_input_timesteps); for (const auto lag : make_range(_input_timesteps)) { const auto source_index = time_index > lag ? time_index - lag : 0; - for (const auto component_i : make_range(observation_trajectories.size())) - stacked.push_back(observation_trajectories[component_i][source_index]); + for (const auto component_i : make_range(component_trajectories.size())) + stacked.push_back(component_trajectories[component_i][source_index]); } return stacked; diff --git a/framework/src/linearfvbcs/LinearFVAdvectionDiffusionFunctorDirichletBC.C b/framework/src/linearfvbcs/LinearFVAdvectionDiffusionFunctorDirichletBC.C index c30480d2c51c..8d799da2e592 100644 --- a/framework/src/linearfvbcs/LinearFVAdvectionDiffusionFunctorDirichletBC.C +++ b/framework/src/linearfvbcs/LinearFVAdvectionDiffusionFunctorDirichletBC.C @@ -57,8 +57,6 @@ LinearFVAdvectionDiffusionFunctorDirichletBC::computeBoundaryValueMatrixContribu Real LinearFVAdvectionDiffusionFunctorDirichletBC::computeBoundaryValueRHSContribution() const { - // if (name() == "gap_x") - // std::cout << name() << std::endl; // Fetch the boundary value from the provided functor. 
return computeBoundaryValue(); } diff --git a/framework/src/postprocessors/FunctionValuePostprocessor.C b/framework/src/postprocessors/FunctionValuePostprocessor.C index d3159730058c..71159ce07ca2 100644 --- a/framework/src/postprocessors/FunctionValuePostprocessor.C +++ b/framework/src/postprocessors/FunctionValuePostprocessor.C @@ -83,8 +83,5 @@ FunctionValuePostprocessor::getValue() const p(j) = *_point[j]; if (_time_pp) return _scale_factor * _function.value(*_time_pp, p); - - // std::cout << name() << " " << _t << " " << _scale_factor * _function.value(_t, p) << std::endl; - return _scale_factor * _function.value(_t, p); } diff --git a/framework/src/postprocessors/PointValue.C b/framework/src/postprocessors/PointValue.C index 2e9f25f5ef9c..3b9cf803f65e 100644 --- a/framework/src/postprocessors/PointValue.C +++ b/framework/src/postprocessors/PointValue.C @@ -50,8 +50,6 @@ void PointValue::execute() { _value = _system.point_value(_var_number, _point, false); - // if (name() == "p1y") - // std::cout << "executing" << name() << " " << _value << std::endl; /** * If we get exactly zero, we don't know if the locator couldn't find an element, or * if the solution is truly zero, more checking is needed. 
@@ -73,7 +71,5 @@ PointValue::execute() Real PointValue::getValue() const { - // if (name() == "p1y") - // std::cout << name() << " " << _value << std::endl; return _value; } diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i index f99771fcaef6..ff4f127ef0af 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i @@ -117,7 +117,6 @@ air_effective_k = 0.5 # W/(m K) [] [Controls] - # inactive = 'src_control_final' [src_control] type = LibtorchDRLControl parameters = "BCs/top_flux/value" diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index c70d9eb20c76..92e06e48e8bd 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -44,6 +44,12 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl */ Real getSignalLogProbability(const unsigned int signal_index) const; + /** + * Copy an actor network into this DRL controller. + * @param input_nn Actor network to copy into the controller. + */ + void loadControlNeuralNet(const Moose::LibtorchActorNeuralNet & input_nn); + virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) override; virtual void loadControlNeuralNetFromFile() override; @@ -55,16 +61,25 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl /// The log probability of control signals from the last evaluation of the controller std::vector & _current_control_signal_log_probabilities; + /// The smoothed control signal from the previous execution, saved for restart/recover. 
std::vector & _previous_control_signal; + /// The current smoothed control signal applied to the controllable parameters. std::vector & _current_smoothed_signal; + /// Actor network used when the controller operates as a stochastic policy. std::shared_ptr _actor_nn; + /// Owned libtorch CPU generator used for policy sampling. at::Generator _policy_generator; + /// Restartable serialized state for the owned policy-sampling generator. std::vector & _policy_generator_state; + /// Number of controller executions since initialization or restart. unsigned int & _call_counter; + /// Number of executions to reuse a sampled action before evaluating the policy again. const unsigned int _num_steps_in_period; + /// Relaxation factor applied while smoothing control updates. const Real _smoother; + /// Whether to sample actions stochastically instead of using the deterministic actor output. const bool _stochastic; private: diff --git a/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h b/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h index d3421239d970..2482e7166a9d 100644 --- a/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h +++ b/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h @@ -29,10 +29,13 @@ class DRLRewardReporter : public GeneralReporter, public SurrogateModelInterface /// The reward values which will be saved Real & _average_reward; + /// The standard deviation of the reward values which will be saved Real & _std_reward; + /// The per-sample average reward values which will be saved std::vector & _sample_average_reward; + /// The per-sample reward standard deviations which will be saved std::vector & _sample_std_reward; /// The DRL trainer which computes the reward values diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 
6031fc9de0cb..30cf9041d316 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -61,7 +61,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase void trainController(const LibtorchRLTrajectoryBuffer::TensorBatch & batch); /// Return the current actor network. - const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const { return *_control_nn; } + const Moose::LibtorchActorNeuralNet & controlNeuralNet() const { return *_control_nn; } /// Return the trainer seed used for sampling and shuffling. unsigned int seed() const { return _seed; } @@ -140,6 +140,7 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Decaying factor that is used when calculating the return from the reward const Real _decay_factor; + /// GAE lambda factor used while estimating advantages and returns. const Real _lambda_factor; /// Name of the pytorch output file. This is used for loading and storing @@ -158,9 +159,12 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Storage for the current average episode reward Real _average_episode_reward; + /// Storage for the current episode reward standard deviation Real _std_episode_reward; + /// Per-sample mean episodic rewards over the latest update window std::vector _sample_average_episode_reward; + /// Per-sample episodic reward standard deviations over the latest update window std::vector _sample_std_episode_reward; /// Switch to enable the standardization of the advantages @@ -172,9 +176,9 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Base seed for stochastic optimizers and policy sampling. 
const unsigned int _seed; - /// min + /// Optional lower bounds for each control signal std::vector _min_values; - /// max + /// Optional upper bounds for each control signal std::vector _max_values; /// Pointer to the control (or actor) neural net object @@ -182,10 +186,14 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Pointer to the critic neural net object std::shared_ptr _critic_nn; + /// Best average episode reward seen so far while training Real _highest_reward; + /// Entropy bonus coefficient used in the PPO actor loss Real _entropy_coeff; + /// Adam optimizer used to update the actor network std::unique_ptr _actor_optimizer; + /// Adam optimizer used to update the critic network std::unique_ptr _critic_optimizer; private: @@ -221,13 +229,18 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Counter for number of transient simulations that have been run before updating the controller unsigned int _update_counter; + /// Reporter downsampling stride used while assembling rollout trajectories unsigned int _timestep_window; /// Shared observation history stacking and factor-expansion helper const LibtorchObservationHistoryHelper _observation_history; + /// Accumulated on-policy rollout data waiting to be flattened and trained on LibtorchRLTrajectoryBuffer _trajectory_buffer; + /// Mini-batch sampler used to split flattened rollout data for PPO updates const LibtorchRLMiniBatchSampler _sampler; + /// Helper that builds value targets and advantages from collected trajectories const LibtorchRLValueEstimator _value_estimator; + /// PPO loss helper for the actor and critic updates const LibtorchRLPPOLoss _ppo_loss; }; diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h index 119e9f7be58f..e1e8f04b4b55 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h +++ 
b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h @@ -54,50 +54,56 @@ class LibtorchActionDistribution : public torch::nn::Module /** * Draw a stochastic action sample in physical units. - * @return Sampled action tensor. + * @param generator Optional random-number generator used for sampling. */ virtual torch::Tensor sample(c10::optional generator = c10::nullopt) const = 0; - /** - * Return the deterministic action used for evaluation. - * @return Deterministic action tensor. - */ + /// Return the deterministic action used for evaluation. virtual torch::Tensor deterministicAction() const = 0; /** * Evaluate the log-probability of an action under the current distribution. * @param action Action tensor in physical units. - * @return Log-probability tensor for the action. */ virtual torch::Tensor logProbability(const torch::Tensor & action) const = 0; - /** - * Compute the entropy of the current distribution. - * @return Entropy tensor. - */ + /// Compute the entropy of the current distribution. virtual torch::Tensor entropy() const = 0; - /** - * Tell callers whether the distribution enforces explicit action bounds. - * @return True for bounded distributions, false for unbounded ones. - */ + /// Tell callers whether the distribution enforces explicit action bounds. virtual bool isBounded() const = 0; /// Sync cached scaling metadata from the registered buffers after loading state. void synchronizeScalingFactorsFromBuffer(); protected: + /** + * Convert actor features to the configured device and scalar type. + * @param input Raw actor feature tensor. + */ torch::Tensor prepareFeatures(const torch::Tensor & input) const; + /** + * Convert actions to the configured device and scalar type. + * @param action Raw action tensor. + */ torch::Tensor prepareAction(const torch::Tensor & action) const; + /// Return the registered tensor that stores per-action scaling factors. 
const torch::Tensor & actionScaleTensor() const { return _action_scale_tensor; } + /// Module name used for registration and serialization. const std::string _name; + /// Number of actor features feeding this distribution. const unsigned int _num_inputs; + /// Number of action dimensions produced by this distribution. const unsigned int _num_outputs; + /// Torch device used by this distribution. const torch::DeviceType _device_type; + /// Torch scalar type used by this distribution. const torch::ScalarType _data_type; + /// Cached per-action scaling factors applied in physical units. std::vector _output_scaling_factors; + /// Registered libtorch buffer holding the per-action scaling factors. torch::Tensor _action_scale_tensor; }; @@ -142,21 +148,34 @@ class LibtorchGaussianActionDistribution : public LibtorchActionDistribution virtual bool isBounded() const override { return false; } + /// Return whether the Gaussian std ignores the current actor features. bool stateIndependentStd() const { return _state_independent_std; } + /// Return the Gaussian mean head. torch::nn::Linear & meanModule() { return _mean_module; } + /// Return the Gaussian mean head. const torch::nn::Linear & meanModule() const { return _mean_module; } + /// Return the Gaussian std head. torch::nn::Linear & stdModule() { return _std_module; } + /// Return the Gaussian std head. const torch::nn::Linear & stdModule() const { return _std_module; } + /// Return the cached Gaussian standard deviation tensor. const torch::Tensor & stdTensor() const { return _std_tensor; } private: + /// Build and register the Gaussian distribution heads. void constructDistribution(); + /// Whether the Gaussian std ignores the current actor features. const bool _state_independent_std; + /// Linear head that produces the Gaussian action mean. torch::nn::Linear _mean_module{nullptr}; + /// Linear head that produces the Gaussian log-std inputs or bias-only std state. 
torch::nn::Linear _std_module{nullptr}; + /// Cached Gaussian action mean from the latest reset. torch::Tensor _mean; + /// Cached Gaussian action standard deviation from the latest reset. torch::Tensor _std_tensor; + /// Cached Gaussian action log standard deviation from the latest reset. torch::Tensor _log_std_tensor; }; @@ -203,27 +222,45 @@ class LibtorchBetaActionDistribution : public LibtorchActionDistribution virtual bool isBounded() const override { return true; } + /// Return the Beta alpha head. torch::nn::Linear & alphaModule() { return _alpha_module; } + /// Return the Beta alpha head. const torch::nn::Linear & alphaModule() const { return _alpha_module; } + /// Return the Beta beta head. torch::nn::Linear & betaModule() { return _beta_module; } + /// Return the Beta beta head. const torch::nn::Linear & betaModule() const { return _beta_module; } + /// Return the cached Beta alpha tensor. const torch::Tensor & alphaTensor() const { return _alpha_tensor; } + /// Return the cached Beta beta tensor. const torch::Tensor & betaTensor() const { return _beta_tensor; } private: + /// Build and register the Beta distribution heads. void constructDistribution(); + /// Lower action bounds in physical units. const std::vector _minimum_values; + /// Upper action bounds in physical units. const std::vector _maximum_values; + /// Linear head that produces the Beta alpha parameters. torch::nn::Linear _alpha_module{nullptr}; + /// Linear head that produces the Beta beta parameters. torch::nn::Linear _beta_module{nullptr}; + /// Tensor form of the lower action bounds. torch::Tensor _min_tensor; + /// Tensor form of the upper action bounds. torch::Tensor _max_tensor; + /// Cached Beta alpha parameters from the latest reset. torch::Tensor _alpha_tensor; + /// Cached Beta beta parameters from the latest reset. torch::Tensor _beta_tensor; + /// Cached sum of the alpha and beta parameters. 
torch::Tensor _alpha_beta_tensor; + /// Cached log normalization factor for the latest Beta distribution state. torch::Tensor _log_norm; + /// Cached normalized Beta mean from the latest reset. torch::Tensor _mean; }; diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h index 9613c56af151..774cb198b051 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -18,6 +18,9 @@ namespace Moose { +/** + * Feed-forward actor network coupled to a Gaussian or Beta action distribution. + */ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet { public: @@ -61,18 +64,13 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, const bool build_on_construct = true); - /** - * Run the actor forward pass and return a sampled action. - * @param x Input tensor for the evaluation. - * @return Action tensor produced by the actor. - */ virtual torch::Tensor forward(const torch::Tensor & x) override; /** * Evaluate the actor and either sample from it or use its deterministic action. * @param input Input tensor for the evaluation. * @param sampled Whether to draw a stochastic sample. - * @return Action tensor produced by the actor. + * @param generator Optional random-number generator used for sampling. */ virtual torch::Tensor evaluate(torch::Tensor & input, bool sampled, @@ -80,33 +78,40 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet /** * Sample an action from the already-reset distribution. - * @return Sampled action tensor. + * @param generator Optional random-number generator used for sampling. */ virtual torch::Tensor sample(c10::optional generator = c10::nullopt); - /// Build the hidden layers and the matching action-distribution module. 
virtual void constructNeuralNetwork() override; /// Return the active action distribution as the common base type. const LibtorchActionDistribution & actionDistribution() const { return *_action_distribution; } + /// Return the active action distribution as the common base type. LibtorchActionDistribution & actionDistribution() { return *_action_distribution; } /// Return the Gaussian action distribution pointer, or nullptr for bounded actors. const LibtorchGaussianActionDistribution * gaussianActionDistributionPtr() const; + /// Return the Gaussian action distribution pointer, or nullptr for bounded actors. LibtorchGaussianActionDistribution * gaussianActionDistributionPtr(); /// Return the Gaussian action distribution reference. Errors if the actor is bounded. const LibtorchGaussianActionDistribution & gaussianActionDistribution() const; + /// Return the Gaussian action distribution reference. Errors if the actor is bounded. LibtorchGaussianActionDistribution & gaussianActionDistribution(); /// Return the Beta action distribution pointer, or nullptr for Gaussian actors. const LibtorchBetaActionDistribution * betaActionDistributionPtr() const; + /// Return the Beta action distribution pointer, or nullptr for Gaussian actors. LibtorchBetaActionDistribution * betaActionDistributionPtr(); /// Return the Beta action distribution reference. Errors if the actor is unbounded. const LibtorchBetaActionDistribution & betaActionDistribution() const; + /// Return the Beta action distribution reference. Errors if the actor is unbounded. LibtorchBetaActionDistribution & betaActionDistribution(); + /// Return whether the Gaussian std ignores the current actor features. bool stateIndependentStd() const { return _state_independent_std; } + /// Return the configured lower action bounds. const std::vector & minValues() const { return _minimum_values; } + /// Return the configured upper action bounds. 
const std::vector & maxValues() const { return _maximum_values; } /** @@ -118,24 +123,23 @@ class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet /** * Evaluate the log-probability of an action under the current actor state. * @param other Action tensor in physical units. - * @return Log-probability tensor. */ torch::Tensor logProbability(const torch::Tensor & other); - /** - * Compute the entropy of the current action distribution. - * @return Entropy tensor. - */ + /// Compute the entropy of the current action distribution. torch::Tensor entropy(); - /// Initialize the hidden layers and action-distribution parameters. virtual void initializeNeuralNetwork(c10::optional generator = c10::nullopt) override; protected: + /// Lower action bounds used by bounded actor distributions. const std::vector _minimum_values; + /// Upper action bounds used by bounded actor distributions. const std::vector _maximum_values; + /// Whether the Gaussian std ignores the current actor features. const bool _state_independent_std; + /// Action-distribution module attached to the actor output. std::shared_ptr _action_distribution; }; @@ -156,11 +160,23 @@ void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, } +/** + * Serialize the actor-network metadata needed for restart. + * @param stream Stream that receives the serialized data. + * @param nn Actor network shared pointer to serialize. + * @param context Serialization context passed through MOOSE data I/O. + */ template <> void dataStore(std::ostream & stream, std::shared_ptr & nn, void * context); +/** + * Deserialize the actor-network metadata needed for restart. + * @param stream Stream that provides the serialized data. + * @param nn Actor network shared pointer to populate. + * @param context Serialization context passed through MOOSE data I/O. 
+ */ template <> void dataLoad(std::istream & stream, std::shared_ptr & nn, @@ -169,11 +185,23 @@ void dataLoad(std::istream & stream, // This is needed because the reporter which is used to output the neural net parameters to JSON // requires a dataStore/dataLoad. However, these functions will be empty due to the fact that // we are only interested in the JSON output and we don't want to output everything +/** + * Placeholder serializer for reporter-only actor pointers. + * @param stream Stream that would receive the serialized data. + * @param nn Reporter actor pointer. + * @param context Serialization context passed through MOOSE data I/O. + */ template <> void dataStore(std::ostream & stream, Moose::LibtorchActorNeuralNet const *& nn, void * context); +/** + * Placeholder deserializer for reporter-only actor pointers. + * @param stream Stream that would provide the serialized data. + * @param nn Reporter actor pointer. + * @param context Serialization context passed through MOOSE data I/O. + */ template <> void dataLoad(std::istream & stream, Moose::LibtorchActorNeuralNet const *& nn, diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h index af3c290923d1..8ea0ba65a1e0 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h @@ -45,8 +45,8 @@ class LibtorchRLMiniBatchSampler * Shuffle a flattened rollout batch into PPO-sized chunks. * @param batch Flattened rollout tensors ready for PPO updates. * @param batch_size Preferred number of rows per mini-batch. - * @param standardize_advantage Whether to normalize the advantages inside each chunk. - * @return Vector of sampled mini-batches. + * @param standardize_advantage Whether to normalize the advantages inside each mini-batch. 
+ * @param generator Optional random-number generator used for the row permutation. */ std::vector sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, @@ -66,7 +66,6 @@ class LibtorchRLMiniBatchSampler * @param batch Flattened rollout tensors. * @param indices Row indices assigned to this mini-batch. * @param standardize_advantage Whether to normalize the advantages in this slice. - * @return One PPO mini-batch. */ static LibtorchRLMiniBatch makeMiniBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, const torch::Tensor & indices, diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h index 46650f154cf6..8e526138bfc8 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h @@ -43,7 +43,6 @@ class LibtorchRLPPOLoss * @param policy_network Actor network used for the policy term. * @param value_network Critic network used for the value term. * @param batch Mini-batch pulled from the on-policy trajectory buffer. - * @return The three loss components for the mini-batch. */ LibtorchRLPPOLossOutput compute(Moose::LibtorchActorNeuralNet & policy_network, Moose::LibtorchArtificialNeuralNet & value_network, @@ -53,11 +52,12 @@ class LibtorchRLPPOLoss /** * Collapse multi-action log-probabilities or entropies into one column tensor. * @param tensor Action-wise tensor to reduce. - * @return Column tensor with one value per row in the mini-batch. */ static torch::Tensor reduceActionDimension(const torch::Tensor & tensor); + /// PPO clipping width used in the actor surrogate objective. const Real _clip_parameter; + /// Weight applied to the entropy bonus inside the actor loss. 
const Real _entropy_coeff; }; diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h index 81fd07ac2f71..020683b46ba0 100644 --- a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h @@ -17,7 +17,9 @@ #include /** - * Computes GAE advantages and value targets for an on-policy trajectory buffer. + * Computes generalized-advantage estimates and value targets for an on-policy trajectory buffer, + * following Schulman et al., "High-Dimensional Continuous Control Using Generalized Advantage + * Estimation." */ class LibtorchRLValueEstimator { @@ -49,7 +51,6 @@ class LibtorchRLValueEstimator * Compute value targets and advantages for one trajectory. * @param trajectory Trajectory to evaluate. * @param value_network Critic used for target estimation. - * @return Advantage and value-target vectors for the trajectory. */ Targets estimate(const LibtorchRLTrajectoryBuffer::Trajectory & trajectory, Moose::LibtorchArtificialNeuralNet & value_network) const; @@ -59,12 +60,13 @@ class LibtorchRLValueEstimator * Evaluate the critic on a batch of observations. * @param observations Observation matrix to feed through the critic. * @param value_network Critic used for the evaluation. - * @return One value estimate per observation row. */ std::vector evaluate(const std::vector> & observations, Moose::LibtorchArtificialNeuralNet & value_network) const; + /// Reward discount factor used in the temporal-difference recursion. const Real _discount_factor; + /// GAE lambda factor used in the reverse-time advantage recursion. 
const Real _lambda_factor; }; diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 24043fd6c4a3..22904ea97a28 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -201,14 +201,20 @@ LibtorchDRLControl::execute() _call_counter++; } +void +LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchActorNeuralNet & input_nn) +{ + _actor_nn = std::make_shared(input_nn); + _nn = _actor_nn; +} + void LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) { const auto * check = dynamic_cast(&input_nn); if (!check) mooseError("This needs to be a LibtorchActorNeuralNet!"); - _actor_nn = std::make_shared(*check); - _nn = _actor_nn; + loadControlNeuralNet(*check); } void diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 6ecee63a2c8a..425279d455a0 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -439,9 +439,9 @@ LibtorchDRLControlTrainer::collectTrajectoriesFromReporters() if (!num_transitions) continue; - std::vector> observation_trajectories(_state_names.size()); + std::vector> component_trajectories(_state_names.size()); for (const auto state_i : index_range(_state_value_pointers)) - observation_trajectories[state_i] = extractDownsampledSequence( + component_trajectories[state_i] = extractDownsampledSequence( (*_state_value_pointers[state_i])[sample_i], 0, num_transitions + 1); LibtorchRLTrajectoryBuffer::Trajectory trajectory; @@ -458,9 +458,9 @@ LibtorchDRLControlTrainer::collectTrajectoriesFromReporters() for (const auto step_i : make_range(num_transitions)) { 
trajectory.observations.push_back( - _observation_history.stackTrajectoryObservation(observation_trajectories, step_i)); + _observation_history.stackTrajectoryObservation(component_trajectories, step_i)); trajectory.next_observations.push_back( - _observation_history.stackTrajectoryObservation(observation_trajectories, step_i + 1)); + _observation_history.stackTrajectoryObservation(component_trajectories, step_i + 1)); } for (const auto action_i : index_range(_action_value_pointers)) diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C index b288d79f0e1a..ab8f97fcb280 100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C @@ -57,7 +57,7 @@ SamplerDRLControlTransfer::execute() if (getToMultiApp()->hasLocalApp(i)) { // Get the control neural net from the trainer - const Moose::LibtorchArtificialNeuralNet & trainer_nn = _trainer.controlNeuralNet(); + const Moose::LibtorchActorNeuralNet & trainer_nn = _trainer.controlNeuralNet(); // Get the control object from the other app FEProblemBase & app_problem = _multi_app->appProblemBase(i); @@ -118,7 +118,7 @@ SamplerDRLControlTransfer::executeToMultiapp() static_cast(_sampler_ptr->getNumberOfRows()) * static_cast(_fe_problem.timeStep()); // Get the control neural net from the trainer - const Moose::LibtorchArtificialNeuralNet & trainer_nn = _trainer.controlNeuralNet(); + const Moose::LibtorchActorNeuralNet & trainer_nn = _trainer.controlNeuralNet(); // Get the control object from the other app FEProblemBase & app_problem = _multi_app->appProblemBase(_app_index); diff --git a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C index 7ac8c35b3892..d1dbfd01d71e 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C +++ 
b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C @@ -38,8 +38,8 @@ TEST(LibtorchRLCoreTest, ObservationHistoryStacksCurrentAndTrajectoryData) const auto stacked_current = history.stackCurrentObservation({3.0, 2.0}, old_observations); EXPECT_EQ(stacked_current, std::vector({3.0, 2.0, 1.0, 6.0, 1.0, 6.0})); - std::vector> observation_trajectories = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; - const auto stacked_trajectory = history.stackTrajectoryObservation(observation_trajectories, 2); + std::vector> component_trajectories = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; + const auto stacked_trajectory = history.stackTrajectoryObservation(component_trajectories, 2); EXPECT_EQ(stacked_trajectory, std::vector({3.0, 6.0, 2.0, 5.0, 1.0, 4.0})); } From 3df5d51400d5843a6574b146cdacc71ca79159c4 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 28 Apr 2026 15:34:06 -0600 Subject: [PATCH 49/51] Finish first round of cleanup. --- .../libtorch/controls/LibtorchDRLControl.h | 9 +- .../libtorch/controls/LibtorchDRLControl.C | 85 ++++++++++--------- .../trainers/LibtorchDRLControlTrainer.C | 4 - .../transfers/SamplerDRLControlTransfer.C | 12 --- .../utils/LibtorchActionDistribution.C | 6 +- .../libtorch/utils/LibtorchActorNeuralNet.C | 2 - .../utils/LibtorchRLMiniBatchSampler.C | 3 +- .../tests/controls/libtorch_drl_control/tests | 8 ++ .../unit/src/TestLibtorchActorNeuralNet.C | 3 +- .../unit/src/TestLibtorchRLCore.C | 7 +- 10 files changed, 66 insertions(+), 73 deletions(-) diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index 92e06e48e8bd..e6c8074935db 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -73,9 +73,9 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl /// Restartable serialized state for the owned policy-sampling generator. 
std::vector & _policy_generator_state; - /// Number of controller executions since initialization or restart. - unsigned int & _call_counter; - /// Number of executions to reuse a sampled action before evaluating the policy again. + /// Number of controller executions remaining before the next policy evaluation. + unsigned int & _executions_until_next_policy_evaluation; + /// Number of controller executions between policy evaluations. const unsigned int _num_steps_in_period; /// Relaxation factor applied while smoothing control updates. const Real _smoother; @@ -83,6 +83,9 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl const bool _stochastic; private: + /// Advance the reuse schedule and report whether this execution should evaluate the policy. + bool shouldEvaluatePolicy(); + /// Restore the owned libtorch generator state from restartable storage. void restorePolicyGeneratorState(); diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 22904ea97a28..4bd5f2ef6441 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -25,17 +25,16 @@ LibtorchDRLControl::validParams() params.addClassDescription( "Sets the value of multiple 'Real' input parameters and postprocessors based on a Deep " "Reinforcement Learning (DRL) neural network trained using a PPO algorithm."); + params.set("execute_on") = EXEC_TIMESTEP_BEGIN; params.suppressParameter("torch_script_format"); params.addParam("seed", "Seed for the random number generator."); - params.addParam( + params.addRangeCheckedParam( "num_steps_in_period", 1, - "Preferred spelling for the number of timesteps to reuse the most recent sampled " - "action before evaluating the policy again."); - params.addParam( - "num_stems_in_period", 1, "Deprecated compatibility spelling for num_steps_in_period."); + 
"1<=num_steps_in_period", + "Number of controller executions between policy evaluations."); params.addParam( "smoother", 1.0, "Relaxation factor applied when smoothing control updates."); @@ -68,13 +67,16 @@ LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) _policy_generator(Moose::makeLibtorchCPUGenerator()), _policy_generator_state(declareRestartableData>( "policy_generator_state", std::vector())), - _call_counter(declareRestartableData("call_counter", 0)), - _num_steps_in_period(parameters.isParamSetByUser("num_steps_in_period") - ? getParam("num_steps_in_period") - : getParam("num_stems_in_period")), + _executions_until_next_policy_evaluation( + declareRestartableData("executions_until_next_policy_evaluation", 0)), + _num_steps_in_period(getParam("num_steps_in_period")), _smoother(getParam("smoother")), _stochastic(getParam("stochastic")) { + const auto & execute_on = getParam("execute_on"); + if (execute_on.size() != 1 || !execute_on.contains(EXEC_TIMESTEP_BEGIN)) + paramError("execute_on", "LibtorchDRLControl only supports 'TIMESTEP_BEGIN' for 'execute_on'."); + if (isParamValid("seed")) setPolicySampleSeed(getParam("seed")); @@ -132,51 +134,44 @@ LibtorchDRLControl::loadControlNeuralNetFromFile() void LibtorchDRLControl::execute() { - if (!_actor_nn && !_nn) + if (!_actor_nn) + { + mooseAssert(!_nn, "LibtorchDRLControl should not store a non-actor controller network."); return; + } - if (_current_execute_flag != EXEC_TIMESTEP_BEGIN) - return; + mooseAssert(_current_execute_flag == EXEC_TIMESTEP_BEGIN, + "LibtorchDRLControl should only execute on TIMESTEP_BEGIN."); const unsigned int n_controls = _control_names.size(); - const unsigned int num_old_timesteps = _input_timesteps - 1; + const bool first_control_execution = _old_observations.empty(); // Fill a vector with the current observation values. updateCurrentObservation(); // Seed the observation history with the initial observation when the control first runs. 
- if (_old_observations.empty()) - _old_observations.assign(num_old_timesteps, _current_observation); + if (first_control_execution) + _observation_history.initializeHistory(_current_observation, _old_observations); - if (_call_counter % _num_steps_in_period == 0) + if (shouldEvaluatePolicy()) { torch::Tensor input_tensor = prepareInputTensor(); - torch::Tensor action; + torch::Tensor action = _actor_nn->evaluate(input_tensor, _stochastic, _policy_generator); + savePolicyGeneratorState(); - if (_actor_nn) + if (_stochastic) { - action = _actor_nn->evaluate(input_tensor, _stochastic, _policy_generator); - savePolicyGeneratorState(); - - if (_stochastic) - { - torch::Tensor log_probability = _actor_nn->logProbability(action); - _current_control_signal_log_probabilities = {log_probability.data_ptr(), - log_probability.data_ptr() + - log_probability.size(1)}; - } - else - _current_control_signal_log_probabilities.assign(n_controls, 0.0); + torch::Tensor log_probability = _actor_nn->logProbability(action); + _current_control_signal_log_probabilities = {log_probability.data_ptr(), + log_probability.data_ptr() + + log_probability.size(1)}; } else - { - action = _nn->forward(input_tensor); _current_control_signal_log_probabilities.assign(n_controls, 0.0); - } _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; - if (_call_counter == 0) + if (first_control_execution) _current_smoothed_signal = _current_control_signals; } @@ -191,14 +186,7 @@ LibtorchDRLControl::execute() setControllableValueByName(_control_names[control_i], _current_smoothed_signal[control_i]); - if (_old_observations.size()) - { - std::rotate( - _old_observations.rbegin(), _old_observations.rbegin() + 1, _old_observations.rend()); - _old_observations[0] = _current_observation; - } - - _call_counter++; + _observation_history.advanceHistory(_current_observation, _old_observations); } void @@ -227,6 +215,19 @@ LibtorchDRLControl::setPolicySampleSeed(const uint64_t seed) 
savePolicyGeneratorState(); } +bool +LibtorchDRLControl::shouldEvaluatePolicy() +{ + if (_executions_until_next_policy_evaluation == 0) + { + _executions_until_next_policy_evaluation = _num_steps_in_period - 1; + return true; + } + + --_executions_until_next_policy_evaluation; + return false; +} + void LibtorchDRLControl::restorePolicyGeneratorState() { diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index 425279d455a0..d24799c3a08a 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -55,10 +55,6 @@ LibtorchDRLControlTrainer::validParams() "1<=input_timesteps", "Number of time steps to use in the input data, if larger than 1, " "data from the previous timesteps will be used as inputs in the training."); - params.addParam("skip_num_rows", - 1, - "Unused compatibility parameter reserved for future reporter-row " - "offset handling."); params.addRequiredParam("num_epochs", "Number of epochs for the training."); diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C index ab8f97fcb280..713a7c10da2b 100644 --- a/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C @@ -52,8 +52,6 @@ SamplerDRLControlTransfer::execute() const auto n = getToMultiApp()->numGlobalApps(); for (MooseIndex(n) i = 0; i < n; i++) { - // std::cout << "Do I have this app? 
" << i << " " << getToMultiApp()->hasLocalApp(i) << - // std::endl; if (getToMultiApp()->hasLocalApp(i)) { // Get the control neural net from the trainer @@ -71,16 +69,6 @@ SamplerDRLControlTransfer::execute() // Copy and the neural net and execute it to get the initial values control_object->loadControlNeuralNet(trainer_nn); control_object->execute(); - - // const auto & named_params = trainer_nn.named_parameters(); - // for (const auto & param_i : make_range(named_params.size())) - // { - // // We cast the parameters into a 1D vector - // std::cout << "Transferring " << Moose::stringify(std::vector( - // named_params[param_i].value().data_ptr(), - // named_params[param_i].value().data_ptr() + - // named_params[param_i].value().numel())) << std::endl; - // } } } } diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C index 992b907ec7f5..311d9676f4a5 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C @@ -26,10 +26,10 @@ namespace * Fill in default action scaling and catch shape mistakes early. * @param factors User-provided scaling factors. * @param expected_size Number of action outputs expected by the distribution. - * @return A fully populated scaling-factor vector. + * @return A fully populated and validated scaling-factor vector. */ std::vector -normalizeActionScalingFactors(const std::vector & factors, const unsigned int expected_size) +setActionScalingFactors(const std::vector & factors, const unsigned int expected_size) { const auto normalized = factors.empty() ? 
std::vector(expected_size, 1.0) : factors; @@ -61,7 +61,7 @@ LibtorchActionDistribution::LibtorchActionDistribution( _num_outputs(num_outputs), _device_type(device_type), _data_type(data_type), - _output_scaling_factors(normalizeActionScalingFactors(output_scaling_factors, num_outputs)) + _output_scaling_factors(setActionScalingFactors(output_scaling_factors, num_outputs)) { auto action_scale = _output_scaling_factors; LibtorchUtils::vectorToTensor(action_scale, _action_scale_tensor); diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C index 75371eef0db0..0a21edd88be5 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -286,7 +286,6 @@ LibtorchActorNeuralNet::forward(const torch::Tensor & x) else if (activation == "linear") output = _weights[i]->forward(output); - // std::cout << "midresult" << i << output << std::endl; } return output; @@ -299,7 +298,6 @@ LibtorchActorNeuralNet::evaluate(torch::Tensor & x, { torch::Tensor output = forward(x); - // std::cout << "midresult" << output << std::endl; resetDistributionParams(output); if (sampled) diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C index 8bf6ec7381ca..f25b6fefe9d5 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C @@ -14,6 +14,7 @@ #include "MooseError.h" #include +#include std::vector LibtorchRLMiniBatchSampler::sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, @@ -53,7 +54,7 @@ LibtorchRLMiniBatchSampler::validateBatch(const LibtorchRLTrajectoryBuffer::Tens "targets, and advantages before mini-batch sampling."); const auto batch_size = batch.size(); - const auto 
validate_rows = [batch_size](const torch::Tensor & tensor, const char * name) + const auto validate_rows = [batch_size](const torch::Tensor & tensor, std::string_view name) { if (!tensor.defined() || tensor.size(0) != batch_size) mooseError( diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests index 7180f45852b5..add6e3b26270 100644 --- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests +++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests @@ -46,4 +46,12 @@ "initialized in it." capabilities = 'libtorch' [] + [invalid-execute-on] + type = RunException + input = libtorch_drl_control.i + cli_args = "Controls/src_control/execute_on='TIMESTEP_END'" + expect_err = "LibtorchDRLControl only supports 'TIMESTEP_BEGIN' for 'execute_on'." + requirement = "The system shall reject DRL controller execute flags other than timestep begin." + capabilities = 'libtorch' + [] [] diff --git a/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C index a581518ea4d4..a5a2b77a5f30 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C +++ b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C @@ -141,10 +141,9 @@ TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScalingAndStateI const Real unscaled_mean = expected_deterministic_action / action_scale; const Real unscaled_action = physical_action / action_scale; - constexpr Real pi = 3.14159265358979323846; const Real expected_log_probability = -std::pow(unscaled_action - unscaled_mean, 2) / (2.0 * 4.0) - log_std - - 0.5 * std::log(2.0 * pi) - std::log(action_scale); + 0.5 * std::log(2.0 * libMesh::pi) - std::log(action_scale); auto action = torch::tensor({{physical_action}}, at::kDouble); const Real actual_log_probability = network.logProbability(action).item(); diff --git 
a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C index d1dbfd01d71e..a45a16455d33 100644 --- a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C +++ b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C @@ -107,8 +107,6 @@ TEST(LibtorchRLCoreTest, ValueEstimatorComputesGAETargets) TEST(LibtorchRLCoreTest, PPOLossUsesStoredLogProbabilityAndValueTarget) { - constexpr Real pi = 3.14159265358979323846; - Moose::LibtorchActorNeuralNet policy_network("policy", 1, 1, {}, {"linear"}); policy_network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0); policy_network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); @@ -124,14 +122,15 @@ TEST(LibtorchRLCoreTest, PPOLossUsesStoredLogProbabilityAndValueTarget) batch.observations = torch::zeros({1, 1}, torch::TensorOptions().dtype(torch::kDouble)); batch.actions = torch::zeros({1, 1}, torch::TensorOptions().dtype(torch::kDouble)); batch.old_log_probabilities = - torch::tensor({{-0.5 * std::log(2.0 * pi)}}, torch::TensorOptions().dtype(torch::kDouble)); + torch::tensor({{-0.5 * std::log(2.0 * libMesh::pi)}}, + torch::TensorOptions().dtype(torch::kDouble)); batch.value_targets = torch::tensor({{1.5}}, torch::TensorOptions().dtype(torch::kDouble)); batch.advantages = torch::tensor({{2.0}}, torch::TensorOptions().dtype(torch::kDouble)); LibtorchRLPPOLoss loss(0.2, 0.01); const auto loss_values = loss.compute(policy_network, value_network, batch); - const Real expected_entropy = 0.5 * std::log(2.0 * pi) + 0.5; + const Real expected_entropy = 0.5 * std::log(2.0 * libMesh::pi) + 0.5; const Real expected_actor_loss = -(2.0 + 0.01 * expected_entropy); const Real expected_critic_loss = 0.25; From 62dbc5ea050679689f0129fad4cd0b4bd846b854 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 28 Apr 2026 16:24:09 -0600 Subject: [PATCH 50/51] Add modification for the documentation as well. 
--- framework/doc/content/bib/moose.bib | 7 -- .../doc/content/bib/stochastic_tools.bib | 16 +++- .../examples/libtorch_drl_control.md | 26 +++++-- .../libtorch/controls/LibtorchDRLControl.md | 42 ++++++++++- .../trainers/LibtorchDRLControlTrainer.md | 74 ++++++++++++++++++- 5 files changed, 145 insertions(+), 20 deletions(-) diff --git a/framework/doc/content/bib/moose.bib b/framework/doc/content/bib/moose.bib index 003a179ae1da..25a3b2529b42 100644 --- a/framework/doc/content/bib/moose.bib +++ b/framework/doc/content/bib/moose.bib @@ -45,13 +45,6 @@ @book{muller1995neural publisher={Springer Science \& Business Media} } -@article{schulman2017proximal, - title={Proximal policy optimization algorithms}, - author={Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg}, - journal={arXiv preprint arXiv:1707.06347}, - year={2017} -} - @article{tonks2012object, title={An object-oriented finite element framework for multiphysics phase field simulations}, author={Tonks, Michael R and Gaston, Derek and Millett, Paul C and Andr\v{s}, David and Talbot, Paul}, diff --git a/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib b/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib index 9af1f07f1888..c321338e7187 100644 --- a/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib +++ b/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib @@ -135,6 +135,20 @@ @article{kingma2014adam url={https://doi.org/10.48550/arXiv.1412.6980} } +@article{schulman2015gae, + title={High-dimensional continuous control using generalized advantage estimation}, + author={Schulman, John and Moritz, Philipp and Levine, Sergey and Jordan, Michael and Abbeel, Pieter}, + journal={arXiv preprint arXiv:1506.02438}, + year={2015} +} + +@article{schulman2017proximal, + title={Proximal policy optimization algorithms}, + author={Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg}, + journal={arXiv 
preprint arXiv:1707.06347}, + year={2017} +} + @article{li2016pss, title={Matlab codes of Subset Simulation for reliability analysis and structural optimization}, author={Li, H. S. and Cao, Z. J.}, @@ -363,4 +377,4 @@ @article{calderhead2014general pages = {17408--17413}, year = {2014}, doi = {10.1073/pnas.1408184111} -} \ No newline at end of file +} diff --git a/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md b/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md index 71e7a0517bfe..3bf2772eac80 100644 --- a/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md +++ b/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md @@ -5,6 +5,17 @@ The following example demonstrates how to set up a Proximal Policy Optimization Deep Reinforcement Learning (DRL) training sequence for neural-net-based controllers of MOOSE simulations. See [!cite](schulman2017proximal) for a more theoretical background on the PPO algorithm. +The detailed PPO and GAE equations used by the trainer are documented on +[LibtorchDRLControlTrainer](source/libtorch/trainers/LibtorchDRLControlTrainer.md). +At a high level, the trainer collects on-policy tuples +$(o_t, a_t, \log \pi_{\mathrm{old}}(a_t|o_t), r_t, o_{t+1})$, computes GAE +advantages [!cite](schulman2015gae), and updates separate actor and critic +networks with the PPO clipped objective [!cite](schulman2017proximal). The +paired [LibtorchDRLControl](source/libtorch/controls/LibtorchDRLControl.md) +executes the actor at `TIMESTEP_BEGIN`, stacks `input_timesteps` observations, +and can optionally reuse or smooth sampled actions before they are applied to +the physics model. 
+ ## Problem Statement In this example we would like to design a DRL-based controller for the air conditioning of a @@ -80,9 +91,10 @@ using [LibtorchControlValuePostprocessor](source/libtorch/postprocessors/Libtorc [LibtorchDRLLogProbabilityPostprocessor](source/libtorch/postprocessors/LibtorchDRLLogProbabilityPostprocessor.md) as shown above. Furthermore, the additional [LibtorchNeuralNetControl](LibtorchNeuralNetControl.md) (`src_control_final`) -can be used to evaluate the neural network without the additional random -sampling process needed for the training process. In other words, this object will evaluate the -final product of this training process. +is optional and can be used to evaluate the trained network with a plain +deterministic control object. The same trained actor could also be executed +through [LibtorchDRLControl.md] with +[!param](/Controls/LibtorchDRLControl/stochastic) set to `false`. ### Main Application @@ -157,10 +169,12 @@ to balance these two factors by tuning the parameters in the `Trainer` and `Cont layout={'xaxis':{'type':'linear', 'title':'Number of simulations'}, 'yaxis':{'type':'linear','title':'Average Episodic Reward'}} -Following the training procedure, we can replace the [LibtorchDRLControl.md] object with +Following the training procedure, we can either set +[!param](/Controls/LibtorchDRLControl/stochastic) to `false` on +[LibtorchDRLControl.md] or replace it with [LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md) -to evaluate the final version of the neural network -without the additional randomization. By doing this, the following results are obtained: +for plain deterministic inference. In this example we use the latter, and the +following results are obtained: !plot scatter id=results caption=The evolution of the room temperature at the sensor over the day. 
diff --git a/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md b/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md index 1a1ad7c057a3..ef7038e3d0c5 100644 --- a/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md +++ b/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md @@ -5,10 +5,44 @@ ## Overview -This object controls a physical process using a neural network, just like [LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md), -with an additional functionality of randomizing the action values to avoid overfitting in the control process. -This control object is supposed to be used in conjunction with [LibtorchDRLControlTrainer.md]. In other -cases when the neural network needs to be simply evaluated, the user is encouraged to use [LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md). +This object is the runtime policy executor paired with +[LibtorchDRLControlTrainer](source/libtorch/trainers/LibtorchDRLControlTrainer.md). +It extends +[LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md) +with stochastic policy sampling, action reuse, optional smoothing, and +restartable policy state. For deterministic execution of the same actor, set +[!param](/Controls/LibtorchDRLControl/stochastic) to `false`. Use +[LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md) +instead when a plain deterministic neural-net control object is preferred +without the DRL-specific execution features. + +## Execution Model + +`LibtorchDRLControl` only supports `TIMESTEP_BEGIN`. On each execution it reads +the current observation values, combines them with the stored history implied by +[!param](/Controls/LibtorchDRLControl/input_timesteps), and evaluates the actor +when a new policy action is needed. 
+ +If [!param](/Controls/LibtorchDRLControl/stochastic) is `true`, the action is +sampled from the actor distribution and the corresponding log probabilities are +stored for PPO training. If it is `false`, the deterministic actor output is +used instead. The policy evaluation can be reused across multiple controller +executions with [!param](/Controls/LibtorchDRLControl/num_steps_in_period), so a +new action is only sampled every configured number of executions. + +The applied control can also be relaxed with +[!param](/Controls/LibtorchDRLControl/smoother): +\begin{equation} +u_t^{\mathrm{applied}} = u_{t-1}^{\mathrm{applied}} + +\alpha\left(u_t^{\mathrm{policy}} - u_{t-1}^{\mathrm{applied}}\right), +\end{equation} +where $\alpha$ is the `smoother` value. Setting `smoother = 1` applies the raw +policy action directly. + +The controller stores the observation history, smoothed signal, and the libtorch +CPU random-number-generator state as restartable data. This keeps stochastic +recovered runs aligned with uninterrupted runs, provided the same controller +state is recovered. !if! function=hasCapability('libtorch') diff --git a/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md b/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md index 7d5fcc1e1a54..659ed5b20691 100644 --- a/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md +++ b/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md @@ -5,8 +5,78 @@ ## Overview -This object is supposed to train a Deep Reinforcement Learning (DRL) controller -using the Proximal Policy Optimization (PPO) algorithm [!cite](schulman2017proximal). +This object trains an on-policy actor-critic controller using Proximal Policy +Optimization (PPO) [!cite](schulman2017proximal) together with generalized +advantage estimation (GAE) [!cite](schulman2015gae). 
Reporter trajectories are +assembled into transitions, flattened across samples, shuffled into mini-batches, +and then used to update separate actor and critic neural networks. + +## Algorithm Summary + +For `input_timesteps = H`, the observation passed to the actor and critic is the +stacked history +\begin{equation} +o_t = \left[s_t,\ s_{t-1},\ \ldots,\ s_{t-H+1}\right], +\end{equation} +where early missing history entries are filled with the earliest available +reporter value. + +Each collected trajectory is converted into the tuples +$(o_t, a_t, \log \pi_{\mathrm{old}}(a_t|o_t), r_t, o_{t+1})$. The +[!param](/Trainers/LibtorchDRLControlTrainer/shift_outputs) option aligns the +reported action and log-probability sequences with the reward sequence when the +control is applied at the beginning of a time step and the reward is measured at +the end of that step. The +[!param](/Trainers/LibtorchDRLControlTrainer/timestep_window) option can be used +to downsample long reporter trajectories before these transitions are assembled. + +The critic is first evaluated on $o_t$ and $o_{t+1}$, and the temporal-difference +residual is computed as +\begin{equation} +\delta_t = r_t + \gamma V_{\phi}(o_{t+1}) - V_{\phi}(o_t). +\end{equation} +The trainer then computes the reverse-time GAE recursion +\begin{equation} +\hat{A}_t = \delta_t + \gamma \lambda \hat{A}_{t+1}, +\end{equation} +with critic regression targets +\begin{equation} +\hat{V}_t = \hat{A}_t + V_{\phi}(o_t). +\end{equation} + +For each mini-batch, PPO uses the probability ratio +\begin{equation} +r_t(\theta) = \exp\left(\log \pi_{\theta}(a_t|o_t) - +\log \pi_{\mathrm{old}}(a_t|o_t)\right), +\end{equation} +and the actor objective implemented here is +\begin{equation} +L_{\mathrm{actor}} = +-\frac{1}{N}\sum_t \left[ +\min\left(r_t(\theta)\hat{A}_t, +\mathrm{clip}\left(r_t(\theta), 1-\epsilon, 1+\epsilon\right)\hat{A}_t\right) ++ c_H \mathcal{H}\left[\pi_{\theta}(\cdot|o_t)\right] +\right]. 
+\end{equation} +The critic is trained with a mean-squared-error loss, +\begin{equation} +L_{\mathrm{critic}} = \frac{1}{N}\sum_t +\left(V_{\phi}(o_t) - \hat{V}_t\right)^2. +\end{equation} + +If [!param](/Trainers/LibtorchDRLControlTrainer/standardize_advantage) is set, +each sampled mini-batch uses zero-mean, unit-variance advantages before the PPO +loss is evaluated. + +## Notes + +The actor embeds the configured input shifting, input scaling, and action scaling +directly into the neural-network module so that transferred and checkpointed +controllers operate in the same normalized coordinates used during training. + +The trainer updates the actor and critic with separate Adam optimizers and then +broadcasts the updated parameters so all MPI ranks hold the same networks after +each PPO update. ## Example Input File Syntax From ef3ee6e1599c7ab83db35db25feaa9c310490e56 Mon Sep 17 00:00:00 2001 From: Peter German Date: Tue, 28 Apr 2026 16:27:01 -0600 Subject: [PATCH 51/51] Add proper format. 
--- .../include/libtorch/utils/LibtorchArtificialNeuralNet.h | 1 - .../src/libtorch/utils/LibtorchArtificialNeuralNet.C | 1 - .../src/libtorch/reporters/DRLRewardReporter.C | 6 ++++-- .../src/libtorch/utils/LibtorchActorNeuralNet.C | 1 - .../src/libtorch/utils/LibtorchRLValueEstimator.C | 9 ++++----- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index 2239d1e07ed7..aa2a40eb25d4 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -184,7 +184,6 @@ void to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * c void loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn, const std::string & filename); - } template <> diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index b0bbe31634b4..99175551d987 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -472,7 +472,6 @@ loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn, "\nTorchScript load error: ", scripted_error); } - } template <> diff --git a/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C b/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C index 1979a2840cbd..c91caf5736c9 100644 --- a/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C +++ b/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C @@ -31,8 +31,10 @@ DRLRewardReporter::DRLRewardReporter(const InputParameters & parameters) SurrogateModelInterface(this), _average_reward(declareValueByName("average_reward", REPORTER_MODE_ROOT)), _std_reward(declareValueByName("std_reward", REPORTER_MODE_ROOT)), - 
_sample_average_reward(declareValueByName>("sample_average_reward", REPORTER_MODE_ROOT)), - _sample_std_reward(declareValueByName>("sample_std_reward", REPORTER_MODE_ROOT)), + _sample_average_reward( + declareValueByName>("sample_average_reward", REPORTER_MODE_ROOT)), + _sample_std_reward( + declareValueByName>("sample_std_reward", REPORTER_MODE_ROOT)), _trainer(getSurrogateTrainer("drl_trainer_name")) { } diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C index 0a21edd88be5..036517259627 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -285,7 +285,6 @@ LibtorchActorNeuralNet::forward(const torch::Tensor & x) output = torch::gelu(_weights[i]->forward(output)); else if (activation == "linear") output = _weights[i]->forward(output); - } return output; diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C index a20484202ac2..dd87f584cf29 100644 --- a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C @@ -25,9 +25,8 @@ valueEstimatorMatrixToTensor(const std::vector> & rows) return {}; const auto num_columns = rows.front().size(); - auto tensor = torch::zeros( - {static_cast(rows.size()), static_cast(num_columns)}, - torch::TensorOptions().dtype(torch::kDouble)); + auto tensor = torch::zeros({static_cast(rows.size()), static_cast(num_columns)}, + torch::TensorOptions().dtype(torch::kDouble)); auto accessor = tensor.accessor(); for (const auto row_i : make_range(rows.size())) @@ -46,8 +45,8 @@ LibtorchRLValueEstimator::LibtorchRLValueEstimator(const Real discount_factor, } void -LibtorchRLValueEstimator::computeValueTargets(LibtorchRLTrajectoryBuffer & buffer, - 
Moose::LibtorchArtificialNeuralNet & value_network) const +LibtorchRLValueEstimator::computeValueTargets( + LibtorchRLTrajectoryBuffer & buffer, Moose::LibtorchArtificialNeuralNet & value_network) const { for (auto & trajectory : buffer.trajectories()) {