diff --git a/framework/doc/content/bib/moose.bib b/framework/doc/content/bib/moose.bib index 003a179ae1da..25a3b2529b42 100644 --- a/framework/doc/content/bib/moose.bib +++ b/framework/doc/content/bib/moose.bib @@ -45,13 +45,6 @@ @book{muller1995neural publisher={Springer Science \& Business Media} } -@article{schulman2017proximal, - title={Proximal policy optimization algorithms}, - author={Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg}, - journal={arXiv preprint arXiv:1707.06347}, - year={2017} -} - @article{tonks2012object, title={An object-oriented finite element framework for multiphysics phase field simulations}, author={Tonks, Michael R and Gaston, Derek and Millett, Paul C and Andr\v{s}, David and Talbot, Paul}, diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h index b6229008cce3..db05d7d8d254 100644 --- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h +++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h @@ -12,6 +12,7 @@ #pragma once #include "LibtorchArtificialNeuralNet.h" +#include "LibtorchObservationHistoryHelper.h" #include "Control.h" /** @@ -30,6 +31,9 @@ class LibtorchNeuralNetControl : public Control /// Construct using input parameters LibtorchNeuralNetControl(const InputParameters & parameters); + /// Load any file-backed controller state after full object construction + virtual void initialSetup() override; + /// Execute neural network to determine the controllable parameter values virtual void execute() override; @@ -48,13 +52,18 @@ class LibtorchNeuralNetControl : public Control * when copying the neural network from a main app which trains it. * @param input_nn Reference to a neural network which will be copied into this object */ - void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn); + virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn); + + /** + * Load the controller neural network from the configured checkpoint file. + */ + virtual void loadControlNeuralNetFromFile(); /// Return a reference to the stored neural network const Moose::LibtorchNeuralNetBase & controlNeuralNet() const; /// Return true if the object already has a neural netwok - bool hasControlNeuralNet() const { return (_nn != NULL); }; + bool hasControlNeuralNet() const { return _nn != nullptr; }; protected: /** @@ -68,41 +77,44 @@ class LibtorchNeuralNetControl : public Control const std::vector & conditional_param, bool should_be_defined = true); - /// Function that updates the values of the current response - void updateCurrentResponse(); + /// Refresh the current observation values from the linked postprocessors. + void updateCurrentObservation(); /// Function that prepares the input tensor for the controller neural network torch::Tensor prepareInputTensor(); /// The values of the current observed postprocessor values - std::vector _current_response; - /// This variable is populated if the controller needs acess to older values of the + std::vector _current_observation; + /// This variable is populated if the controller needs access to older values of the /// observed postprocessor values - std::vector> & _old_responses; + std::vector> & _old_observations; /// The names of the controllable parameters const std::vector & _control_names; - /// The control signals from the last evaluation of the controller - std::vector _current_control_signals; + /// The control signals from the last evaluation of the controller, saved for recover/restart. + std::vector & _current_control_signals; /// Names of the postprocessors which contain the observations of the system - const std::vector & _response_names; + const std::vector & _observation_names; - /// Links to the current response postprocessor values. This is necessary so that we can check + /// Links to the current observation postprocessor values. This is necessary so that we can check /// if the postprocessors exist. - std::vector _response_values; + std::vector _observation_values; /// Number of timesteps to use as input data from the reporters (this influences how many past - /// results are used, e.g. the size of _old_responses) + /// results are used, e.g. the size of _old_observations) const unsigned int _input_timesteps; - /// Shifting constants for the responses - const std::vector _response_shift_factors; - /// Scaling constants (multipliers) for the responses - const std::vector _response_scaling_factors; + /// Shifting constants for the observations + const std::vector _observation_shift_factors; + /// Scaling constants (multipliers) for the observations + const std::vector _observation_scaling_factors; /// Multipliers for the actions const std::vector _action_scaling_factors; + /// Shared observation history stacking and factor-expansion helper + const LibtorchObservationHistoryHelper _observation_history; + /// Pointer to the neural net object which is supposed to be used to control /// the parameter values. The controller owns this object, but it can be read /// from file or copied by a transfer. diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h index c995d61ed7d1..aa2a40eb25d4 100644 --- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h +++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h @@ -13,6 +13,7 @@ #include #include +#include #include "LibtorchNeuralNetBase.h" #include "MooseError.h" #include "DataIO.h" @@ -22,16 +23,25 @@ namespace Moose { -// A class that describes a simple feed-forward neural net. +/** + * Simple feed-forward neural net with optional affine input and output scaling. + */ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeuralNetBase { public: /** - * Construct using input parameters - * @param name Name of the neural network - * @param num_inputs The number of input neurons/parameters - * @param num_neurons_per_layer Number of neurons per hidden layer - * @param num_outputs The number of output neurons + * Build a plain feed-forward neural network. + * @param name Name of the neural network module. + * @param num_inputs Number of input neurons or parameters. + * @param num_outputs Number of output neurons. + * @param num_neurons_per_layer Hidden-layer widths. + * @param activation_function Hidden-layer activation names. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param build_on_construct Whether to build the torch modules right away. + * @param input_shift_factors Optional affine input shifts. + * @param input_scaling_factors Optional affine input scales. + * @param output_scaling_factors Optional output scaling factors. */ LibtorchArtificialNeuralNet(const std::string name, const unsigned int num_inputs, @@ -39,27 +49,31 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu const std::vector & num_neurons_per_layer, const std::vector & activation_function = {"relu"}, const torch::DeviceType device_type = torch::kCPU, - const torch::ScalarType scalar_type = torch::kDouble); + const torch::ScalarType scalar_type = torch::kDouble, + const bool build_on_construct = true, + const std::vector & input_shift_factors = {}, + const std::vector & input_scaling_factors = {}, + const std::vector & output_scaling_factors = {}); /** - * Copy construct an artificial neural network - * @param nn The neural network which needs to be copied + * Copy-construct a feed-forward neural network. + * @param nn Neural network to copy. + * @param build_on_construct Whether to rebuild the module structure during the copy. */ - LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn); + LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn, + const bool build_on_construct = true); /** - * Add layers to the neural network - * @param layer_name The name of the layer to be added - * @param parameters A map of parameter names and the corresponding values which - * describe the neural net layer architecture + * Add one linear layer to the network. + * @param layer_name Name of the layer to add. + * @param parameters Small parameter map that describes the layer shape. */ virtual void addLayer(const std::string & layer_name, const std::unordered_map & parameters); /** - * Overriding the forward substitution function for the neural network, unfortunately - * this cannot be const since it creates a graph in the background - * @param x Input tensor for the evaluation + * Run a forward pass through the network. + * @param x Input tensor for the evaluation. */ virtual torch::Tensor forward(const torch::Tensor & x) override; @@ -79,13 +93,61 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu torch::DeviceType deviceType() const { return _device_type; } /// Return the data type which is used by this neural network torch::ScalarType dataType() const { return _data_type; } + /// Return the affine input shift factors used before evaluation + const std::vector & inputShiftFactors() const { return _input_shift_factors; } + /// Return the affine input scaling factors used before evaluation + const std::vector & inputScalingFactors() const { return _input_scaling_factors; } + /// Return the output scaling factors applied after evaluation + const std::vector & outputScalingFactors() const { return _output_scaling_factors; } /// Construct the neural network - void constructNeuralNetwork(); + virtual void constructNeuralNetwork(); + + /// Update cached affine metadata vectors from the registered libtorch buffers. + void synchronizeAffineFactorsFromBuffers(); + + /** + * Map an activation name to the orthogonal-initialization gain we want to use. + * @param activation Activation name to look up. + */ + Real determineGain(const std::string & activation); + + /** + * Initialize the trainable weights and biases. + * @param generator Optional torch random-number generator used for reproducible initialization. + */ + virtual void initializeNeuralNetwork(c10::optional generator = c10::nullopt); /// Store the network architecture in a json file (for debugging, visualization) void store(nlohmann::json & json) const; protected: + /** + * Set affine metadata by either accepting the user values or filling defaults. + * @param factors User-provided affine factors. + * @param expected_size Expected number of entries. + * @param default_value Default value used when the vector is empty. + * @param factor_name Name used in error messages. + */ + static std::vector setAffineFactors(const std::vector & factors, + unsigned int expected_size, + Real default_value, + const std::string & factor_name); + + /// Initialize the registered affine metadata buffers used by serialization. + void initializeAffineBuffers(); + + /** + * Apply affine preprocessing to the raw input tensor. + * @param x Raw input tensor. + */ + virtual torch::Tensor preprocessInput(const torch::Tensor & x) const; + + /** + * Apply the configured output scaling to a network output tensor. + * @param y Raw network output tensor. + */ + virtual torch::Tensor scaleOutput(const torch::Tensor & y) const; + /// Name of the neural network const std::string _name; /// Submodules that hold linear operations and the corresponding @@ -104,10 +166,24 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu const torch::DeviceType _device_type; /// The data type used in this neural network const torch::ScalarType _data_type; + /// Affine preprocessing applied to the flattened input + std::vector _input_shift_factors; + /// Multiplicative affine preprocessing applied after shifting the input + std::vector _input_scaling_factors; + /// Multiplicative scaling applied after the network output is formed + std::vector _output_scaling_factors; + /// Registered libtorch buffer holding the affine input shifts + torch::Tensor _input_shift_tensor; + /// Registered libtorch buffer holding the affine input scaling factors + torch::Tensor _input_scale_tensor; + /// Registered libtorch buffer holding the output scaling factors + torch::Tensor _output_scale_tensor; }; void to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * const & network); +void loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn, + const std::string & filename); } template <> diff --git a/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h b/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h new file mode 100644 index 000000000000..97a19521a2db --- /dev/null +++ b/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h @@ -0,0 +1,85 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include "MooseTypes.h" + +#include + +/** + * Shared observation history stacking and factor-expansion logic for libtorch-based controls and + * trainers. + */ +class LibtorchObservationHistoryHelper +{ +public: + /** + * Build an observation-history helper for libtorch inputs. + * @param input_timesteps Number of timesteps to stack into each flattened input. + */ + LibtorchObservationHistoryHelper(unsigned int input_timesteps); + + /// Return the number of timesteps stacked into each flattened input. + unsigned int inputTimesteps() const { return _input_timesteps; } + + /** + * Fill the history buffer with copies of the current observation. + * @param observation Current observation. + * @param old_observations History buffer that stores previous observations. + */ + void initializeHistory(const std::vector & observation, + std::vector> & old_observations) const; + + /** + * Advance the history buffer by inserting the latest observation. + * @param observation Current observation. + * @param old_observations History buffer ordered from newest to oldest. + */ + void advanceHistory(const std::vector & observation, + std::vector> & old_observations) const; + + /** + * Repeat per-observation-entry factors across all stacked timesteps. + * @param observation_factors Per-entry factors for one observation vector. + */ + std::vector expandObservationFactors(const std::vector & observation_factors) const; + + /** + * Flatten the current observation together with its stored history. + * @param observation Current observation. + * @param old_observations History buffer ordered from newest to oldest. + */ + std::vector + stackCurrentObservation(const std::vector & observation, + const std::vector> & old_observations) const; + + /** + * Flatten one time slice of observation-component trajectories with causal history. + * This uses [component][time] because the trainer receives reporter data one observation + * component at a time, so keeping that layout avoids building an extra transposed + * [time][component] container before stacking. + * @param component_trajectories Observation trajectories indexed as [component][time]. + * @param time_index Time index to stack. + */ + std::vector + stackTrajectoryObservation(const std::vector> & component_trajectories, + unsigned int time_index) const; + +private: + /// Check that all observation-component trajectories have a consistent shape. + void validateTrajectoryShape(const std::vector> & component_trajectories) const; + + /// Number of timesteps stacked into each flattened observation. + const unsigned int _input_timesteps; +}; + +#endif diff --git a/framework/include/libtorch/utils/LibtorchRandomUtils.h b/framework/include/libtorch/utils/LibtorchRandomUtils.h new file mode 100644 index 000000000000..9ac469c57ced --- /dev/null +++ b/framework/include/libtorch/utils/LibtorchRandomUtils.h @@ -0,0 +1,43 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include + +#include "MooseTypes.h" + +namespace Moose +{ + +/// Create an owned CPU generator using libtorch's default seed behavior. +at::Generator makeLibtorchCPUGenerator(); + +/** + * Create an owned CPU generator with an explicit seed. + * @param seed Seed value passed to the libtorch CPU generator. + */ +at::Generator makeLibtorchCPUGenerator(uint64_t seed); + +/** + * Fill a tensor with a (semi) orthogonal matrix using the provided generator. + * This mirrors torch::nn::init::orthogonal_, but avoids the ambient default RNG. + * @param tensor Tensor to initialize in place. + * @param gain Scaling factor applied after the orthogonal initialization. + * @param generator Optional torch random-number generator used to sample the initialization. + */ +void orthogonalInitializeTensor(torch::Tensor & tensor, + Real gain = 1.0, + c10::optional generator = c10::nullopt); + +} // namespace Moose + +#endif diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C index 5502ce168647..9f3c8a39ce2f 100644 --- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C +++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C @@ -23,59 +23,61 @@ LibtorchNeuralNetControl::validParams() InputParameters params = Control::validParams(); params.addClassDescription("Controls the value of multiple controllable input parameters using a " "Libtorch-based neural network."); - params.addRequiredParam>("parameters", - "The input parameter(s) to control."); + params.addRequiredParam>( + "parameters", "Controllable input parameters driven by the network."); params.addRequiredParam>( - "responses", "The responses (prostprocessors) which are used for the control."); + "observations", "Postprocessors used as the current observation vector."); params.addParam>( - "response_shift_factors", - "Constants which will be used to shift the response values. This is used for the " - "manipulation of the neural net inputs for better training efficiency."); + "observation_shift_factors", + {}, + "Optional offsets applied to the observation values before scaling."); params.addParam>( - "response_scaling_factors", - "Constants which will be used to multiply the shifted response values. This is used for " - "the manipulation of the neural net inputs for better training efficiency."); - params.addParam("filename", - "Define if the neural net is supposed to be loaded from a file."); + "observation_scaling_factors", + {}, + "Optional multipliers applied after shifting the observation values."); + params.addParam("filename", "Checkpoint file to load for the controller network."); params.addParam("torch_script_format", false, - "If we want to load the neural net using the torch-script format."); - params.addParam( + "Whether the checkpoint should be read as a scripted Torch module."); + params.addRangeCheckedParam( "input_timesteps", 1, - "Number of time steps to use in the input data, if larger than 1, " - "data from the previous timesteps will be used as well as inputs in the training."); - params.addParam>("num_neurons_per_layer", - "The number of neurons on each hidden layer."); + "1<=input_timesteps", + "Number of recent timesteps to stack into each network input."); + params.addParam>( + "num_neurons_per_layer", "Hidden-layer widths used when constructing the controller."); params.addParam>( "activation_function", std::vector({"relu"}), - "The type of activation functions to use. It is either one value " - "or one value per hidden layer."); + "Activation name for each hidden layer, or one shared value for all layers."); params.addParam>( "action_scaling_factors", - "Scale factor that multiplies the NN output to obtain a physically meaningful value."); + {}, + "Per-action scaling embedded in the controller outputs so saved checkpoints stay in " + "physical units."); return params; } LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & parameters) : Control(parameters), - _old_responses(declareRestartableData>>("old_responses")), + _old_observations(declareRestartableData>>("old_observations")), _control_names(getParam>("parameters")), - _current_control_signals(std::vector(_control_names.size(), 0.0)), - _response_names(getParam>("responses")), + _current_control_signals(declareRestartableData>( + "current_control_signals", std::vector(_control_names.size(), 0.0))), + _observation_names(getParam>("observations")), _input_timesteps(getParam("input_timesteps")), - _response_shift_factors(isParamValid("response_shift_factors") - ? getParam>("response_shift_factors") - : std::vector(_response_names.size(), 0.0)), - _response_scaling_factors(isParamValid("response_scaling_factors") - ? getParam>("response_scaling_factors") - : std::vector(_response_names.size(), 1.0)), - _action_scaling_factors(isParamValid("action_scaling_factors") + _observation_shift_factors(isParamSetByUser("observation_shift_factors") + ? getParam>("observation_shift_factors") + : std::vector(_observation_names.size(), 0.0)), + _observation_scaling_factors(isParamSetByUser("observation_scaling_factors") + ? getParam>("observation_scaling_factors") + : std::vector(_observation_names.size(), 1.0)), + _action_scaling_factors(isParamSetByUser("action_scaling_factors") ? getParam>("action_scaling_factors") - : std::vector(_control_names.size(), 1.0)) + : std::vector(_control_names.size(), 1.0)), + _observation_history(_input_timesteps) { // We first check if the input parameters make sense and throw errors if different parameter // combinations are not allowed @@ -83,14 +85,14 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param {"num_neurons_per_layer", "activation_function"}, !getParam("torch_script_format")); - if (_response_names.size() != _response_shift_factors.size()) - paramError("response_shift_factors", - "The number of shift factors is not the same as the number of responses!"); + if (_observation_names.size() != _observation_shift_factors.size()) + paramError("observation_shift_factors", + "The number of shift factors is not the same as the number of observations!"); - if (_response_names.size() != _response_scaling_factors.size()) - paramError( - "response_scaling_factors", - "The number of normalization coefficients is not the same as the number of responses!"); + if (_observation_names.size() != _observation_scaling_factors.size()) + paramError("observation_scaling_factors", + "The number of normalization coefficients is not the same as the number of " + "observations!"); if (_control_names.size() != _action_scaling_factors.size()) paramError("action_scaling_factors", @@ -99,45 +101,68 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param // We link to the postprocessor values so that we can fetch them any time. This also raises // errors if we don't have the postprocessors requested in the input. - for (unsigned int resp_i = 0; resp_i < _response_names.size(); ++resp_i) - _response_values.push_back(&getPostprocessorValueByName(_response_names[resp_i])); + for (unsigned int obs_i = 0; obs_i < _observation_names.size(); ++obs_i) + _observation_values.push_back(&getPostprocessorValueByName(_observation_names[obs_i])); +} + +void +LibtorchNeuralNetControl::initialSetup() +{ + // File-backed controllers are loaded after full construction so derived controls can override + // the loader without constructor-time type checks. + if (isParamSetByUser("filename")) + loadControlNeuralNetFromFile(); +} - // If the user wants to read the neural net from file, we do it. We can read it from a - // torchscript file, or we can create a shell and read back the parameters - if (isParamValid("filename")) +void +LibtorchNeuralNetControl::loadControlNeuralNetFromFile() +{ + const auto & filename = getParam("filename"); + if (getParam("torch_script_format")) + _nn = std::make_shared(filename); + else { - std::string filename = getParam("filename"); - if (getParam("torch_script_format")) - _nn = std::make_shared(filename); - else + unsigned int num_inputs = _observation_names.size() * _input_timesteps; + unsigned int num_outputs = _control_names.size(); + std::vector num_neurons_per_layer = + getParam>("num_neurons_per_layer"); + std::vector activation_functions = + isParamSetByUser("activation_function") + ? getParam>("activation_function") + : std::vector({"relu"}); + const auto input_shift_factors = + _observation_history.expandObservationFactors(_observation_shift_factors); + const auto input_scaling_factors = + _observation_history.expandObservationFactors(_observation_scaling_factors); + auto nn = std::make_shared(filename, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_functions, + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors, + _action_scaling_factors); + + try { - unsigned int num_inputs = _response_names.size() * _input_timesteps; - unsigned int num_outputs = _control_names.size(); - std::vector num_neurons_per_layer = - getParam>("num_neurons_per_layer"); - std::vector activation_functions = - parameters.isParamSetByUser("activation_function") - ? getParam>("activation_function") - : std::vector({"relu"}); - auto nn = std::make_shared( - filename, num_inputs, num_outputs, num_neurons_per_layer, activation_functions); - - try - { - torch::load(nn, filename); - _nn = std::make_shared(*nn); - } - catch (const c10::Error & e) - { - mooseError( - "The requested pytorch parameter file could not be loaded. This can either be the" - "result of the file not existing or a misalignment in the generated container and" - "the data in the file. Make sure the dimensions of the generated neural net are the" - "same as the dimensions of the parameters in the input file!\n", - e.msg()); - } + Moose::loadLibtorchArtificialNeuralNetState(*nn, filename); + _nn = std::make_shared(*nn); + } + catch (const c10::Error & e) + { + mooseError( + "The requested pytorch parameter file could not be loaded. This can either be the" + "result of the file not existing or a misalignment in the generated container and" + "the data in the file. Make sure the dimensions of the generated neural net are the" + "same as the dimensions of the parameters in the input file!\n", + e.msg()); } } + + execute(); } void @@ -146,14 +171,13 @@ LibtorchNeuralNetControl::execute() if (_nn) { const unsigned int n_controls = _control_names.size(); - const unsigned int num_old_timesteps = _input_timesteps - 1; - // Fetch current reporter values and populate _current_response - updateCurrentResponse(); + // Fetch current observation values from the linked postprocessors. + updateCurrentObservation(); // If this is the first timestep, we fill up the old values with the initial value - if (_old_responses.empty()) - _old_responses.assign(num_old_timesteps, _current_response); + if (_old_observations.empty()) + _observation_history.initializeHistory(_current_observation, _old_observations); // Organize the old an current solution into a tensor so we can evaluate the neural net torch::Tensor input_tensor = prepareInputTensor(); @@ -164,16 +188,14 @@ LibtorchNeuralNetControl::execute() _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; for (unsigned int control_i = 0; control_i < n_controls; ++control_i) { - // We scale the controllable value for physically meaningful control action setControllableValueByName(_control_names[control_i], - _current_control_signals[control_i] * - _action_scaling_factors[control_i]); + _current_control_signals[control_i]); } // We add the curent solution to the old solutions and move everything in there one step // backward - std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); - _old_responses[0] = _current_response; + if (_old_observations.size()) + _observation_history.advanceHistory(_current_observation, _old_observations); } } @@ -204,14 +226,11 @@ LibtorchNeuralNetControl::conditionalParameterError( } void -LibtorchNeuralNetControl::updateCurrentResponse() +LibtorchNeuralNetControl::updateCurrentObservation() { - // Gather the current response values from the reporters - _current_response.clear(); - - for (const auto & resp_i : index_range(_response_names)) - _current_response.push_back((*_response_values[resp_i] - _response_shift_factors[resp_i]) * - _response_scaling_factors[resp_i]); + _current_observation.resize(_observation_names.size()); + for (const auto obs_i : index_range(_observation_names)) + _current_observation[obs_i] = *_observation_values[obs_i]; } void @@ -223,14 +242,8 @@ LibtorchNeuralNetControl::loadControlNeuralNet(const Moose::LibtorchArtificialNe torch::Tensor LibtorchNeuralNetControl::prepareInputTensor() { - const unsigned int num_old_timesteps = _input_timesteps - 1; - - // We convert the standard vectors to libtorch tensors - std::vector raw_input(_current_response); - - for (const auto & step_i : make_range(num_old_timesteps)) - raw_input.insert(raw_input.end(), _old_responses[step_i].begin(), _old_responses[step_i].end()); - + auto raw_input = + _observation_history.stackCurrentObservation(_current_observation, _old_observations); torch::Tensor input_tensor; LibtorchUtils::vectorToTensor(raw_input, input_tensor); diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C index 58fb40e33cf6..99175551d987 100644 --- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C +++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C @@ -10,7 +10,187 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include "LibtorchArtificialNeuralNet.h" +#include "LibtorchRandomUtils.h" #include "MooseError.h" +#include "LibtorchUtils.h" + +#include + +namespace +{ + +/** + * Try to read one tensor from a plain libtorch archive. + * @param archive Archive being read. + * @param key Serialized tensor name. + * @param tensor Tensor that receives the loaded data. + * @return True when the tensor was found and loaded. + */ +bool +readArchiveTensor(torch::serialize::InputArchive & archive, + const std::string & key, + torch::Tensor & tensor) +{ + try + { + archive.read(key, tensor); + return true; + } + catch (const c10::Error &) + { + return false; + } +} + +/** + * Copy a stored tensor into an existing parameter or buffer. + * @param destination Tensor owned by the current module. + * @param source Tensor read from disk. + */ +void +copyTensor(torch::Tensor & destination, const torch::Tensor & source) +{ + destination.data().copy_(source.to(destination.options())); +} + +/// Return true for affine-metadata buffers that older checkpoints may omit. +bool +isOptionalArtificialNeuralNetBuffer(const std::string & key) +{ + return key == "input_shift" || key == "input_scale" || key == "output_scale"; +} + +/** + * Look up one named tensor in a torch named-parameter or named-buffer list. + * @param tensors Torch named tensor list. + * @param key Tensor name to search for. + * @param tensor Tensor that receives the match. + * @return True when the requested tensor exists. + */ +template +bool +findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch::Tensor & tensor) +{ + for (const auto & entry : tensors) + if (entry.name == key) + { + tensor = entry.value; + return true; + } + + return false; +} + +/** + * Load ANN parameters and buffers from a plain libtorch archive. + * @param nn Neural network that receives the loaded state. + * @param filename Checkpoint file to read. + * @param error Human-readable error string filled on failure. + * @return True when the network was loaded successfully. + */ +bool +loadArtificialNeuralNetStateFromArchive(Moose::LibtorchArtificialNeuralNet & nn, + const std::string & filename, + std::string & error) +{ + try + { + torch::serialize::InputArchive archive; + archive.load_from(filename); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if (!readArchiveTensor(archive, parameter.key(), stored_tensor)) + { + error = "Missing serialized parameter: " + parameter.key(); + return false; + } + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!readArchiveTensor(archive, buffer.key(), stored_tensor)) + { + if (isOptionalArtificialNeuralNetBuffer(buffer.key())) + continue; + + error = "Missing serialized buffer: " + buffer.key(); + return false; + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + return true; + } + catch (const c10::Error & e) + { + error = e.msg(); + return false; + } +} + +/** + * Load ANN parameters and buffers from a scripted Torch module. + * @param nn Neural network that receives the loaded state. + * @param filename Checkpoint file to read. + * @param error Human-readable error string filled on failure. + * @return True when the network was loaded successfully. + */ +bool +loadArtificialNeuralNetStateFromTorchScript(Moose::LibtorchArtificialNeuralNet & nn, + const std::string & filename, + std::string & error) +{ + try + { + const auto scripted = torch::jit::load(filename); + const auto scripted_parameters = scripted.named_parameters(); + const auto scripted_buffers = scripted.named_buffers(); + + for (auto & parameter : nn.named_parameters()) + { + torch::Tensor stored_tensor; + if (!findNamedTensor(scripted_parameters, parameter.key(), stored_tensor)) + { + error = "Missing scripted parameter: " + parameter.key(); + return false; + } + + copyTensor(parameter.value(), stored_tensor); + } + + for (auto & buffer : nn.named_buffers()) + { + torch::Tensor stored_tensor; + if (!findNamedTensor(scripted_buffers, buffer.key(), stored_tensor)) + { + if (isOptionalArtificialNeuralNetBuffer(buffer.key())) + continue; + + error = "Missing scripted buffer: " + buffer.key(); + return false; + } + + copyTensor(buffer.value(), stored_tensor); + } + + nn.synchronizeAffineFactorsFromBuffers(); + return true; + } + catch (const c10::Error & e) + { + error = e.msg(); + return false; + } +} + +} // namespace namespace Moose { @@ -22,27 +202,40 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( const std::vector & num_neurons_per_layer, const std::vector & activation_function, const torch::DeviceType device_type, - const torch::ScalarType data_type) + const torch::ScalarType data_type, + const bool build_on_construct, + const std::vector & input_shift_factors, + const std::vector & input_scaling_factors, + const std::vector & output_scaling_factors) : _name(name), _num_inputs(num_inputs), _num_outputs(num_outputs), _num_neurons_per_layer(num_neurons_per_layer), - _activation_function(MultiMooseEnum("relu sigmoid elu gelu linear", "relu")), + _activation_function(MultiMooseEnum("relu sigmoid elu gelu linear tanh", "relu")), _device_type(device_type), - _data_type(data_type) + _data_type(data_type), + _input_shift_factors( + setAffineFactors(input_shift_factors, num_inputs, 0.0, "input_shift_factors")), + _input_scaling_factors( + setAffineFactors(input_scaling_factors, num_inputs, 1.0, "input_scaling_factors")), + _output_scaling_factors( + setAffineFactors(output_scaling_factors, num_outputs, 1.0, "output_scaling_factors")) { _activation_function = activation_function; + initializeAffineBuffers(); // Check if the number of activation functions matches the number of hidden layers if ((_activation_function.size() != 1) && (_activation_function.size() != _num_neurons_per_layer.size())) mooseError("The number of activation functions should be either one or the same as the number " "of hidden layers"); - constructNeuralNetwork(); + + if (build_on_construct) + constructNeuralNetwork(); } LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( - const Moose::LibtorchArtificialNeuralNet & nn) + const Moose::LibtorchArtificialNeuralNet & nn, const bool build_on_construct) : torch::nn::Module(), _name(nn.name()), _num_inputs(nn.numInputs()), @@ -50,16 +243,103 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet( _num_neurons_per_layer(nn.numNeuronsPerLayer()), _activation_function(nn.activationFunctions()), _device_type(nn.deviceType()), - _data_type(nn.dataType()) + _data_type(nn.dataType()), + _input_shift_factors(nn.inputShiftFactors()), + _input_scaling_factors(nn.inputScalingFactors()), + _output_scaling_factors(nn.outputScalingFactors()) { + initializeAffineBuffers(); // We construct the NN architecture - constructNeuralNetwork(); - // We fill it up with the current parameter values - const auto & from_params = nn.named_parameters(); - auto to_params = this->named_parameters(); - for (unsigned int param_i : make_range(from_params.size())) - to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + if (build_on_construct) + { + constructNeuralNetwork(); + // We fill it up with the current parameter values + const auto & from_params = nn.named_parameters(); + auto to_params = this->named_parameters(); + for (unsigned int param_i : make_range(from_params.size())) + to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + + const auto & from_buffers = nn.named_buffers(); + auto to_buffers = this->named_buffers(); + for (unsigned int buffer_i : make_range(from_buffers.size())) + to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone(); + } +} + +Real +LibtorchArtificialNeuralNet::determineGain(const std::string & activation) +{ + if (activation == "relu") + return std::sqrt(2); + if (activation == "tanh") + return 5.0 / 3.0; + + return 1.0; +} + +void +LibtorchArtificialNeuralNet::initializeNeuralNetwork(const c10::optional generator) +{ + for (unsigned int i = 0; i < numHiddenLayers(); ++i) + { + const auto & activation = + _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; + const Real gain = determineGain(activation); + Moose::orthogonalInitializeTensor(_weights[i]->weight, gain, generator); + torch::nn::init::zeros_(_weights[i]->bias); + } + + Moose::orthogonalInitializeTensor(_weights.back()->weight, 1.0, generator); + torch::nn::init::zeros_(_weights.back()->bias); +} + +std::vector +LibtorchArtificialNeuralNet::setAffineFactors(const std::vector & factors, + const unsigned int expected_size, + const Real default_value, + const std::string & factor_name) +{ + const auto resolved_factors = + factors.empty() ? std::vector(expected_size, default_value) : factors; + + if (resolved_factors.size() != expected_size) + mooseError("The number of ", factor_name, " entries must match ", expected_size, "."); + + return resolved_factors; +} + +void +LibtorchArtificialNeuralNet::initializeAffineBuffers() +{ + auto input_shift = _input_shift_factors; + LibtorchUtils::vectorToTensor(input_shift, _input_shift_tensor); + _input_shift_tensor = register_buffer( + "input_shift", _input_shift_tensor.transpose(0, 1).to(_data_type).to(_device_type)); + + auto input_scale = _input_scaling_factors; + LibtorchUtils::vectorToTensor(input_scale, _input_scale_tensor); + _input_scale_tensor = register_buffer( + "input_scale", _input_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); + + auto output_scale = _output_scaling_factors; + LibtorchUtils::vectorToTensor(output_scale, _output_scale_tensor); + _output_scale_tensor = register_buffer( + "output_scale", _output_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); +} + +void +LibtorchArtificialNeuralNet::synchronizeAffineFactorsFromBuffers() +{ + auto input_shift = _input_shift_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(input_shift, _input_shift_factors); + + auto input_scale = _input_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(input_scale, _input_scaling_factors); + + auto output_scale = + _output_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(output_scale, _output_scaling_factors); } void @@ -84,14 +364,28 @@ LibtorchArtificialNeuralNet::constructNeuralNetwork() _weights.back()->to(_device_type, _data_type); } +torch::Tensor +LibtorchArtificialNeuralNet::preprocessInput(const torch::Tensor & x) const +{ + torch::Tensor input(x); + if (_data_type != input.scalar_type()) + input = input.to(_data_type); + if (_device_type != input.device().type()) + input = input.to(_device_type); + + return (input - _input_shift_tensor) * _input_scale_tensor; +} + +torch::Tensor +LibtorchArtificialNeuralNet::scaleOutput(const torch::Tensor & y) const +{ + return y * _output_scale_tensor; +} + torch::Tensor LibtorchArtificialNeuralNet::forward(const torch::Tensor & x) { - torch::Tensor output(x); - if (_data_type != output.scalar_type()) - output.to(_data_type); - if (_device_type != output.device().type()) - output.to(_device_type); + torch::Tensor output = preprocessInput(x); for (unsigned int i = 0; i < _weights.size() - 1; ++i) { @@ -101,6 +395,8 @@ LibtorchArtificialNeuralNet::forward(const torch::Tensor & x) output = torch::relu(_weights[i]->forward(output)); else if (activation == "sigmoid") output = torch::sigmoid(_weights[i]->forward(output)); + else if (activation == "tanh") + output = torch::tanh(_weights[i]->forward(output)); else if (activation == "elu") output = torch::elu(_weights[i]->forward(output)); else if (activation == "gelu") @@ -109,9 +405,7 @@ LibtorchArtificialNeuralNet::forward(const torch::Tensor & x) output = _weights[i]->forward(output); } - output = _weights[_weights.size() - 1]->forward(output); - - return output; + return scaleOutput(_weights[_weights.size() - 1]->forward(output)); } void @@ -145,6 +439,10 @@ LibtorchArtificialNeuralNet::store(nlohmann::json & json) const named_params[param_i].value().data_ptr(), named_params[param_i].value().data_ptr() + named_params[param_i].value().numel()); } + + json["input_shift_factors"] = _input_shift_factors; + json["input_scaling_factors"] = _input_scaling_factors; + json["output_scaling_factors"] = _output_scaling_factors; } void @@ -154,6 +452,26 @@ to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * const network->store(json); } +void +loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn, + const std::string & filename) +{ + std::string archive_error; + if (loadArtificialNeuralNetStateFromArchive(nn, filename, archive_error)) + return; + + std::string scripted_error; + if (loadArtificialNeuralNetStateFromTorchScript(nn, filename, scripted_error)) + return; + + mooseError("The requested pytorch parameter file could not be loaded. This can either be the " + "result of the file not existing or a misalignment in the generated container and " + "the data in the file. Make sure the dimensions of the generated neural net are the " + "same as the dimensions of the parameters in the input file!\nArchive load error: ", + archive_error, + "\nTorchScript load error: ", + scripted_error); +} } template <> @@ -233,7 +551,7 @@ dataLoad( nn = std::make_shared( name, num_inputs, num_outputs, num_neurons_per_layer, activation_functions, divt, datt); - torch::load(nn, name); + Moose::loadLibtorchArtificialNeuralNetState(*nn, name); } template <> diff --git a/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C b/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C new file mode 100644 index 000000000000..a98d931d71e1 --- /dev/null +++ b/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C @@ -0,0 +1,133 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchObservationHistoryHelper.h" + +#include "MooseError.h" + +#include +#include "libmesh/utility.h" + +LibtorchObservationHistoryHelper::LibtorchObservationHistoryHelper( + const unsigned int input_timesteps) + : _input_timesteps(input_timesteps) +{ + if (_input_timesteps == 0) + mooseError("Observation history requires at least one input timestep."); +} + +void +LibtorchObservationHistoryHelper::validateTrajectoryShape( + const std::vector> & component_trajectories) const +{ + if (component_trajectories.empty()) + return; + + const auto trajectory_size = component_trajectories.front().size(); + for (const auto & trajectory : component_trajectories) + if (trajectory.size() != trajectory_size) + mooseError("Observation trajectories must all have the same number of timesteps."); +} + +void +LibtorchObservationHistoryHelper::initializeHistory( + const std::vector & observation, std::vector> & old_observations) const +{ + old_observations.assign(_input_timesteps - 1, observation); +} + +void +LibtorchObservationHistoryHelper::advanceHistory( + const std::vector & observation, std::vector> & old_observations) const +{ + if (old_observations.empty()) + return; + + std::rotate(old_observations.rbegin(), old_observations.rbegin() + 1, old_observations.rend()); + old_observations[0] = observation; +} + +std::vector +LibtorchObservationHistoryHelper::expandObservationFactors( + const std::vector & observation_factors) const +{ + if (observation_factors.empty()) + return {}; + + std::vector expanded; + expanded.reserve(observation_factors.size() * _input_timesteps); + + for (const auto lag : make_range(_input_timesteps)) + { + libmesh_ignore(lag); + expanded.insert(expanded.end(), observation_factors.begin(), observation_factors.end()); + } + + return expanded; +} + +std::vector +LibtorchObservationHistoryHelper::stackCurrentObservation( + const std::vector & observation, + const std::vector> & old_observations) const +{ + const auto expected_history_size = _input_timesteps - 1; + if (old_observations.size() != expected_history_size) + mooseError("Observation history must contain ", + expected_history_size, + " stored entries, but got ", + old_observations.size(), + "."); + + std::vector stacked; + stacked.reserve(observation.size() * _input_timesteps); + + stacked.insert(stacked.end(), observation.begin(), observation.end()); + + for (const auto & history_entry : old_observations) + { + if (history_entry.size() != observation.size()) + mooseError("Observation history entries must have the same size as the current " + "observation."); + stacked.insert(stacked.end(), history_entry.begin(), history_entry.end()); + } + + return stacked; +} + +std::vector +LibtorchObservationHistoryHelper::stackTrajectoryObservation( + const std::vector> & component_trajectories, + const unsigned int time_index) const +{ + validateTrajectoryShape(component_trajectories); + + if (component_trajectories.empty()) + return {}; + + const auto trajectory_size = component_trajectories.front().size(); + if (time_index >= trajectory_size) + mooseError("Requested observation time index is out of range."); + + std::vector stacked; + stacked.reserve(component_trajectories.size() * _input_timesteps); + + for (const auto lag : make_range(_input_timesteps)) + { + const auto source_index = time_index > lag ? time_index - lag : 0; + for (const auto component_i : make_range(component_trajectories.size())) + stacked.push_back(component_trajectories[component_i][source_index]); + } + + return stacked; +} + +#endif diff --git a/framework/src/libtorch/utils/LibtorchRandomUtils.C b/framework/src/libtorch/utils/LibtorchRandomUtils.C new file mode 100644 index 000000000000..f1c18b349dc1 --- /dev/null +++ b/framework/src/libtorch/utils/LibtorchRandomUtils.C @@ -0,0 +1,71 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRandomUtils.h" + +#include +#include +#include + +#include "MooseError.h" + +namespace Moose +{ + +at::Generator +makeLibtorchCPUGenerator() +{ + return at::detail::createCPUGenerator(); +} + +at::Generator +makeLibtorchCPUGenerator(const uint64_t seed) +{ + return at::detail::createCPUGenerator(seed); +} + +void +orthogonalInitializeTensor(torch::Tensor & tensor, + const Real gain, + const c10::optional generator) +{ + if (tensor.ndimension() < 2) + mooseError("Only tensors with 2 or more dimensions are supported for orthogonal " + "initialization."); + + if (!tensor.numel()) + return; + + torch::NoGradGuard no_grad; + + const auto rows = tensor.size(0); + const auto cols = tensor.numel() / rows; + auto flattened = torch::empty({rows, cols}, tensor.options()); + at::normal_out(flattened, 0.0, 1.0, {rows, cols}, generator); + + if (rows < cols) + flattened = flattened.transpose(0, 1); + + auto qr = at::linalg_qr(flattened, "reduced"); + auto q = std::get<0>(qr); + const auto phases = torch::diag(std::get<1>(qr), 0).sign(); + q = q * phases; + + if (rows < cols) + q = q.transpose(0, 1); + + tensor.view_as(q).copy_(q); + tensor.mul_(gain); +} + +} // namespace Moose + +#endif diff --git a/framework/src/postprocessors/PointValue.C b/framework/src/postprocessors/PointValue.C index 8abe943b9f4c..3b9cf803f65e 100644 --- a/framework/src/postprocessors/PointValue.C +++ b/framework/src/postprocessors/PointValue.C @@ -50,7 +50,6 @@ void PointValue::execute() { _value = _system.point_value(_var_number, _point, false); - /** * If we get exactly zero, we don't know if the locator couldn't find an element, or * if the solution is truly zero, more checking is needed. diff --git a/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib b/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib index 9af1f07f1888..c321338e7187 100644 --- a/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib +++ b/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib @@ -135,6 +135,20 @@ @article{kingma2014adam url={https://doi.org/10.48550/arXiv.1412.6980} } +@article{schulman2015gae, + title={High-dimensional continuous control using generalized advantage estimation}, + author={Schulman, John and Moritz, Philipp and Levine, Sergey and Jordan, Michael and Abbeel, Pieter}, + journal={arXiv preprint arXiv:1506.02438}, + year={2015} +} + +@article{schulman2017proximal, + title={Proximal policy optimization algorithms}, + author={Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg}, + journal={arXiv preprint arXiv:1707.06347}, + year={2017} +} + @article{li2016pss, title={Matlab codes of Subset Simulation for reliability analysis and structural optimization}, author={Li, H. S. and Cao, Z. J.}, @@ -363,4 +377,4 @@ @article{calderhead2014general pages = {17408--17413}, year = {2014}, doi = {10.1073/pnas.1408184111} -} \ No newline at end of file +} diff --git a/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md b/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md index 71e7a0517bfe..3bf2772eac80 100644 --- a/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md +++ b/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md @@ -5,6 +5,17 @@ The following example demonstrates how to set up a Proximal Policy Optimization Deep Reinforcement Learning (DRL) training sequence for neural-net-based controllers of MOOSE simulations. See [!cite](schulman2017proximal) for a more theoretical background on the PPO algorithm. +The detailed PPO and GAE equations used by the trainer are documented on +[LibtorchDRLControlTrainer](source/libtorch/trainers/LibtorchDRLControlTrainer.md). +At a high level, the trainer collects on-policy tuples +$(o_t, a_t, \log \pi_{\mathrm{old}}(a_t|o_t), r_t, o_{t+1})$, computes GAE +advantages [!cite](schulman2015gae), and updates separate actor and critic +networks with the PPO clipped objective [!cite](schulman2017proximal). The +paired [LibtorchDRLControl](source/libtorch/controls/LibtorchDRLControl.md) +executes the actor at `TIMESTEP_BEGIN`, stacks `input_timesteps` observations, +and can optionally reuse or smooth sampled actions before they are applied to +the physics model. + ## Problem Statement In this example we would like to design a DRL-based controller for the air conditioning of a @@ -80,9 +91,10 @@ using [LibtorchControlValuePostprocessor](source/libtorch/postprocessors/Libtorc [LibtorchDRLLogProbabilityPostprocessor](source/libtorch/postprocessors/LibtorchDRLLogProbabilityPostprocessor.md) as shown above. Furthermore, the additional [LibtorchNeuralNetControl](LibtorchNeuralNetControl.md) (`src_control_final`) -can be used to evaluate the neural network without the additional random -sampling process needed for the training process. In other words, this object will evaluate the -final product of this training process. +is optional and can be used to evaluate the trained network with a plain +deterministic control object. The same trained actor could also be executed +through [LibtorchDRLControl.md] with +[!param](/Controls/LibtorchDRLControl/stochastic) set to `false`. ### Main Application @@ -157,10 +169,12 @@ to balance these two factors by tuning the parameters in the `Trainer` and `Cont layout={'xaxis':{'type':'linear', 'title':'Number of simulations'}, 'yaxis':{'type':'linear','title':'Average Episodic Reward'}} -Following the training procedure, we can replace the [LibtorchDRLControl.md] object with +Following the training procedure, we can either set +[!param](/Controls/LibtorchDRLControl/stochastic) to `false` on +[LibtorchDRLControl.md] or replace it with [LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md) -to evaluate the final version of the neural network -without the additional randomization. By doing this, the following results are obtained: +for plain deterministic inference. In this example we use the latter, and the +following results are obtained: !plot scatter id=results caption=The evolution of the room temperature at the sensor over the day. diff --git a/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md b/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md index 1a1ad7c057a3..ef7038e3d0c5 100644 --- a/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md +++ b/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md @@ -5,10 +5,44 @@ ## Overview -This object controls a physical process using a neural network, just like [LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md), -with an additional functionality of randomizing the action values to avoid overfitting in the control process. -This control object is supposed to be used in conjunction with [LibtorchDRLControlTrainer.md]. In other -cases when the neural network needs to be simply evaluated, the user is encouraged to use [LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md). +This object is the runtime policy executor paired with +[LibtorchDRLControlTrainer](source/libtorch/trainers/LibtorchDRLControlTrainer.md). +It extends +[LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md) +with stochastic policy sampling, action reuse, optional smoothing, and +restartable policy state. For deterministic execution of the same actor, set +[!param](/Controls/LibtorchDRLControl/stochastic) to `false`. Use +[LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md) +instead when a plain deterministic neural-net control object is preferred +without the DRL-specific execution features. + +## Execution Model + +`LibtorchDRLControl` only supports `TIMESTEP_BEGIN`. On each execution it reads +the current observation values, combines them with the stored history implied by +[!param](/Controls/LibtorchDRLControl/input_timesteps), and evaluates the actor +when a new policy action is needed. + +If [!param](/Controls/LibtorchDRLControl/stochastic) is `true`, the action is +sampled from the actor distribution and the corresponding log probabilities are +stored for PPO training. If it is `false`, the deterministic actor output is +used instead. The policy evaluation can be reused across multiple controller +executions with [!param](/Controls/LibtorchDRLControl/num_steps_in_period), so a +new action is only sampled every configured number of executions. + +The applied control can also be relaxed with +[!param](/Controls/LibtorchDRLControl/smoother): +\begin{equation} +u_t^{\mathrm{applied}} = u_{t-1}^{\mathrm{applied}} + +\alpha\left(u_t^{\mathrm{policy}} - u_{t-1}^{\mathrm{applied}}\right), +\end{equation} +where $\alpha$ is the `smoother` value. Setting `smoother = 1` applies the raw +policy action directly. + +The controller stores the observation history, smoothed signal, and the libtorch +CPU random-number-generator state as restartable data. This keeps stochastic +recovered runs aligned with uninterrupted runs, provided the same controller +state is recovered. !if! function=hasCapability('libtorch') diff --git a/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md b/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md index 7d5fcc1e1a54..659ed5b20691 100644 --- a/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md +++ b/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md @@ -5,8 +5,78 @@ ## Overview -This object is supposed to train a Deep Reinforcement Learning (DRL) controller -using the Proximal Policy Optimization (PPO) algorithm [!cite](schulman2017proximal). +This object trains an on-policy actor-critic controller using Proximal Policy +Optimization (PPO) [!cite](schulman2017proximal) together with generalized +advantage estimation (GAE) [!cite](schulman2015gae). Reporter trajectories are +assembled into transitions, flattened across samples, shuffled into mini-batches, +and then used to update separate actor and critic neural networks. + +## Algorithm Summary + +For `input_timesteps = H`, the observation passed to the actor and critic is the +stacked history +\begin{equation} +o_t = \left[s_t,\ s_{t-1},\ \ldots,\ s_{t-H+1}\right], +\end{equation} +where early missing history entries are filled with the earliest available +reporter value. + +Each collected trajectory is converted into the tuples +$(o_t, a_t, \log \pi_{\mathrm{old}}(a_t|o_t), r_t, o_{t+1})$. The +[!param](/Trainers/LibtorchDRLControlTrainer/shift_outputs) option aligns the +reported action and log-probability sequences with the reward sequence when the +control is applied at the beginning of a time step and the reward is measured at +the end of that step. The +[!param](/Trainers/LibtorchDRLControlTrainer/timestep_window) option can be used +to downsample long reporter trajectories before these transitions are assembled. + +The critic is first evaluated on $o_t$ and $o_{t+1}$, and the temporal-difference +residual is computed as +\begin{equation} +\delta_t = r_t + \gamma V_{\phi}(o_{t+1}) - V_{\phi}(o_t). +\end{equation} +The trainer then computes the reverse-time GAE recursion +\begin{equation} +\hat{A}_t = \delta_t + \gamma \lambda \hat{A}_{t+1}, +\end{equation} +with critic regression targets +\begin{equation} +\hat{V}_t = \hat{A}_t + V_{\phi}(o_t). +\end{equation} + +For each mini-batch, PPO uses the probability ratio +\begin{equation} +r_t(\theta) = \exp\left(\log \pi_{\theta}(a_t|o_t) - +\log \pi_{\mathrm{old}}(a_t|o_t)\right), +\end{equation} +and the actor objective implemented here is +\begin{equation} +L_{\mathrm{actor}} = +-\frac{1}{N}\sum_t \left[ +\min\left(r_t(\theta)\hat{A}_t, +\mathrm{clip}\left(r_t(\theta), 1-\epsilon, 1+\epsilon\right)\hat{A}_t\right) ++ c_H \mathcal{H}\left[\pi_{\theta}(\cdot|o_t)\right] +\right]. +\end{equation} +The critic is trained with a mean-squared-error loss, +\begin{equation} +L_{\mathrm{critic}} = \frac{1}{N}\sum_t +\left(V_{\phi}(o_t) - \hat{V}_t\right)^2. +\end{equation} + +If [!param](/Trainers/LibtorchDRLControlTrainer/standardize_advantage) is set, +each sampled mini-batch uses zero-mean, unit-variance advantages before the PPO +loss is evaluated. + +## Notes + +The actor embeds the configured input shifting, input scaling, and action scaling +directly into the neural-network module so that transferred and checkpointed +controllers operate in the same normalized coordinates used during training. + +The trainer updates the actor and critic with separate Adam optimizers and then +broadcasts the updated parameters so all MPI ranks hold the same networks after +each PPO update. ## Example Input File Syntax diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i index 2dad8e5f504f..ff4f127ef0af 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i @@ -10,8 +10,8 @@ air_effective_k = 0.5 # W/(m K) xmax = 7.0 ymin = 0.0 ymax = 5.0 - nx = 35 - ny = 25 + nx = 20 + ny = 20 [] [] @@ -97,7 +97,7 @@ air_effective_k = 0.5 # W/(m K) type = FunctionValuePostprocessor function = reward_function execute_on = 'INITIAL TIMESTEP_END' - indirect_dependencies = 'center_temp_tend env_temp' + indirect_dependencies = 'center_temp_tend' [] [top_flux] type = LibtorchControlValuePostprocessor @@ -112,42 +112,22 @@ air_effective_k = 0.5 # W/(m K) [Reporters] [T_reporter] type = AccumulateReporter - reporters = 'center_temp_tend/value env_temp/value reward/value top_flux/value log_prob_top_flux/value' + reporters = 'center_temp_tend/value reward/value top_flux/value log_prob_top_flux/value' [] [] [Controls] - inactive = 'src_control_final' [src_control] type = LibtorchDRLControl parameters = "BCs/top_flux/value" - responses = 'center_temp_tend env_temp' + observations = 'center_temp_tend' # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '290 290' - action_standard_deviations = '0.02' - action_scaling_factors = 200 - - execute_on = 'TIMESTEP_BEGIN' - [] - [src_control_final] - type = LibtorchNeuralNetControl - - filename = 'mynet_control.net' - num_neurons_per_layer = '16 6' - activation_function = 'relu' - - parameters = "BCs/top_flux/value" - responses = 'center_temp_tend env_temp' - - # keep consistent with LibtorchDRLControlTrainer - input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '290 290' - action_standard_deviations = '0.02' - action_scaling_factors = 200 + input_timesteps = 1 + observation_scaling_factors = '0.03' + observation_shift_factors = '290' + action_scaling_factors = 20 + stochastic = true execute_on = 'TIMESTEP_BEGIN' [] @@ -157,21 +137,17 @@ air_effective_k = 0.5 # W/(m K) type = Transient solve_type = 'NEWTON' - petsc_options_iname = '-pc_type -pc_factor_shift_type' - petsc_options_value = 'lu NONZERO' + petsc_options_iname = '-pc_type -pc_hypre_type' + petsc_options_value = 'hypre boomeramg' line_search = 'none' - nl_rel_tol = 1e-7 + nl_rel_tol = 1e-6 start_time = 0.0 end_time = 86400 - dt = 900.0 + dt = ${fparse 86400/80} [] [Outputs] console = false - [c] - type = CSV - execute_on = FINAL - [] [] diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i index 341a251214d3..e09ee3eb98c5 100644 --- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i @@ -13,63 +13,76 @@ type = SamplerFullSolveMultiApp sampler = dummy input_files = 'libtorch_drl_control_sub.i' + mode = batch-reset [] [] [Transfers] [nn_transfer] - type = LibtorchNeuralNetControlTransfer + type = SamplerDRLControlTransfer to_multi_app = runner trainer_name = nn_trainer control_name = src_control + sampler = dummy [] [r_transfer] - type = MultiAppReporterTransfer + type = SamplerReporterTransfer from_multi_app = runner - to_reporters = 'results/center_temp results/env_temp results/reward results/top_flux results/log_prob_top_flux' - from_reporters = 'T_reporter/center_temp_tend:value T_reporter/env_temp:value T_reporter/reward:value T_reporter/top_flux:value T_reporter/log_prob_top_flux:value' + sampler = dummy + stochastic_reporter = storage + from_reporter = 'T_reporter/center_temp_tend:value T_reporter/reward:value T_reporter/top_flux:value T_reporter/log_prob_top_flux:value' [] [] [Trainers] [nn_trainer] type = LibtorchDRLControlTrainer - response = 'results/center_temp results/env_temp' - control = 'results/top_flux' - log_probability = 'results/log_prob_top_flux' - reward = 'results/reward' + observation = 'storage/r_transfer:T_reporter:center_temp_tend:value' + control = 'storage/r_transfer:T_reporter:top_flux:value' + log_probability = 'storage/r_transfer:T_reporter:log_prob_top_flux:value' + reward = 'storage/r_transfer:T_reporter:reward:value' - num_epochs = 1000 - update_frequency = 10 - decay_factor = 0.0 + num_epochs = 50 + update_frequency = 1 + decay_factor = 0.8 + lambda_factor = 1.0 loss_print_frequency = 10 - critic_learning_rate = 0.0001 - num_critic_neurons_per_layer = '64 27' + critic_learning_rate = 0.001 + num_critic_neurons_per_layer = '32 16' + critic_activation_functions = 'relu relu' + + control_learning_rate = 0.001 + num_control_neurons_per_layer = '32 16' + control_activation_functions = 'relu relu' - control_learning_rate = 0.0005 - num_control_neurons_per_layer = '16 6' # keep consistent with LibtorchNeuralNetControl - input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '290 290' - action_standard_deviations = '0.02' + input_timesteps = 1 + observation_scaling_factors = '0.03' + observation_shift_factors = '290' + action_scaling_factors = 20 standardize_advantage = true + batch_size = 80 + read_from_file = false + + entropy_coeff = 0.0 + + # min_control_value = ${fparse -0.1} + # max_control_value = ${fparse 0.1} + [] [] [Reporters] - [results] - type = ConstantReporter - real_vector_names = 'center_temp env_temp reward top_flux log_prob_top_flux' - real_vector_values = '0; 0; 0; 0; 0' - outputs = csv - execute_on = timestep_begin + [storage] + type = StochasticReporter + parallel_type = ROOT + outputs = none [] [reward] type = DRLRewardReporter @@ -79,11 +92,12 @@ [Executioner] type = Transient - num_steps = 440 + num_steps = 200 [] [Outputs] file_base = output/train_out - csv = true - time_step_interval = 10 + json = true + time_step_interval = 1 + execute_on = TIMESTEP_END [] diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h index fbfccd9eafcf..e6c8074935db 100644 --- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h +++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h @@ -11,7 +11,9 @@ #pragma once -#include "LibtorchArtificialNeuralNet.h" +#include + +#include "LibtorchActorNeuralNet.h" #include "LibtorchNeuralNetControl.h" /** @@ -29,6 +31,9 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl /// Construct using input parameters LibtorchDRLControl(const InputParameters & parameters); + /// Restore any restartable controller state after base setup completes. + virtual void initialSetup() override; + /// We compute the actions in this function together with the corresponding logarithmic probabilities. virtual void execute() override; @@ -39,25 +44,53 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl */ Real getSignalLogProbability(const unsigned int signal_index) const; -protected: /** - * Function which computes the logarithmic probability of given actions. - * @param action The tensor containing the perturbed control signals (also known as the action of - * the controller) - * @param output_tensor The expected value of the signals predicted by the neural net - * @return The logarithmic probability of the action with respect to the neural net prediction + * Copy an actor network into this DRL controller. + * @param input_nn Actor network to copy into the controller. */ - torch::Tensor computeLogProbability(const torch::Tensor & action, - const torch::Tensor & output_tensor); + void loadControlNeuralNet(const Moose::LibtorchActorNeuralNet & input_nn); + + virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) override; + + virtual void loadControlNeuralNetFromFile() override; + + /// Reset the owned policy-sampling generator to a known seed. + void setPolicySampleSeed(uint64_t seed); +protected: /// The log probability of control signals from the last evaluation of the controller - std::vector _current_control_signal_log_probabilities; + std::vector & _current_control_signal_log_probabilities; + + /// The smoothed control signal from the previous execution, saved for restart/recover. + std::vector & _previous_control_signal; + /// The current smoothed control signal applied to the controllable parameters. + std::vector & _current_smoothed_signal; + + /// Actor network used when the controller operates as a stochastic policy. + std::shared_ptr _actor_nn; + /// Owned libtorch CPU generator used for policy sampling. + at::Generator _policy_generator; + /// Restartable serialized state for the owned policy-sampling generator. + std::vector & _policy_generator_state; + + /// Number of controller executions remaining before the next policy evaluation. + unsigned int & _executions_until_next_policy_evaluation; + /// Number of controller executions between policy evaluations. + const unsigned int _num_steps_in_period; + /// Relaxation factor applied while smoothing control updates. + const Real _smoother; + /// Whether to sample actions stochastically instead of using the deterministic actor output. + const bool _stochastic; + +private: + /// Advance the reuse schedule and report whether this execution should evaluate the policy. + bool shouldEvaluatePolicy(); - /// Standard deviation for the actions, supplied by the user - const std::vector _action_std; + /// Restore the owned libtorch generator state from restartable storage. + void restorePolicyGeneratorState(); - /// Standard deviations converted to a 2D diagonal tensor that can be used by Libtorch routines. - torch::Tensor _std; + /// Mirror the owned libtorch generator state into restartable storage. + void savePolicyGeneratorState(); }; #endif diff --git a/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h b/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h index d9792772de3a..2482e7166a9d 100644 --- a/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h +++ b/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h @@ -27,7 +27,17 @@ class DRLRewardReporter : public GeneralReporter, public SurrogateModelInterface protected: /// The reward values which will be saved - Real & _reward; + Real & _average_reward; + + /// The standard deviation of the reward values which will be saved + Real & _std_reward; + + /// The per-sample average reward values which will be saved + std::vector & _sample_average_reward; + + /// The per-sample reward standard deviations which will be saved + std::vector & _sample_std_reward; + /// The DRL trainer which computes the reward values LibtorchDRLControlTrainer & _trainer; }; diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h index 93bb35e8c1db..30cf9041d316 100644 --- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h +++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h @@ -12,105 +12,102 @@ #pragma once #include -#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchActorNeuralNet.h" +#include "LibtorchObservationHistoryHelper.h" +#include "LibtorchRLMiniBatchSampler.h" +#include "LibtorchRLPPOLoss.h" +#include "LibtorchRLTrajectoryBuffer.h" +#include "LibtorchRLValueEstimator.h" #include "libmesh/utility.h" #include "SurrogateTrainer.h" /** - * This trainer is responsible for training neural networks that efficiently control - * different processes. It utilizes the Proximal Policy Optimization algorithms. For more - * information on the algorithm, see the following resources: Schulman, John, et al. "Proximal - * policy optimization algorithms." arXiv preprint arXiv:1707.06347 (2017). - * https://medium.com/analytics-vidhya/coding-ppo-from-scratch-with-pytorch-part-1-4-613dfc1b14c8 - * https://stable-baselines.readthedocs.io/en/master/modules/ppo2.html + * Fixed-horizon actor-critic trainer that collects trajectories from MOOSE reporters and runs a + * PPO update on top of reusable RL-core components (observation history, trajectory buffer, + * mini-batch sampler, value estimator, and PPO loss). */ class LibtorchDRLControlTrainer : public SurrogateTrainerBase { public: static InputParameters validParams(); - /// construct using input parameters + /** + * Build the PPO-based DRL trainer. + * @param parameters Input parameters for the trainer. + */ LibtorchDRLControlTrainer(const InputParameters & parameters); + /// Pull fresh rollout data from the reporters and trigger training when ready. virtual void execute() override; /** - * Function which returns the current average episodic reward. It is only updated - * at the end of every episode. + * Return the current average episodic reward. + * @return Average episodic reward over the latest training window. */ Real averageEpisodeReward() { return _average_episode_reward; } + /// Return the current episodic reward standard deviation. + Real stdEpisodeReward() { return _std_episode_reward; } - /// The condensed training function - void trainController(); - - const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const { return *_control_nn; } - -protected: - /// Compute the average eposiodic reward - void computeAverageEpisodeReward(); - - /** - * Function to convert input/output data from std::vector to torch::tensor - * @param vector_data The input data in vector-vectors format - * @param tensor_data The tensor where we would like to save the results - * @param detach If the gradient info needs to be detached from the tensor - */ - void convertDataToTensor(std::vector> & vector_data, - torch::Tensor & tensor_data, - const bool detach = false); + /// Return per-sample mean episodic rewards from the latest update window. + std::vector sampleAverageEpsiodeRewards() { return _sample_average_episode_reward; } + /// Return per-sample episodic reward standard deviations from the latest update window. + std::vector sampleStdEpsiodeRewards() { return _sample_std_episode_reward; } /** - * Function which evaluates the critic to get the value (discounter reward) - * @param input The observation values (responses) - * @return The estimated value + * Run the PPO update on a flattened on-policy batch. + * @param batch Flattened trajectory batch to train on. */ - torch::Tensor evaluateValue(torch::Tensor & input); + void trainController(const LibtorchRLTrajectoryBuffer::TensorBatch & batch); - /** - * Function which evaluates the control net and then computes the logarithmic probability of the - * action - * @param input The observation values (responses) - * @param output The actions corresponding to the observations - * @return The estimated value for the logarithmic probability - */ - torch::Tensor evaluateAction(torch::Tensor & input, torch::Tensor & output); + /// Return the current actor network. + const Moose::LibtorchActorNeuralNet & controlNeuralNet() const { return *_control_nn; } + /// Return the trainer seed used for sampling and shuffling. + unsigned int seed() const { return _seed; } - /// Compute the return value by discounting the rewards and summing them - void computeRewardToGo(); +protected: + /// Compute the average episodic reward statistics for the latest samples. + void computeEpisodeRewardStatistics(); - /// Reset data after updating the neural network + /// Reset the stored rollout data after an update. void resetData(); - /// Response reporter names - const std::vector _response_names; + /// Observation reporter names + const std::vector _state_names; - /// Pointers to the current values of the responses - std::vector *> _response_value_pointers; + /// Pointers to the current values of the observations + /// We can have multiple observations, multiple samples, multiple timesteps + std::vector> *> _state_value_pointers; - /// Shifting constants for the responses - const std::vector _response_shift_factors; + /// Shifting constants for the observations + const std::vector _state_shift_factors; - /// Scaling constants for the responses - const std::vector _response_scaling_factors; + /// Scaling constants for the observations + const std::vector _state_scaling_factors; /// Control reporter names - const std::vector _control_names; + const std::vector _action_names; + + /// Multiplicative action scaling embedded in the actor outputs + const std::vector _action_scaling_factors; /// Pointers to the current values of the control signals - std::vector *> _control_value_pointers; + /// We can have multiple control signals, multiple samples, multiple timesteps + std::vector> *> _action_value_pointers; /// Log probability reporter names const std::vector _log_probability_names; /// Pointers to the current values of the control log probabilities - std::vector *> _log_probability_value_pointers; + /// We can have multiple control signals, multiple samples, multiple timesteps + std::vector> *> _log_probability_value_pointers; /// Reward reporter name const ReporterName _reward_name; /// Pointer to the current values of the reward - const std::vector * _reward_value_pointer; + /// We can have multiple samples, multiple timesteps + const std::vector> * _reward_value_pointer; /// Number of timesteps to fetch from the reporters to be the input of then eural nets const unsigned int _input_timesteps; @@ -120,19 +117,6 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Number of outputs for the control neural network unsigned int _num_outputs; - ///@{ - /// The gathered data from the reporters, each row represents one QoI, each column represents one time step - std::vector> _input_data; - std::vector> _output_data; - std::vector> _log_probability_data; - ///@} - - ///@{ - /// The reward and return data. The return is calculated using the _reward_data - std::vector _reward_data; - std::vector _return_data; - ///@} - /// Number of epochs for the training of the emulator const unsigned int _num_epochs; @@ -156,9 +140,8 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Decaying factor that is used when calculating the return from the reward const Real _decay_factor; - - /// Standard deviation for the actions - const std::vector _action_std; + /// GAE lambda factor used while estimating advantages and returns. + const Real _lambda_factor; /// Name of the pytorch output file. This is used for loading and storing /// already existing data @@ -176,6 +159,13 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// Storage for the current average episode reward Real _average_episode_reward; + /// Storage for the current episode reward standard deviation + Real _std_episode_reward; + + /// Per-sample mean episodic rewards over the latest update window + std::vector _sample_average_episode_reward; + /// Per-sample episodic reward standard deviations over the latest update window + std::vector _sample_std_episode_reward; /// Switch to enable the standardization of the advantages const bool _standardize_advantage; @@ -183,55 +173,75 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase /// The frequency the loss should be printed const unsigned int _loss_print_frequency; + /// Base seed for stochastic optimizers and policy sampling. + const unsigned int _seed; + + /// Optional lower bounds for each control signal + std::vector _min_values; + /// Optional upper bounds for each control signal + std::vector _max_values; + /// Pointer to the control (or actor) neural net object - std::shared_ptr _control_nn; + std::shared_ptr _control_nn; /// Pointer to the critic neural net object std::shared_ptr _critic_nn; - /// standard deviation in a tensor format for sampling the actual control value - torch::Tensor _std; + /// Best average episode reward seen so far while training + Real _highest_reward; + /// Entropy bonus coefficient used in the PPO actor loss + Real _entropy_coeff; - /// Torch::tensor version of the input and action data - torch::Tensor _input_tensor; - torch::Tensor _output_tensor; - torch::Tensor _return_tensor; - torch::Tensor _log_probability_tensor; + /// Adam optimizer used to update the actor network + std::unique_ptr _actor_optimizer; + /// Adam optimizer used to update the critic network + std::unique_ptr _critic_optimizer; private: /** - * Extract the response values from the postprocessors of the controlled system. - * This assumes that they are stored in an AccumulateReporter - * @param data The data where we would like to store the response values - * @param reporter_names The names of the reporters which need to be extracted - * @param num_timesteps The number of timesteps we want to use for training + * Resolve reporter names into cached pointer storage. + * @param reporter_names Reporter names to look up. + * @param pointer_storage Output vector that receives the reporter pointers. */ - void getInputDataFromReporter(std::vector> & data, - const std::vector *> & reporter_links, - const unsigned int num_timesteps); + void getReporterPointers(const std::vector & reporter_names, + std::vector> *> & pointer_storage); + + /// Pull trajectories out of the reporters and append them to the trajectory buffer. + void collectTrajectoriesFromReporters(); + /** - * Extract the output (actions, logarithmic probabilities) values from the postprocessors - * of the controlled system. This assumes that they are stored in an AccumulateReporter - * @param data The data where we would like to store the output values - * @param reporter_names The names of the reporters which need to be extracted + * Figure out how many aligned transitions a raw reporter sequence contains. + * @param raw_sequence_size Number of raw time entries in the reporter sequence. + * @return Number of valid transitions after history stacking and downsampling. */ - void getOutputDataFromReporter(std::vector> & data, - const std::vector *> & reporter_links); + unsigned int computeNumTransitions(std::size_t raw_sequence_size) const; /** - * Extract the reward values from the postprocessors of the controlled system - * This assumes that they are stored in an AccumulateReporter. - * @param data The data where we would like to store the reward values - * @param reporter_names The name of the reporter which need to be extracted + * Downsample one raw reporter sequence into the aligned rollout sequence we train on. + * @param sample Raw reporter sequence. + * @param offset Starting offset used for the aligned sequence. + * @param num_entries Number of aligned entries to extract. + * @return Downsampled sequence. */ - void getRewardDataFromReporter(std::vector & data, - const std::vector * const reporter_link); - - /// Getting reporter pointers with given names - void getReporterPointers(const std::vector & reporter_names, - std::vector *> & pointer_storage); + std::vector extractDownsampledSequence(const std::vector & sample, + unsigned int offset, + unsigned int num_entries) const; /// Counter for number of transient simulations that have been run before updating the controller unsigned int _update_counter; + + /// Reporter downsampling stride used while assembling rollout trajectories + unsigned int _timestep_window; + + /// Shared observation history stacking and factor-expansion helper + const LibtorchObservationHistoryHelper _observation_history; + /// Accumulated on-policy rollout data waiting to be flattened and trained on + LibtorchRLTrajectoryBuffer _trajectory_buffer; + /// Mini-batch sampler used to split flattened rollout data for PPO updates + const LibtorchRLMiniBatchSampler _sampler; + /// Helper that builds value targets and advantages from collected trajectories + const LibtorchRLValueEstimator _value_estimator; + /// PPO loss helper for the actor and critic updates + const LibtorchRLPPOLoss _ppo_loss; }; #endif diff --git a/modules/stochastic_tools/include/libtorch/transfers/SamplerDRLControlTransfer.h b/modules/stochastic_tools/include/libtorch/transfers/SamplerDRLControlTransfer.h new file mode 100644 index 000000000000..0b97a480c159 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/transfers/SamplerDRLControlTransfer.h @@ -0,0 +1,56 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +// torch-based includes +#include "LibtorchDRLControlTrainer.h" + +#include "StochasticToolsTransfer.h" +#include "SurrogateModelInterface.h" + +class SamplerDRLControlTransfer : public StochasticToolsTransfer, public SurrogateModelInterface +{ +public: + static InputParameters validParams(); + + /** + * Build the transfer that pushes a trained controller into subapps. + * @param parameters Input parameters for the transfer. + */ + SamplerDRLControlTransfer(const InputParameters & parameters); + + /// Execute the transfer in the standard non-batch path. + virtual void execute() override; + + ///@{ + /** + * Methods used when running in batch mode (see SamplerFullSolveMultiApp). + */ + virtual void initialSetup() override; + virtual void initializeFromMultiapp() override; + virtual void executeFromMultiapp() override; + virtual void finalizeFromMultiapp() override; + + virtual void initializeToMultiapp() override; + virtual void executeToMultiapp() override; + virtual void finalizeToMultiapp() override; + ///@} + +protected: + /// The name of the control object on the other app where we want to copy our neural net + const std::string _control_name; + + /// The trainer object which will contains the control neural net + const LibtorchDRLControlTrainer & _trainer; +}; + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h new file mode 100644 index 000000000000..e1e8f04b4b55 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h @@ -0,0 +1,269 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include + +#include "MooseTypes.h" + +#include +#include + +namespace Moose +{ + +/** + * Reusable continuous-action distribution interface for actor policies. + */ +class LibtorchActionDistribution : public torch::nn::Module +{ +public: + /** + * Build an action-distribution module for an actor network. + * @param name Module name used for registration and serialization. + * @param num_inputs Number of actor features feeding the distribution. + * @param num_outputs Number of action dimensions produced by the distribution. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param output_scaling_factors Optional per-action scaling applied in physical units. + */ + LibtorchActionDistribution(const std::string & name, + unsigned int num_inputs, + unsigned int num_outputs, + torch::DeviceType device_type = torch::kCPU, + torch::ScalarType scalar_type = torch::kDouble, + const std::vector & output_scaling_factors = {}); + + /// Initialize the trainable distribution parameters. + virtual void initialize(c10::optional generator = c10::nullopt) = 0; + + /** + * Refresh cached distribution parameters from the latest actor features. + * @param input Feature tensor coming from the actor body. + */ + virtual void reset(const torch::Tensor & input) = 0; + + /** + * Draw a stochastic action sample in physical units. + * @param generator Optional random-number generator used for sampling. + */ + virtual torch::Tensor sample(c10::optional generator = c10::nullopt) const = 0; + + /// Return the deterministic action used for evaluation. + virtual torch::Tensor deterministicAction() const = 0; + + /** + * Evaluate the log-probability of an action under the current distribution. + * @param action Action tensor in physical units. + */ + virtual torch::Tensor logProbability(const torch::Tensor & action) const = 0; + + /// Compute the entropy of the current distribution. + virtual torch::Tensor entropy() const = 0; + + /// Tell callers whether the distribution enforces explicit action bounds. + virtual bool isBounded() const = 0; + + /// Sync cached scaling metadata from the registered buffers after loading state. + void synchronizeScalingFactorsFromBuffer(); + +protected: + /** + * Convert actor features to the configured device and scalar type. + * @param input Raw actor feature tensor. + */ + torch::Tensor prepareFeatures(const torch::Tensor & input) const; + /** + * Convert actions to the configured device and scalar type. + * @param action Raw action tensor. + */ + torch::Tensor prepareAction(const torch::Tensor & action) const; + /// Return the registered tensor that stores per-action scaling factors. + const torch::Tensor & actionScaleTensor() const { return _action_scale_tensor; } + + /// Module name used for registration and serialization. + const std::string _name; + /// Number of actor features feeding this distribution. + const unsigned int _num_inputs; + /// Number of action dimensions produced by this distribution. + const unsigned int _num_outputs; + /// Torch device used by this distribution. + const torch::DeviceType _device_type; + /// Torch scalar type used by this distribution. + const torch::ScalarType _data_type; + /// Cached per-action scaling factors applied in physical units. + std::vector _output_scaling_factors; + + /// Registered libtorch buffer holding the per-action scaling factors. + torch::Tensor _action_scale_tensor; +}; + +/** + * Gaussian action distribution for unbounded action spaces. + */ +class LibtorchGaussianActionDistribution : public LibtorchActionDistribution +{ +public: + /** + * Build the Gaussian action distribution used for unbounded controls. + * @param name Module name used for registration and serialization. + * @param num_inputs Number of actor features feeding the distribution. + * @param num_outputs Number of action dimensions produced by the distribution. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param build_on_construct Whether to build the torch modules right away. + * @param output_scaling_factors Optional per-action scaling applied in physical units. + * @param state_independent_std Whether the learned std should ignore the current state. + */ + LibtorchGaussianActionDistribution(const std::string & name, + unsigned int num_inputs, + unsigned int num_outputs, + torch::DeviceType device_type = torch::kCPU, + torch::ScalarType scalar_type = torch::kDouble, + bool build_on_construct = true, + const std::vector & output_scaling_factors = {}, + bool state_independent_std = true); + + virtual void initialize(c10::optional generator = c10::nullopt) override; + + virtual void reset(const torch::Tensor & input) override; + + virtual torch::Tensor + sample(c10::optional generator = c10::nullopt) const override; + + virtual torch::Tensor deterministicAction() const override; + + virtual torch::Tensor logProbability(const torch::Tensor & action) const override; + + virtual torch::Tensor entropy() const override; + + virtual bool isBounded() const override { return false; } + + /// Return whether the Gaussian std ignores the current actor features. + bool stateIndependentStd() const { return _state_independent_std; } + /// Return the Gaussian mean head. + torch::nn::Linear & meanModule() { return _mean_module; } + /// Return the Gaussian mean head. + const torch::nn::Linear & meanModule() const { return _mean_module; } + /// Return the Gaussian std head. + torch::nn::Linear & stdModule() { return _std_module; } + /// Return the Gaussian std head. + const torch::nn::Linear & stdModule() const { return _std_module; } + /// Return the cached Gaussian standard deviation tensor. + const torch::Tensor & stdTensor() const { return _std_tensor; } + +private: + /// Build and register the Gaussian distribution heads. + void constructDistribution(); + + /// Whether the Gaussian std ignores the current actor features. + const bool _state_independent_std; + /// Linear head that produces the Gaussian action mean. + torch::nn::Linear _mean_module{nullptr}; + /// Linear head that produces the Gaussian log-std inputs or bias-only std state. + torch::nn::Linear _std_module{nullptr}; + /// Cached Gaussian action mean from the latest reset. + torch::Tensor _mean; + /// Cached Gaussian action standard deviation from the latest reset. + torch::Tensor _std_tensor; + /// Cached Gaussian action log standard deviation from the latest reset. + torch::Tensor _log_std_tensor; +}; + +/** + * Beta action distribution for bounded action spaces. + */ +class LibtorchBetaActionDistribution : public LibtorchActionDistribution +{ +public: + /** + * Build the Beta action distribution used for bounded controls. + * @param name Module name used for registration and serialization. + * @param num_inputs Number of actor features feeding the distribution. + * @param num_outputs Number of action dimensions produced by the distribution. + * @param minimum_values Lower action bounds in physical units. + * @param maximum_values Upper action bounds in physical units. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param build_on_construct Whether to build the torch modules right away. + * @param output_scaling_factors Optional extra per-action scaling in physical units. + */ + LibtorchBetaActionDistribution(const std::string & name, + unsigned int num_inputs, + unsigned int num_outputs, + const std::vector & minimum_values, + const std::vector & maximum_values, + torch::DeviceType device_type = torch::kCPU, + torch::ScalarType scalar_type = torch::kDouble, + bool build_on_construct = true, + const std::vector & output_scaling_factors = {}); + + virtual void initialize(c10::optional generator = c10::nullopt) override; + + virtual void reset(const torch::Tensor & input) override; + + virtual torch::Tensor + sample(c10::optional generator = c10::nullopt) const override; + + virtual torch::Tensor deterministicAction() const override; + + virtual torch::Tensor logProbability(const torch::Tensor & action) const override; + + virtual torch::Tensor entropy() const override; + + virtual bool isBounded() const override { return true; } + + /// Return the Beta alpha head. + torch::nn::Linear & alphaModule() { return _alpha_module; } + /// Return the Beta alpha head. + const torch::nn::Linear & alphaModule() const { return _alpha_module; } + /// Return the Beta beta head. + torch::nn::Linear & betaModule() { return _beta_module; } + /// Return the Beta beta head. + const torch::nn::Linear & betaModule() const { return _beta_module; } + /// Return the cached Beta alpha tensor. + const torch::Tensor & alphaTensor() const { return _alpha_tensor; } + /// Return the cached Beta beta tensor. + const torch::Tensor & betaTensor() const { return _beta_tensor; } + +private: + /// Build and register the Beta distribution heads. + void constructDistribution(); + + /// Lower action bounds in physical units. + const std::vector _minimum_values; + /// Upper action bounds in physical units. + const std::vector _maximum_values; + + /// Linear head that produces the Beta alpha parameters. + torch::nn::Linear _alpha_module{nullptr}; + /// Linear head that produces the Beta beta parameters. + torch::nn::Linear _beta_module{nullptr}; + /// Tensor form of the lower action bounds. + torch::Tensor _min_tensor; + /// Tensor form of the upper action bounds. + torch::Tensor _max_tensor; + /// Cached Beta alpha parameters from the latest reset. + torch::Tensor _alpha_tensor; + /// Cached Beta beta parameters from the latest reset. + torch::Tensor _beta_tensor; + /// Cached sum of the alpha and beta parameters. + torch::Tensor _alpha_beta_tensor; + /// Cached log normalization factor for the latest Beta distribution state. + torch::Tensor _log_norm; + /// Cached normalized Beta mean from the latest reset. + torch::Tensor _mean; +}; + +} // namespace Moose + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h new file mode 100644 index 000000000000..774cb198b051 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h @@ -0,0 +1,210 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include +#include "LibtorchActionDistribution.h" +#include "LibtorchArtificialNeuralNet.h" + +namespace Moose +{ + +/** + * Feed-forward actor network coupled to a Gaussian or Beta action distribution. + */ +class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet +{ +public: + /** + * Build an actor network with either a Gaussian or Beta action distribution. + * @param name Name of the neural network module. + * @param num_inputs Number of actor inputs. + * @param num_outputs Number of action outputs. + * @param num_neurons_per_layer Hidden-layer widths. + * @param activation_function Hidden-layer activation names. + * @param minimum_values Optional lower action bounds. Leave empty for Gaussian actions. + * @param maximum_values Optional upper action bounds. Leave empty for Gaussian actions. + * @param device_type Torch device used by the module. + * @param scalar_type Torch scalar type used by the module. + * @param build_on_construct Whether to build the torch modules right away. + * @param input_shift_factors Optional affine input shifts. + * @param input_scaling_factors Optional affine input scales. + * @param output_scaling_factors Optional per-action scaling in physical units. + * @param state_independent_std Whether the Gaussian std should ignore the current state. + */ + LibtorchActorNeuralNet(const std::string name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const std::vector & num_neurons_per_layer, + const std::vector & activation_function = {"relu"}, + const std::vector & minimum_values = {}, + const std::vector & maximum_values = {}, + const torch::DeviceType device_type = torch::kCPU, + const torch::ScalarType scalar_type = torch::kDouble, + const bool build_on_construct = true, + const std::vector & input_shift_factors = {}, + const std::vector & input_scaling_factors = {}, + const std::vector & output_scaling_factors = {}, + const bool state_independent_std = true); + + /** + * Copy-construct an actor network. + * @param nn Actor network to copy. + * @param build_on_construct Whether to rebuild the module structure during the copy. + */ + LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, + const bool build_on_construct = true); + + virtual torch::Tensor forward(const torch::Tensor & x) override; + + /** + * Evaluate the actor and either sample from it or use its deterministic action. + * @param input Input tensor for the evaluation. + * @param sampled Whether to draw a stochastic sample. + * @param generator Optional random-number generator used for sampling. + */ + virtual torch::Tensor evaluate(torch::Tensor & input, + bool sampled, + c10::optional generator = c10::nullopt); + + /** + * Sample an action from the already-reset distribution. + * @param generator Optional random-number generator used for sampling. + */ + virtual torch::Tensor sample(c10::optional generator = c10::nullopt); + + virtual void constructNeuralNetwork() override; + + /// Return the active action distribution as the common base type. + const LibtorchActionDistribution & actionDistribution() const { return *_action_distribution; } + /// Return the active action distribution as the common base type. + LibtorchActionDistribution & actionDistribution() { return *_action_distribution; } + + /// Return the Gaussian action distribution pointer, or nullptr for bounded actors. + const LibtorchGaussianActionDistribution * gaussianActionDistributionPtr() const; + /// Return the Gaussian action distribution pointer, or nullptr for bounded actors. + LibtorchGaussianActionDistribution * gaussianActionDistributionPtr(); + /// Return the Gaussian action distribution reference. Errors if the actor is bounded. + const LibtorchGaussianActionDistribution & gaussianActionDistribution() const; + /// Return the Gaussian action distribution reference. Errors if the actor is bounded. + LibtorchGaussianActionDistribution & gaussianActionDistribution(); + + /// Return the Beta action distribution pointer, or nullptr for Gaussian actors. + const LibtorchBetaActionDistribution * betaActionDistributionPtr() const; + /// Return the Beta action distribution pointer, or nullptr for Gaussian actors. + LibtorchBetaActionDistribution * betaActionDistributionPtr(); + /// Return the Beta action distribution reference. Errors if the actor is unbounded. + const LibtorchBetaActionDistribution & betaActionDistribution() const; + /// Return the Beta action distribution reference. Errors if the actor is unbounded. + LibtorchBetaActionDistribution & betaActionDistribution(); + + /// Return whether the Gaussian std ignores the current actor features. + bool stateIndependentStd() const { return _state_independent_std; } + /// Return the configured lower action bounds. + const std::vector & minValues() const { return _minimum_values; } + /// Return the configured upper action bounds. + const std::vector & maxValues() const { return _maximum_values; } + + /** + * Refresh the cached distribution parameters from a fresh input tensor. + * @param input Input tensor used to update the distribution. + */ + void resetDistributionParams(torch::Tensor input); + + /** + * Evaluate the log-probability of an action under the current actor state. + * @param other Action tensor in physical units. + */ + torch::Tensor logProbability(const torch::Tensor & other); + + /// Compute the entropy of the current action distribution. + torch::Tensor entropy(); + + virtual void + initializeNeuralNetwork(c10::optional generator = c10::nullopt) override; + +protected: + /// Lower action bounds used by bounded actor distributions. + const std::vector _minimum_values; + /// Upper action bounds used by bounded actor distributions. + const std::vector _maximum_values; + /// Whether the Gaussian std ignores the current actor features. + const bool _state_independent_std; + /// Action-distribution module attached to the actor output. + std::shared_ptr _action_distribution; +}; + +/** + * Dump an actor network into JSON for reporter output and debugging. + * @param json JSON object that receives the serialized state. + * @param network Actor network pointer to serialize. + */ +void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network); + +/** + * Load an actor checkpoint written as a native libtorch state archive. + * @param nn Actor network that receives the loaded state. + * @param filename Checkpoint file to read. + */ +void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, + const std::string & filename); + +} + +/** + * Serialize the actor-network metadata needed for restart. + * @param stream Stream that receives the serialized data. + * @param nn Actor network shared pointer to serialize. + * @param context Serialization context passed through MOOSE data I/O. + */ +template <> +void dataStore(std::ostream & stream, + std::shared_ptr & nn, + void * context); + +/** + * Deserialize the actor-network metadata needed for restart. + * @param stream Stream that provides the serialized data. + * @param nn Actor network shared pointer to populate. + * @param context Serialization context passed through MOOSE data I/O. + */ +template <> +void dataLoad(std::istream & stream, + std::shared_ptr & nn, + void * context); + +// This is needed because the reporter which is used to output the neural net parameters to JSON +// requires a dataStore/dataLoad. However, these functions will be empty due to the fact that +// we are only interested in the JSON output and we don't want to output everything +/** + * Placeholder serializer for reporter-only actor pointers. + * @param stream Stream that would receive the serialized data. + * @param nn Reporter actor pointer. + * @param context Serialization context passed through MOOSE data I/O. + */ +template <> +void dataStore(std::ostream & stream, + Moose::LibtorchActorNeuralNet const *& nn, + void * context); + +/** + * Placeholder deserializer for reporter-only actor pointers. + * @param stream Stream that would provide the serialized data. + * @param nn Reporter actor pointer. + * @param context Serialization context passed through MOOSE data I/O. + */ +template <> +void dataLoad(std::istream & stream, + Moose::LibtorchActorNeuralNet const *& nn, + void * context); + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h new file mode 100644 index 000000000000..8ea0ba65a1e0 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h @@ -0,0 +1,75 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include "LibtorchRLTrajectoryBuffer.h" + +#include + +#include +#include + +struct LibtorchRLMiniBatch +{ + /// Flattened observation rows for the mini-batch. + torch::Tensor observations; + /// Action rows that match the sampled observations. + torch::Tensor actions; + /// Behavior-policy log probabilities for the sampled actions. + torch::Tensor old_log_probabilities; + /// Critic targets aligned with the sampled observations. + torch::Tensor value_targets; + /// Advantage estimates aligned with the sampled observations. + torch::Tensor advantages; + + /// Return the number of rows stored in the mini-batch. + std::int64_t size() const { return observations.defined() ? observations.size(0) : 0; } +}; + +/** + * Samples shuffled mini-batches from a flattened on-policy trajectory batch. + */ +class LibtorchRLMiniBatchSampler +{ +public: + /** + * Shuffle a flattened rollout batch into PPO-sized chunks. + * @param batch Flattened rollout tensors ready for PPO updates. + * @param batch_size Preferred number of rows per mini-batch. + * @param standardize_advantage Whether to normalize the advantages inside each mini-batch. + * @param generator Optional random-number generator used for the row permutation. + */ + std::vector + sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, + unsigned int batch_size, + bool standardize_advantage, + c10::optional generator = c10::nullopt) const; + +private: + /** + * Sanity-check that the flattened rollout tensors all line up. + * @param batch Flattened rollout tensors to validate. + */ + static void validateBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch); + + /** + * Slice one shuffled mini-batch out of the flattened rollout tensors. + * @param batch Flattened rollout tensors. + * @param indices Row indices assigned to this mini-batch. + * @param standardize_advantage Whether to normalize the advantages in this slice. + */ + static LibtorchRLMiniBatch makeMiniBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, + const torch::Tensor & indices, + bool standardize_advantage); +}; + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h new file mode 100644 index 000000000000..8e526138bfc8 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h @@ -0,0 +1,64 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include "LibtorchActorNeuralNet.h" +#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchRLMiniBatchSampler.h" + +struct LibtorchRLPPOLossOutput +{ + /// Clipped actor loss for the current mini-batch. + torch::Tensor actor_loss; + /// Critic regression loss for the current mini-batch. + torch::Tensor critic_loss; + /// Mean action-distribution entropy for the current mini-batch. + torch::Tensor entropy; +}; + +/** + * PPO clipped surrogate loss on top of the reusable RL buffer/value-estimation core. + */ +class LibtorchRLPPOLoss +{ +public: + /** + * Build the PPO loss helper. + * @param clip_parameter PPO clipping width. + * @param entropy_coeff Weight applied to the entropy bonus. + */ + LibtorchRLPPOLoss(Real clip_parameter, Real entropy_coeff); + + /** + * Compute actor, critic, and entropy terms for one mini-batch. + * @param policy_network Actor network used for the policy term. + * @param value_network Critic network used for the value term. + * @param batch Mini-batch pulled from the on-policy trajectory buffer. + */ + LibtorchRLPPOLossOutput compute(Moose::LibtorchActorNeuralNet & policy_network, + Moose::LibtorchArtificialNeuralNet & value_network, + const LibtorchRLMiniBatch & batch) const; + +private: + /** + * Collapse multi-action log-probabilities or entropies into one column tensor. + * @param tensor Action-wise tensor to reduce. + */ + static torch::Tensor reduceActionDimension(const torch::Tensor & tensor); + + /// PPO clipping width used in the actor surrogate objective. + const Real _clip_parameter; + /// Weight applied to the entropy bonus inside the actor loss. + const Real _entropy_coeff; +}; + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h new file mode 100644 index 000000000000..ffd8e88fa7cf --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h @@ -0,0 +1,104 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include "MooseTypes.h" + +#include + +#include +#include + +/** + * On-policy trajectory storage for fixed-horizon RL training. + */ +class LibtorchRLTrajectoryBuffer +{ +public: + struct Trajectory + { + /// Observations for each transition. + std::vector> observations; + /// Next observations for each transition. + std::vector> next_observations; + /// Actions applied at each transition. + std::vector> actions; + /// Action log-probabilities recorded during rollout. + std::vector> log_probabilities; + /// Scalar rewards for each transition. + std::vector rewards; + /// Critic targets aligned with each transition. + std::vector value_targets; + /// Advantage estimates aligned with each transition. + std::vector advantages; + }; + + struct TensorBatch + { + /// Flattened observation matrix. + torch::Tensor observations; + /// Flattened next-observation matrix. + torch::Tensor next_observations; + /// Flattened action matrix. + torch::Tensor actions; + /// Flattened action log-probabilities. + torch::Tensor log_probabilities; + /// Flattened rewards. + torch::Tensor rewards; + /// Flattened critic targets. + torch::Tensor value_targets; + /// Flattened advantages. + torch::Tensor advantages; + + /// Return the number of transitions represented by the batch. + std::int64_t size() const { return observations.defined() ? observations.size(0) : 0; } + }; + + /** + * Append one trajectory to the on-policy buffer. + * @param trajectory Trajectory to store. + */ + void addTrajectory(Trajectory trajectory); + + /// Clear every stored trajectory. + void clear(); + + bool empty() const { return _trajectories.empty(); } + + std::size_t numTrajectories() const { return _trajectories.size(); } + + /** + * Count the total number of transitions stored across every trajectory. + * @return Total transition count. + */ + std::size_t numTransitions() const; + + std::vector & trajectories() { return _trajectories; } + const std::vector & trajectories() const { return _trajectories; } + + /** + * Flatten every stored trajectory into one tensor batch. + * @return Tensor batch ready for mini-batch sampling. + */ + TensorBatch flatten() const; + +private: + /** + * Validate a trajectory before it is stored. + * @param trajectory Trajectory to validate. + */ + static void validateTrajectory(const Trajectory & trajectory); + + std::vector _trajectories; +}; + +#endif diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h new file mode 100644 index 000000000000..020683b46ba0 --- /dev/null +++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h @@ -0,0 +1,73 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#pragma once + +#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchRLTrajectoryBuffer.h" + +#include + +/** + * Computes generalized-advantage estimates and value targets for an on-policy trajectory buffer, + * following Schulman et al., "High-Dimensional Continuous Control Using Generalized Advantage + * Estimation." + */ +class LibtorchRLValueEstimator +{ +public: + struct Targets + { + /// Generalized-advantage estimates. + std::vector advantages; + /// Critic regression targets. + std::vector value_targets; + }; + + /** + * Build the GAE helper. + * @param discount_factor Reward discount factor. + * @param lambda_factor GAE lambda factor. + */ + LibtorchRLValueEstimator(Real discount_factor, Real lambda_factor); + + /** + * Fill every trajectory in the buffer with value targets and advantages. + * @param buffer On-policy trajectory buffer to update. + * @param value_network Critic used for target estimation. + */ + void computeValueTargets(LibtorchRLTrajectoryBuffer & buffer, + Moose::LibtorchArtificialNeuralNet & value_network) const; + + /** + * Compute value targets and advantages for one trajectory. + * @param trajectory Trajectory to evaluate. + * @param value_network Critic used for target estimation. + */ + Targets estimate(const LibtorchRLTrajectoryBuffer::Trajectory & trajectory, + Moose::LibtorchArtificialNeuralNet & value_network) const; + +private: + /** + * Evaluate the critic on a batch of observations. + * @param observations Observation matrix to feed through the critic. + * @param value_network Critic used for the evaluation. + */ + std::vector evaluate(const std::vector> & observations, + Moose::LibtorchArtificialNeuralNet & value_network) const; + + /// Reward discount factor used in the temporal-difference recursion. + const Real _discount_factor; + /// GAE lambda factor used in the reverse-time advantage recursion. + const Real _lambda_factor; +}; + +#endif diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C index 6d9c313b6a12..4bd5f2ef6441 100644 --- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C +++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C @@ -10,9 +10,11 @@ #ifdef MOOSE_LIBTORCH_ENABLED #include "LibtorchDRLControl.h" -#include "TorchScriptModule.h" -#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchRandomUtils.h" #include "Transient.h" +#include "LibtorchUtils.h" + +#include registerMooseObject("StochasticToolsApp", LibtorchDRLControl); @@ -23,93 +25,237 @@ LibtorchDRLControl::validParams() params.addClassDescription( "Sets the value of multiple 'Real' input parameters and postprocessors based on a Deep " "Reinforcement Learning (DRL) neural network trained using a PPO algorithm."); - params.addRequiredParam>( - "action_standard_deviations", "Standard deviation value used while sampling the actions."); + params.set("execute_on") = EXEC_TIMESTEP_BEGIN; + params.suppressParameter("torch_script_format"); + params.addParam("seed", "Seed for the random number generator."); + params.addRangeCheckedParam( + "num_steps_in_period", + 1, + "1<=num_steps_in_period", + "Number of controller executions between policy evaluations."); + params.addParam( + "smoother", 1.0, "Relaxation factor applied when smoothing control updates."); + + params.addParam( + "stochastic", + true, + "If true, sample from the policy distribution; otherwise use the deterministic action."); + + params.addParam>( + "min_control_value", {}, "Optional lower bounds for each control signal."); + params.addParam>( + "max_control_value", {}, "Optional upper bounds for each control signal."); + params.addParam( + "state_independent_std", + true, + "If true, interpret the unbounded Gaussian actor as learning one log-std per action " + "dimension. If false, use a state-dependent std head."); + return params; } LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters) : LibtorchNeuralNetControl(parameters), - _current_control_signal_log_probabilities(std::vector(_control_names.size(), 0.0)), - _action_std(getParam>("action_standard_deviations")) + _current_control_signal_log_probabilities(declareRestartableData>( + "current_control_signal_log_probabilities", std::vector(_control_names.size(), 0.0))), + _previous_control_signal(declareRestartableData>( + "previous_control_signal", std::vector(_control_names.size(), 0.0))), + _current_smoothed_signal(declareRestartableData>( + "current_smoothed_signal", std::vector(_control_names.size(), 0.0))), + _policy_generator(Moose::makeLibtorchCPUGenerator()), + _policy_generator_state(declareRestartableData>( + "policy_generator_state", std::vector())), + _executions_until_next_policy_evaluation( + declareRestartableData("executions_until_next_policy_evaluation", 0)), + _num_steps_in_period(getParam("num_steps_in_period")), + _smoother(getParam("smoother")), + _stochastic(getParam("stochastic")) { - if (_control_names.size() != _action_std.size()) - paramError("action_standard_deviations", - "Number of action_standard_deviations does not match the number of controlled " - "parameters."); + const auto & execute_on = getParam("execute_on"); + if (execute_on.size() != 1 || !execute_on.contains(EXEC_TIMESTEP_BEGIN)) + paramError("execute_on", "LibtorchDRLControl only supports 'TIMESTEP_BEGIN' for 'execute_on'."); - // Fixing the RNG seed to make sure every experiment is the same. if (isParamValid("seed")) - torch::manual_seed(getParam("seed")); + setPolicySampleSeed(getParam("seed")); + + savePolicyGeneratorState(); +} - // We convert and store the user-supplied standard deviations into a tensor which can be easily - // used by routines in libtorch - _std = torch::eye(_control_names.size()); - for (unsigned int i = 0; i < _control_names.size(); ++i) - _std[i][i] = _action_std[i]; +void +LibtorchDRLControl::initialSetup() +{ + LibtorchNeuralNetControl::initialSetup(); + restorePolicyGeneratorState(); + savePolicyGeneratorState(); +} + +void +LibtorchDRLControl::loadControlNeuralNetFromFile() +{ + const auto & filename = getParam("filename"); + unsigned int num_inputs = _observation_names.size() * _input_timesteps; + unsigned int num_outputs = _control_names.size(); + std::vector num_neurons_per_layer = + getParam>("num_neurons_per_layer"); + std::vector activation_functions = + isParamSetByUser("activation_function") + ? getParam>("activation_function") + : std::vector({"relu"}); + + const std::vector & minimum_values = getParam>("min_control_value"); + const std::vector & maximum_values = getParam>("max_control_value"); + const auto input_shift_factors = + _observation_history.expandObservationFactors(_observation_shift_factors); + const auto input_scaling_factors = + _observation_history.expandObservationFactors(_observation_scaling_factors); + + _actor_nn = + std::make_shared(filename, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_functions, + minimum_values, + maximum_values, + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors, + _action_scaling_factors, + getParam("state_independent_std")); + + Moose::loadLibtorchActorNeuralNetState(*_actor_nn, filename); + _nn = _actor_nn; } void LibtorchDRLControl::execute() { - if (_nn) + if (!_actor_nn) { - unsigned int n_controls = _control_names.size(); - unsigned int num_old_timesteps = _input_timesteps - 1; + mooseAssert(!_nn, "LibtorchDRLControl should not store a non-actor controller network."); + return; + } - // Fill a vector with the current values of the responses - updateCurrentResponse(); + mooseAssert(_current_execute_flag == EXEC_TIMESTEP_BEGIN, + "LibtorchDRLControl should only execute on TIMESTEP_BEGIN."); - // If this is the first time this control is called and we need to use older values, fill up the - // needed old values using the initial values - if (_old_responses.empty()) - _old_responses.assign(num_old_timesteps, _current_response); + const unsigned int n_controls = _control_names.size(); + const bool first_control_execution = _old_observations.empty(); - // Organize the old an current solution into a tensor so we can evaluate the neural net - torch::Tensor input_tensor = prepareInputTensor(); + // Fill a vector with the current observation values. + updateCurrentObservation(); - // Evaluate the neural network to get the expected control value - torch::Tensor output_tensor = _nn->forward(input_tensor); + // Seed the observation history with the initial observation when the control first runs. + if (first_control_execution) + _observation_history.initializeHistory(_current_observation, _old_observations); - // Sample control value (action) from Gaussian distribution - torch::Tensor action = at::normal(output_tensor, _std); + if (shouldEvaluatePolicy()) + { + torch::Tensor input_tensor = prepareInputTensor(); + torch::Tensor action = _actor_nn->evaluate(input_tensor, _stochastic, _policy_generator); + savePolicyGeneratorState(); - // Compute log probability - torch::Tensor log_probability = computeLogProbability(action, output_tensor); + if (_stochastic) + { + torch::Tensor log_probability = _actor_nn->logProbability(action); + _current_control_signal_log_probabilities = {log_probability.data_ptr(), + log_probability.data_ptr() + + log_probability.size(1)}; + } + else + _current_control_signal_log_probabilities.assign(n_controls, 0.0); - // Convert data _current_control_signals = {action.data_ptr(), action.data_ptr() + action.size(1)}; - _current_control_signal_log_probabilities = {log_probability.data_ptr(), - log_probability.data_ptr() + - log_probability.size(1)}; + if (first_control_execution) + _current_smoothed_signal = _current_control_signals; + } - for (unsigned int control_i = 0; control_i < n_controls; ++control_i) - { - // We scale the controllable value for physically meaningful control action - setControllableValueByName(_control_names[control_i], - _current_control_signals[control_i] * - _action_scaling_factors[control_i]); - } + _previous_control_signal = _current_smoothed_signal; + + for (const auto i : index_range(_current_smoothed_signal)) + _current_smoothed_signal[i] = + _previous_control_signal[i] + + _smoother * (_current_control_signals[i] - _previous_control_signal[i]); + + for (unsigned int control_i = 0; control_i < n_controls; ++control_i) + setControllableValueByName(_control_names[control_i], + _current_smoothed_signal[control_i]); - // We add the curent solution to the old solutions and move everything in there one step - // backward - std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend()); - _old_responses[0] = _current_response; + _observation_history.advanceHistory(_current_observation, _old_observations); +} + +void +LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchActorNeuralNet & input_nn) +{ + _actor_nn = std::make_shared(input_nn); + _nn = _actor_nn; +} + +void +LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) +{ + const auto * check = dynamic_cast(&input_nn); + if (!check) + mooseError("This needs to be a LibtorchActorNeuralNet!"); + loadControlNeuralNet(*check); +} + +void +LibtorchDRLControl::setPolicySampleSeed(const uint64_t seed) +{ + { + std::lock_guard lock(_policy_generator.mutex()); + _policy_generator.set_current_seed(seed); } + savePolicyGeneratorState(); } -torch::Tensor -LibtorchDRLControl::computeLogProbability(const torch::Tensor & action, - const torch::Tensor & output_tensor) +bool +LibtorchDRLControl::shouldEvaluatePolicy() { - // Logarithmic probability of taken action, given the current distribution. - torch::Tensor var = torch::matmul(_std, _std); + if (_executions_until_next_policy_evaluation == 0) + { + _executions_until_next_policy_evaluation = _num_steps_in_period - 1; + return true; + } + + --_executions_until_next_policy_evaluation; + return false; +} + +void +LibtorchDRLControl::restorePolicyGeneratorState() +{ + if (!_stochastic || _policy_generator_state.empty()) + return; + + auto state_tensor = + torch::from_blob(_policy_generator_state.data(), + {static_cast(_policy_generator_state.size())}, + torch::TensorOptions().dtype(torch::kUInt8).device(torch::kCPU)) + .clone(); + std::lock_guard lock(_policy_generator.mutex()); + _policy_generator.set_state(state_tensor); +} + +void +LibtorchDRLControl::savePolicyGeneratorState() +{ + if (!_stochastic) + { + _policy_generator_state.clear(); + return; + } - return -((action - output_tensor) * (action - output_tensor)) / (2.0 * var) - torch::log(_std) - - std::log(std::sqrt(2.0 * M_PI)); + std::lock_guard lock(_policy_generator.mutex()); + const auto state_tensor = _policy_generator.get_state().contiguous(); + const auto * data = state_tensor.data_ptr(); + _policy_generator_state.assign(data, data + static_cast(state_tensor.numel())); } Real diff --git a/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C b/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C index acfef49acdc2..c91caf5736c9 100644 --- a/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C +++ b/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C @@ -21,7 +21,7 @@ DRLRewardReporter::validParams() params.addClassDescription("Reporter containing the reward values of a DRL controller trainer."); params.addRequiredParam( - "drl_trainer_name", "The name of the RDL controller trainer which computes the rewards."); + "drl_trainer_name", "The name of the DRL controller trainer which computes the rewards."); return params; } @@ -29,7 +29,12 @@ DRLRewardReporter::validParams() DRLRewardReporter::DRLRewardReporter(const InputParameters & parameters) : GeneralReporter(parameters), SurrogateModelInterface(this), - _reward(declareValueByName("average_reward", REPORTER_MODE_ROOT)), + _average_reward(declareValueByName("average_reward", REPORTER_MODE_ROOT)), + _std_reward(declareValueByName("std_reward", REPORTER_MODE_ROOT)), + _sample_average_reward( + declareValueByName>("sample_average_reward", REPORTER_MODE_ROOT)), + _sample_std_reward( + declareValueByName>("sample_std_reward", REPORTER_MODE_ROOT)), _trainer(getSurrogateTrainer("drl_trainer_name")) { } @@ -37,7 +42,10 @@ DRLRewardReporter::DRLRewardReporter(const InputParameters & parameters) void DRLRewardReporter::execute() { - _reward = _trainer.averageEpisodeReward(); + _average_reward = _trainer.averageEpisodeReward(); + _std_reward = _trainer.stdEpisodeReward(); + _sample_average_reward = _trainer.sampleAverageEpsiodeRewards(); + _sample_std_reward = _trainer.sampleStdEpsiodeRewards(); } #endif diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C index e9951ed7277a..c2818b3570eb 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C @@ -128,7 +128,7 @@ LibtorchANNTrainer::postTrain() if (_read_from_file) try { - torch::load(_nn, _nn_filename); + Moose::loadLibtorchArtificialNeuralNetState(*_nn, _nn_filename); _console << "Loaded requested .pt file." << std::endl; } catch (const c10::Error & e) diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C index c7461db9baa9..d24799c3a08a 100644 --- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C +++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C @@ -9,12 +9,11 @@ #ifdef MOOSE_LIBTORCH_ENABLED -#include "LibtorchDataset.h" -#include "LibtorchArtificialNeuralNetTrainer.h" -#include "LibtorchUtils.h" #include "LibtorchDRLControlTrainer.h" -#include "Sampler.h" -#include "Function.h" +#include "LibtorchRandomUtils.h" + +#include +#include registerMooseObject("StochasticToolsApp", LibtorchDRLControlTrainer); @@ -24,22 +23,27 @@ LibtorchDRLControlTrainer::validParams() InputParameters params = SurrogateTrainerBase::validParams(); params.addClassDescription( - "Trains a neural network controller using the Proximal Policy Optimization (PPO) algorithm."); + "Trains a neural network controller using fixed-horizon PPO on top of the libtorch RL core."); params.addRequiredParam>( - "response", "Reporter values containing the response values from the model."); + "observation", "Reporter values containing the observation values from the model."); params.addParam>( - "response_shift_factors", - "A shift constant which will be used to shift the response values. This is used for the " - "manipulation of the neural net inputs for better training efficiency."); + "observation_shift_factors", + {}, + "Optional offsets applied to the observed state values before scaling."); params.addParam>( - "response_scaling_factors", - "A normalization constant which will be used to divide the response values. This is used for " - "the manipulation of the neural net inputs for better training efficiency."); + "observation_scaling_factors", + {}, + "Optional multipliers applied after shifting the observed state values."); params.addRequiredParam>( "control", "Reporters containing the values of the controlled quantities (control signals) from the " "model simulations."); + params.addParam>( + "action_scaling_factors", + {}, + "Scale factors embedded into the trained policy outputs so transferred and checkpointed " + "controllers operate in physical units."); params.addRequiredParam>( "log_probability", "Reporters containing the log probabilities of the actions taken during the simulations."); @@ -51,48 +55,36 @@ LibtorchDRLControlTrainer::validParams() "1<=input_timesteps", "Number of time steps to use in the input data, if larger than 1, " "data from the previous timesteps will be used as inputs in the training."); - params.addParam("skip_num_rows", - 1, - "Number of rows to ignore from training. We usually skip the 1st " - "row from the reporter since it contains only initial values."); params.addRequiredParam("num_epochs", "Number of epochs for the training."); - params.addRequiredRangeCheckedParam( - "critic_learning_rate", - "0>( - "num_critic_neurons_per_layer", "Number of neurons per layer in the emulator neural net."); + params.addRequiredRangeCheckedParam("critic_learning_rate", + "0>("num_critic_neurons_per_layer", + "Hidden-layer widths for the critic network."); params.addParam>( "critic_activation_functions", std::vector({"relu"}), - "The type of activation functions to use in the emulator neural net. It is either one value " - "or one value per hidden layer."); + "Activation name for each critic hidden layer, or one shared value for all layers."); - params.addRequiredRangeCheckedParam( - "control_learning_rate", - "0("control_learning_rate", + "0>( "num_control_neurons_per_layer", "Number of neurons per layer for the control neural network."); params.addParam>( "control_activation_functions", std::vector({"relu"}), - "The type of activation functions to use in the control neural net. It " - "is either one value " - "or one value per hidden layer."); + "Activation name for each actor hidden layer, or one shared value for all layers."); params.addParam("filename_base", - "Filename used to output the neural net parameters."); + "Base filename used when writing actor and critic checkpoints."); params.addParam( "seed", 11, "Random number generator seed for stochastic optimizers."); - params.addRequiredParam>( - "action_standard_deviations", "Standard deviation value used while sampling the actions."); - params.addParam( "clip_parameter", 0.2, "Clip parameter used while clamping the advantage value."); params.addRangeCheckedParam( @@ -105,45 +97,61 @@ LibtorchDRLControlTrainer::validParams() "decay_factor", 1.0, "0.0<=decay_factor<=1.0", - "Decay factor for calculating the return. This accounts for decreased " - "reward values from the later steps."); + "Discount factor used when building PPO return and GAE targets."); + + params.addRangeCheckedParam("lambda_factor", 1.0, "0.0<=lambda_factor<=1.0", "GAE lambda."); params.addParam( "read_from_file", false, "Switch to read the neural network parameters from a file."); params.addParam( "shift_outputs", true, - "If we would like to shift the outputs the realign the input-output pairs."); + "Whether to shift rollout outputs so observations and actions line up in time."); params.addParam( "standardize_advantage", true, "Switch to enable the shifting and normalization of the advantages in the PPO algorithm."); - params.addParam("loss_print_frequency", - 0, - "The frequency which is used to print the loss values. If 0, the " - "loss values are not printed."); + params.addParam( + "loss_print_frequency", 0, "Print PPO loss values every N updates. Use 0 to stay quiet."); + params.addParam("batch_size", 100, "Number of flattened samples per mini-batch."); + params.addParam>( + "min_control_value", {}, "Optional lower bounds for each control signal."); + params.addParam>( + "max_control_value", {}, "Optional upper bounds for each control signal."); + params.addParam( + "state_independent_std", + true, + "If true, learn one Gaussian log-std parameter per action dimension. If false, learn a " + "state-dependent std head as in the older MOOSE actor implementation."); + + params.addParam( + "entropy_coeff", 0.01, "Entropy bonus coefficient used in the PPO actor loss."); + + params.addParam( + "timestep_window", 1, "Use every nth reporter timestep when assembling trajectories."); + return params; } LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & parameters) : SurrogateTrainerBase(parameters), - _response_names(getParam>("response")), - _response_shift_factors(isParamValid("response_shift_factors") - ? getParam>("response_shift_factors") - : std::vector(_response_names.size(), 0.0)), - _response_scaling_factors(isParamValid("response_scaling_factors") - ? getParam>("response_scaling_factors") - : std::vector(_response_names.size(), 1.0)), - _control_names(getParam>("control")), + _state_names(getParam>("observation")), + _state_shift_factors(isParamSetByUser("observation_shift_factors") + ? getParam>("observation_shift_factors") + : std::vector(_state_names.size(), 0.0)), + _state_scaling_factors(isParamSetByUser("observation_scaling_factors") + ? getParam>("observation_scaling_factors") + : std::vector(_state_names.size(), 1.0)), + _action_names(getParam>("control")), + _action_scaling_factors(isParamSetByUser("action_scaling_factors") + ? getParam>("action_scaling_factors") + : std::vector(_action_names.size(), 1.0)), _log_probability_names(getParam>("log_probability")), _reward_name(getParam("reward")), - _reward_value_pointer(&getReporterValueByName>(_reward_name)), + _reward_value_pointer(&getReporterValueByName>>(_reward_name)), _input_timesteps(getParam("input_timesteps")), - _num_inputs(_input_timesteps * _response_names.size()), - _num_outputs(_control_names.size()), - _input_data(std::vector>(_num_inputs)), - _output_data(std::vector>(_num_outputs)), - _log_probability_data(std::vector>(_num_outputs)), + _num_inputs(_input_timesteps * _state_names.size()), + _num_outputs(_action_names.size()), _num_epochs(getParam("num_epochs")), _num_critic_neurons_per_layer( getParam>("num_critic_neurons_per_layer")), @@ -154,60 +162,76 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _update_frequency(getParam("update_frequency")), _clip_param(getParam("clip_parameter")), _decay_factor(getParam("decay_factor")), - _action_std(getParam>("action_standard_deviations")), + _lambda_factor(getParam("lambda_factor")), _filename_base(isParamValid("filename_base") ? getParam("filename_base") : ""), _read_from_file(getParam("read_from_file")), _shift_outputs(getParam("shift_outputs")), + _average_episode_reward(0.0), _standardize_advantage(getParam("standardize_advantage")), _loss_print_frequency(getParam("loss_print_frequency")), - _update_counter(_update_frequency) + _seed(getParam("seed")), + _min_values(getParam>("min_control_value")), + _max_values(getParam>("max_control_value")), + _highest_reward(-1e8), + _entropy_coeff(getParam("entropy_coeff")), + _update_counter(_update_frequency), + _timestep_window(getParam("timestep_window")), + _observation_history(_input_timesteps), + _value_estimator(_decay_factor, _lambda_factor), + _ppo_loss(_clip_param, _entropy_coeff) { - if (_response_names.size() != _response_shift_factors.size()) - paramError("response_shift_factors", - "The number of shift factors is not the same as the number of responses!"); + if (_state_names.size() != _state_shift_factors.size()) + paramError("observation_shift_factors", + "The number of shift factors is not the same as the number of observations!"); - if (_response_names.size() != _response_scaling_factors.size()) - paramError( - "response_scaling_factors", - "The number of normalization coefficients is not the same as the number of responses!"); + if (_state_names.size() != _state_scaling_factors.size()) + paramError("observation_scaling_factors", + "The number of normalization coefficients is not the same as the number of " + "observations!"); - // We establish the links with the chosen reporters - getReporterPointers(_response_names, _response_value_pointers); - getReporterPointers(_control_names, _control_value_pointers); - getReporterPointers(_log_probability_names, _log_probability_value_pointers); + if (_action_names.size() != _log_probability_names.size()) + paramError("log_probability", + "The number of log-probability reporters must match the number of control " + "reporters."); - // Fixing the RNG seed to make sure every experiment is the same. - // Otherwise sampling / stochastic gradient descent would be different. - torch::manual_seed(getParam("seed")); + if (_action_names.size() != _action_scaling_factors.size()) + paramError("action_scaling_factors", + "The number of action scaling factors must match the number of control " + "reporters."); - // Convert the user input standard deviations to a diagonal tensor - _std = torch::eye(_control_names.size()); - for (unsigned int i = 0; i < _control_names.size(); ++i) - _std[i][i] = _action_std[i]; + // We establish the links with the chosen reporters + getReporterPointers(_state_names, _state_value_pointers); + getReporterPointers(_action_names, _action_value_pointers); + getReporterPointers(_log_probability_names, _log_probability_value_pointers); bool filename_valid = isParamValid("filename_base"); + const auto input_shift_factors = + _observation_history.expandObservationFactors(_state_shift_factors); + const auto input_scaling_factors = + _observation_history.expandObservationFactors(_state_scaling_factors); // Initializing the control neural net so that the control can grab it right away - _control_nn = std::make_shared( + _control_nn = std::make_shared( filename_valid ? _filename_base + "_control.net" : "control.net", _num_inputs, _num_outputs, _num_control_neurons_per_layer, - getParam>("control_activation_functions")); + getParam>("control_activation_functions"), + _min_values, + _max_values, + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors, + _action_scaling_factors, + getParam("state_independent_std")); // We read parameters for the control neural net if it is requested if (_read_from_file) { - try - { - torch::load(_control_nn, _control_nn->name()); - _console << "Loaded requested .pt file." << std::endl; - } - catch (const c10::Error & e) - { - mooseError("The requested pytorch file could not be loaded for the control neural net.\n", - e.msg()); - } + Moose::loadLibtorchActorNeuralNetState(*_control_nn, _control_nn->name()); + _console << "Loaded requested .pt file." << std::endl; } else if (filename_valid) torch::save(_control_nn, _control_nn->name()); @@ -218,14 +242,24 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par _num_inputs, 1, _num_critic_neurons_per_layer, - getParam>("critic_activation_functions")); + getParam>("critic_activation_functions"), + torch::kCPU, + torch::kDouble, + true, + input_shift_factors, + input_scaling_factors); + + _actor_optimizer = std::make_unique( + _control_nn->parameters(), torch::optim::AdamOptions(_control_learning_rate)); + _critic_optimizer = std::make_unique( + _critic_nn->parameters(), torch::optim::AdamOptions(_critic_learning_rate)); // We read parameters for the critic neural net if it is requested if (_read_from_file) { try { - torch::load(_critic_nn, _critic_nn->name()); + Moose::loadLibtorchArtificialNeuralNetState(*_critic_nn, _critic_nn->name()); _console << "Loaded requested .pt file." << std::endl; } catch (const c10::Error & e) @@ -236,131 +270,146 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par } else if (filename_valid) torch::save(_critic_nn, _critic_nn->name()); + + _control_nn->initializeNeuralNetwork( + Moose::makeLibtorchCPUGenerator(static_cast(_seed))); + _critic_nn->initializeNeuralNetwork( + Moose::makeLibtorchCPUGenerator(static_cast(_seed) + 1)); } void LibtorchDRLControlTrainer::execute() { - // Extract data from the reporters - getInputDataFromReporter(_input_data, _response_value_pointers, _input_timesteps); - getOutputDataFromReporter(_output_data, _control_value_pointers); - getOutputDataFromReporter(_log_probability_data, _log_probability_value_pointers); - getRewardDataFromReporter(_reward_data, _reward_value_pointer); - - // Calculate return from the reward (discounting the reward) - computeRewardToGo(); + collectTrajectoriesFromReporters(); _update_counter--; - // Only update the NNs when - if (_update_counter == 0) - { - // We compute the average reward first - computeAverageEpisodeReward(); + if (_update_counter != 0 || _trajectory_buffer.empty()) + return; - // Transform input/output/return data to torch::Tensor - convertDataToTensor(_input_data, _input_tensor); - convertDataToTensor(_output_data, _output_tensor); - convertDataToTensor(_log_probability_data, _log_probability_tensor); + computeEpisodeRewardStatistics(); - // Discard (detach) the gradient info for return data - LibtorchUtils::vectorToTensor(_return_data, _return_tensor, true); + if (_average_episode_reward > _highest_reward) + { + torch::save(_control_nn, _control_nn->name() + "_best"); + _highest_reward = _average_episode_reward; + } - // We train the controller using the emulator to get a good control strategy - trainController(); + _value_estimator.computeValueTargets(_trajectory_buffer, *_critic_nn); + const auto batch = _trajectory_buffer.flatten(); - // We clean the training data after contoller update and reset the counter - resetData(); - } + trainController(batch); + resetData(); } void -LibtorchDRLControlTrainer::computeAverageEpisodeReward() +LibtorchDRLControlTrainer::computeEpisodeRewardStatistics() { - if (_reward_data.size()) - _average_episode_reward = - std::accumulate(_reward_data.begin(), _reward_data.end(), 0.0) / _reward_data.size(); - else + if (_trajectory_buffer.empty()) + { _average_episode_reward = 0.0; -} + _std_episode_reward = 0.0; + _sample_average_episode_reward.clear(); + _sample_std_episode_reward.clear(); + return; + } -void -LibtorchDRLControlTrainer::computeRewardToGo() -{ - // Get reward data from one simulation - std::vector reward_data_per_sim; - std::vector return_data_per_sim; - getRewardDataFromReporter(reward_data_per_sim, _reward_value_pointer); - - // Discount the reward to get the return value, we need this to be able to anticipate - // rewards based on the current behavior. - Real discounted_reward(0.0); - for (int i = reward_data_per_sim.size() - 1; i >= 0; --i) + _average_episode_reward = 0.0; + _std_episode_reward = 0.0; + unsigned int combined_sizes = 0; + + _sample_average_episode_reward.clear(); + _sample_std_episode_reward.clear(); + + for (const auto & trajectory : _trajectory_buffer.trajectories()) { - discounted_reward = reward_data_per_sim[i] + discounted_reward * _decay_factor; + const auto & sample = trajectory.rewards; + const unsigned int sample_size = sample.size(); + if (!sample_size) + continue; + + const Real sum = std::accumulate(sample.begin(), sample.end(), 0.0); + const Real mean = sum / sample_size; + _sample_average_episode_reward.push_back(mean); + + const Real variance = + std::transform_reduce(sample.begin(), + sample.end(), + 0.0, + std::plus<>(), + [mean](const Real value) { return (value - mean) * (value - mean); }); + _sample_std_episode_reward.push_back(std::sqrt(variance / sample_size)); + + _average_episode_reward += sum; + _std_episode_reward += variance; + combined_sizes += sample_size; + } - // We are inserting to the front of the vector and push the rest back, this will - // ensure that the first element of the vector is the discounter reward for the whole transient - return_data_per_sim.insert(return_data_per_sim.begin(), discounted_reward); + if (!combined_sizes) + { + _average_episode_reward = 0.0; + _std_episode_reward = 0.0; + return; } - // Save and accumulate the return values - _return_data.insert(_return_data.end(), return_data_per_sim.begin(), return_data_per_sim.end()); + _average_episode_reward /= combined_sizes; + _std_episode_reward = std::sqrt(_std_episode_reward / combined_sizes); } void -LibtorchDRLControlTrainer::trainController() +LibtorchDRLControlTrainer::trainController(const LibtorchRLTrajectoryBuffer::TensorBatch & batch) { - // Define the optimizers for the training - torch::optim::Adam actor_optimizer(_control_nn->parameters(), - torch::optim::AdamOptions(_control_learning_rate)); + if (!batch.size()) + return; - torch::optim::Adam critic_optimizer(_critic_nn->parameters(), - torch::optim::AdamOptions(_critic_learning_rate)); + // We only train on the rank 0 partition. Libtorch should still be able to + // fetch the local threads which are available. + if (processor_id() == 0) + { + auto shuffle_generator = Moose::makeLibtorchCPUGenerator( + static_cast(_seed) + static_cast(_fe_problem.timeStep())); - // Compute the approximate value (return) from the critic neural net and use it to compute an - // advantage - auto value = evaluateValue(_input_tensor).detach(); - auto advantage = _return_tensor - value; + for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch) + { + const auto mini_batches = _sampler.sample( + batch, getParam("batch_size"), _standardize_advantage, shuffle_generator); + bool printed_losses = false; + for (const auto & mini_batch : mini_batches) + { + const auto losses = _ppo_loss.compute(*_control_nn, *_critic_nn, mini_batch); + + _actor_optimizer->zero_grad(); + losses.actor_loss.backward(); + _actor_optimizer->step(); + + _critic_optimizer->zero_grad(); + losses.critic_loss.backward(); + _critic_optimizer->step(); + + if (_loss_print_frequency && epoch % _loss_print_frequency == 0 && !printed_losses) + { + _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN + << losses.actor_loss.item() << COLOR_DEFAULT + << " | Critic Loss: " << COLOR_GREEN << losses.critic_loss.item() + << COLOR_DEFAULT << std::endl; + printed_losses = true; + } + } + } - // If requested, standardize the advantage - if (_standardize_advantage) - advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10); + _console << "Best model so far: " << _highest_reward << std::endl; + } - for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch) + // It is time to send the trained data to every other processor so that the neural networks + // are the same on all ranks. TODO: Make sure this can be done on a GPU as well. + for (auto & param : _control_nn->named_parameters()) { - // Get the approximate return from the neural net again (this one does have an associated - // gradient) - value = evaluateValue(_input_tensor); - // Get the approximate logarithmic action probability using the control neural net - auto curr_log_probability = evaluateAction(_input_tensor, _output_tensor); - - // Prepare the ratio by using the e^(logx-logy)=x/y expression - auto ratio = (curr_log_probability - _log_probability_tensor).exp(); - - // Use clamping for limiting - auto surr1 = ratio * advantage; - auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage; - - // Compute loss values for the critic and the control neural net - auto actor_loss = -torch::min(surr1, surr2).mean(); - auto critic_loss = torch::mse_loss(value, _return_tensor); - - // Update the weights in the neural nets - actor_optimizer.zero_grad(); - actor_loss.backward(); - actor_optimizer.step(); - - critic_optimizer.zero_grad(); - critic_loss.backward(); - critic_optimizer.step(); - - // print loss per epoch - if (_loss_print_frequency) - if (epoch % _loss_print_frequency == 0) - _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN - << actor_loss.item() << COLOR_DEFAULT << " | Critic Loss: " << COLOR_GREEN - << critic_loss.item() << COLOR_DEFAULT << std::endl; + MPI_Bcast(param.value().data_ptr(), param.value().numel(), MPI_DOUBLE, 0, _communicator.get()); + } + + for (auto & param : _critic_nn->named_parameters()) + { + MPI_Bcast(param.value().data_ptr(), param.value().numel(), MPI_DOUBLE, 0, _communicator.get()); } // Save the controller neural net so our controller can read it, we also save the critic if we @@ -369,120 +418,107 @@ LibtorchDRLControlTrainer::trainController() torch::save(_critic_nn, _critic_nn->name()); } -void -LibtorchDRLControlTrainer::convertDataToTensor(std::vector> & vector_data, - torch::Tensor & tensor_data, - const bool detach) -{ - for (unsigned int i = 0; i < vector_data.size(); ++i) - { - torch::Tensor input_row; - LibtorchUtils::vectorToTensor(vector_data[i], input_row, detach); - - if (i == 0) - tensor_data = input_row; - else - tensor_data = torch::cat({tensor_data, input_row}, 1); - } - - if (detach) - tensor_data.detach(); -} - -torch::Tensor -LibtorchDRLControlTrainer::evaluateValue(torch::Tensor & input) -{ - return _critic_nn->forward(input); -} - -torch::Tensor -LibtorchDRLControlTrainer::evaluateAction(torch::Tensor & input, torch::Tensor & output) -{ - torch::Tensor var = torch::matmul(_std, _std); - - // Compute an action and get it's logarithmic proability based on an assumed Gaussian distribution - torch::Tensor action = _control_nn->forward(input); - return -((action - output) * (action - output)) / (2 * var) - torch::log(_std) - - std::log(std::sqrt(2 * M_PI)); -} - void LibtorchDRLControlTrainer::resetData() { - for (auto & data : _input_data) - data.clear(); - for (auto & data : _output_data) - data.clear(); - for (auto & data : _log_probability_data) - data.clear(); - - _reward_data.clear(); - _return_data.clear(); - + _trajectory_buffer.clear(); _update_counter = _update_frequency; } void -LibtorchDRLControlTrainer::getInputDataFromReporter( - std::vector> & data, - const std::vector *> & reporter_links, - const unsigned int num_timesteps) +LibtorchDRLControlTrainer::collectTrajectoriesFromReporters() { - for (const auto & rep_i : index_range(reporter_links)) + for (const auto sample_i : index_range(*_reward_value_pointer)) { - std::vector reporter_data = *reporter_links[rep_i]; - - // We shift and scale the inputs to get better training efficiency - std::transform( - reporter_data.begin(), - reporter_data.end(), - reporter_data.begin(), - [this, &rep_i](Real value) -> Real - { return (value - _response_shift_factors[rep_i]) * _response_scaling_factors[rep_i]; }); - - // Fill the corresponding containers - for (const auto & start_step : make_range(num_timesteps)) + const auto & reward_sample = (*_reward_value_pointer)[sample_i]; + const auto num_transitions = computeNumTransitions(reward_sample.size()); + if (!num_transitions) + continue; + + std::vector> component_trajectories(_state_names.size()); + for (const auto state_i : index_range(_state_value_pointers)) + component_trajectories[state_i] = extractDownsampledSequence( + (*_state_value_pointers[state_i])[sample_i], 0, num_transitions + 1); + + LibtorchRLTrajectoryBuffer::Trajectory trajectory; + trajectory.observations.reserve(num_transitions); + trajectory.next_observations.reserve(num_transitions); + trajectory.actions.assign(num_transitions, std::vector()); + trajectory.log_probabilities.assign(num_transitions, std::vector()); + + for (auto & action_row : trajectory.actions) + action_row.reserve(_action_names.size()); + for (auto & log_probability_row : trajectory.log_probabilities) + log_probability_row.reserve(_log_probability_names.size()); + + for (const auto step_i : make_range(num_transitions)) + { + trajectory.observations.push_back( + _observation_history.stackTrajectoryObservation(component_trajectories, step_i)); + trajectory.next_observations.push_back( + _observation_history.stackTrajectoryObservation(component_trajectories, step_i + 1)); + } + + for (const auto action_i : index_range(_action_value_pointers)) { - unsigned int row = reporter_links.size() * start_step + rep_i; - for (unsigned int fill_i = 1; fill_i < num_timesteps - start_step; ++fill_i) - data[row].push_back(reporter_data[0]); - - data[row].insert(data[row].end(), - reporter_data.begin(), - reporter_data.begin() + start_step + reporter_data.size() - - (num_timesteps - 1) - _shift_outputs); + const auto action_sequence = extractDownsampledSequence( + (*_action_value_pointers[action_i])[sample_i], _shift_outputs, num_transitions); + const auto log_probability_sequence = extractDownsampledSequence( + (*_log_probability_value_pointers[action_i])[sample_i], _shift_outputs, num_transitions); + + for (const auto step_i : make_range(num_transitions)) + { + trajectory.actions[step_i].push_back(action_sequence[step_i]); + trajectory.log_probabilities[step_i].push_back(log_probability_sequence[step_i]); + } } + + trajectory.rewards = + extractDownsampledSequence(reward_sample, _timestep_window, num_transitions); + + _trajectory_buffer.addTrajectory(std::move(trajectory)); } } -void -LibtorchDRLControlTrainer::getOutputDataFromReporter( - std::vector> & data, - const std::vector *> & reporter_links) +unsigned int +LibtorchDRLControlTrainer::computeNumTransitions(const std::size_t raw_sequence_size) const { - for (const auto & rep_i : index_range(reporter_links)) - // Fill the corresponding containers - data[rep_i].insert(data[rep_i].end(), - reporter_links[rep_i]->begin() + _shift_outputs, - reporter_links[rep_i]->end()); + unsigned int num_transitions = 0; + for (std::size_t raw_index = 0; raw_index + _timestep_window < raw_sequence_size; + raw_index += _timestep_window) + ++num_transitions; + + return num_transitions; } -void -LibtorchDRLControlTrainer::getRewardDataFromReporter(std::vector & data, - const std::vector * const reporter_link) +std::vector +LibtorchDRLControlTrainer::extractDownsampledSequence(const std::vector & sample, + const unsigned int offset, + const unsigned int num_entries) const { - // Fill the corresponding container - data.insert(data.end(), reporter_link->begin() + _shift_outputs, reporter_link->end()); + std::vector values; + values.reserve(num_entries); + + for (const auto entry_i : make_range(num_entries)) + { + const auto raw_index = offset + entry_i * _timestep_window; + if (raw_index >= sample.size()) + mooseError("Reporter data is shorter than required by the configured timestep window and " + "history stacking."); + values.push_back(sample[raw_index]); + } + + return values; } void LibtorchDRLControlTrainer::getReporterPointers( const std::vector & reporter_names, - std::vector *> & pointer_storage) + std::vector> *> & pointer_storage) { pointer_storage.clear(); for (const auto & name : reporter_names) - pointer_storage.push_back(&getReporterValueByName>(name)); + pointer_storage.push_back(&getReporterValueByName>>(name)); } #endif diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C new file mode 100644 index 000000000000..713a7c10da2b --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C @@ -0,0 +1,132 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "SamplerDRLControlTransfer.h" +#include "LibtorchDRLControl.h" + +registerMooseObject("StochasticToolsApp", SamplerDRLControlTransfer); + +InputParameters +SamplerDRLControlTransfer::validParams() +{ + InputParameters params = StochasticToolsTransfer::validParams(); + params += SurrogateModelInterface::validParams(); + + params.addClassDescription( + "Copies a DRL actor from a trainer object on the main app to a LibtorchDRLControl object " + "on the subapp."); + + params.suppressParameter("from_multi_app"); + + params.addRequiredParam( + "trainer_name", "Trainer object that owns the latest controller network."); + params.addRequiredParam("control_name", "Controller object name."); + return params; +} + +SamplerDRLControlTransfer::SamplerDRLControlTransfer(const InputParameters & parameters) + : StochasticToolsTransfer(parameters), + SurrogateModelInterface(this), + _control_name(getParam("control_name")), + _trainer(getSurrogateTrainerByName( + getParam("trainer_name"))) +{ +} + +void +SamplerDRLControlTransfer::initialSetup() +{ +} + +void +SamplerDRLControlTransfer::execute() +{ + const auto n = getToMultiApp()->numGlobalApps(); + for (MooseIndex(n) i = 0; i < n; i++) + { + if (getToMultiApp()->hasLocalApp(i)) + { + // Get the control neural net from the trainer + const Moose::LibtorchActorNeuralNet & trainer_nn = _trainer.controlNeuralNet(); + + // Get the control object from the other app + FEProblemBase & app_problem = _multi_app->appProblemBase(i); + auto & control_warehouse = app_problem.getControlWarehouse(); + std::shared_ptr control_ptr = control_warehouse.getActiveObject(_control_name); + LibtorchDRLControl * control_object = dynamic_cast(control_ptr.get()); + + if (!control_object) + paramError("control_name", "The given control is not a LibtorchDRLControl!"); + + // Copy and the neural net and execute it to get the initial values + control_object->loadControlNeuralNet(trainer_nn); + control_object->execute(); + } + } +} + +void +SamplerDRLControlTransfer::initializeFromMultiapp() +{ +} + +void +SamplerDRLControlTransfer::executeFromMultiapp() +{ +} + +void +SamplerDRLControlTransfer::finalizeFromMultiapp() +{ +} + +void +SamplerDRLControlTransfer::initializeToMultiapp() +{ +} + +void +SamplerDRLControlTransfer::executeToMultiapp() +{ + if (getToMultiApp()->hasLocalApp(_app_index)) + { + // Use a rank-invariant seed based on the configured trainer seed, the current main-app + // training step, and the sampler row being executed. This keeps the stochastic rollout path + // tied to the actual sample instead of the transient local app slot chosen by batch-reset. + const uint64_t sample_seed = static_cast(_trainer.seed()) + + static_cast(_global_index) + + static_cast(_sampler_ptr->getNumberOfRows()) * + static_cast(_fe_problem.timeStep()); + // Get the control neural net from the trainer + const Moose::LibtorchActorNeuralNet & trainer_nn = _trainer.controlNeuralNet(); + + // Get the control object from the other app + FEProblemBase & app_problem = _multi_app->appProblemBase(_app_index); + auto & control_warehouse = app_problem.getControlWarehouse(); + std::shared_ptr control_ptr = control_warehouse.getActiveObject(_control_name); + LibtorchDRLControl * control_object = dynamic_cast(control_ptr.get()); + + if (!control_object) + paramError("control_name", "The given control is not a LibtorchDRLControl!"); + + // Copy and the neural net and execute it to get the initial values + control_object->loadControlNeuralNet(trainer_nn); + control_object->setPolicySampleSeed(sample_seed); + control_object->execute(); + } +} + +void +SamplerDRLControlTransfer::finalizeToMultiapp() +{ +} + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C new file mode 100644 index 000000000000..311d9676f4a5 --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C @@ -0,0 +1,321 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchActionDistribution.h" + +#include "LibtorchRandomUtils.h" +#include "LibtorchUtils.h" +#include "MooseError.h" + +#include +#include +#include "libmesh/utility.h" + +namespace +{ + +/** + * Fill in default action scaling and catch shape mistakes early. + * @param factors User-provided scaling factors. + * @param expected_size Number of action outputs expected by the distribution. + * @return A fully populated and validated scaling-factor vector. + */ +std::vector +setActionScalingFactors(const std::vector & factors, const unsigned int expected_size) +{ + const auto normalized = factors.empty() ? std::vector(expected_size, 1.0) : factors; + + if (normalized.size() != expected_size) + mooseError("The number of output_scaling_factors entries must match the number of action " + "outputs."); + + for (const auto factor : normalized) + if (std::abs(factor) == 0.0) + mooseError("The output_scaling_factors entries must be non-zero."); + + return normalized; +} + +} // namespace + +namespace Moose +{ + +LibtorchActionDistribution::LibtorchActionDistribution( + const std::string & name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const std::vector & output_scaling_factors) + : _name(name), + _num_inputs(num_inputs), + _num_outputs(num_outputs), + _device_type(device_type), + _data_type(data_type), + _output_scaling_factors(setActionScalingFactors(output_scaling_factors, num_outputs)) +{ + auto action_scale = _output_scaling_factors; + LibtorchUtils::vectorToTensor(action_scale, _action_scale_tensor); + _action_scale_tensor = register_buffer( + "action_scale", _action_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type)); +} + +void +LibtorchActionDistribution::synchronizeScalingFactorsFromBuffer() +{ + auto action_scale = + _action_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble); + LibtorchUtils::tensorToVector(action_scale, _output_scaling_factors); +} + +torch::Tensor +LibtorchActionDistribution::prepareFeatures(const torch::Tensor & input) const +{ + auto features = input; + if (_data_type != features.scalar_type()) + features = features.to(_data_type); + if (_device_type != features.device().type()) + features = features.to(_device_type); + return features; +} + +torch::Tensor +LibtorchActionDistribution::prepareAction(const torch::Tensor & action) const +{ + auto scaled_action = action; + if (_data_type != scaled_action.scalar_type()) + scaled_action = scaled_action.to(_data_type); + if (_device_type != scaled_action.device().type()) + scaled_action = scaled_action.to(_device_type); + return scaled_action; +} + +LibtorchGaussianActionDistribution::LibtorchGaussianActionDistribution( + const std::string & name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const bool build_on_construct, + const std::vector & output_scaling_factors, + const bool state_independent_std) + : LibtorchActionDistribution( + name, num_inputs, num_outputs, device_type, data_type, output_scaling_factors), + _state_independent_std(state_independent_std) +{ + if (build_on_construct) + constructDistribution(); +} + +void +LibtorchGaussianActionDistribution::constructDistribution() +{ + _mean_module = register_module( + "mean", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + _std_module = register_module( + "std", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + + _mean_module->to(_device_type, _data_type); + _std_module->to(_device_type, _data_type); +} + +void +LibtorchGaussianActionDistribution::initialize(const c10::optional generator) +{ + Moose::orthogonalInitializeTensor(_mean_module->weight, 1.0, generator); + torch::nn::init::zeros_(_mean_module->bias); + + if (_state_independent_std) + { + _std_module->weight.data().zero_(); + torch::nn::init::zeros_(_std_module->bias); + return; + } + + Moose::orthogonalInitializeTensor(_std_module->weight, 1.0, generator); + torch::nn::init::zeros_(_std_module->bias); +} + +void +LibtorchGaussianActionDistribution::reset(const torch::Tensor & input) +{ + const auto features = prepareFeatures(input); + _mean = _mean_module->forward(features); + + if (_state_independent_std) + { + if (_mean.dim() <= 1) + _log_std_tensor = _std_module->bias; + else + _log_std_tensor = _std_module->bias.view({1, -1}).expand(_mean.sizes()); + } + else + _log_std_tensor = _std_module->forward(features); + + _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12)); + _std_tensor = torch::exp(_log_std_tensor); +} + +torch::Tensor +LibtorchGaussianActionDistribution::sample(const c10::optional generator) const +{ + return at::normal(_mean, _std_tensor, generator) * actionScaleTensor(); +} + +torch::Tensor +LibtorchGaussianActionDistribution::deterministicAction() const +{ + return _mean * actionScaleTensor(); +} + +torch::Tensor +LibtorchGaussianActionDistribution::logProbability(const torch::Tensor & action) const +{ + const auto scaled_action = prepareAction(action); + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + const auto unscaled_action = scaled_action / actionScaleTensor(); + + constexpr Real pi = 3.14159265358979323846; + const torch::Tensor var = _std_tensor * _std_tensor; + return -((unscaled_action - _mean) * (unscaled_action - _mean)) / (2.0 * var) - _log_std_tensor - + 0.5 * std::log(2.0 * pi) - log_action_scale; +} + +torch::Tensor +LibtorchGaussianActionDistribution::entropy() const +{ + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + constexpr Real pi = 3.14159265358979323846; + return 0.5 * std::log(2.0 * pi) + _log_std_tensor + 0.5 + log_action_scale; +} + +LibtorchBetaActionDistribution::LibtorchBetaActionDistribution( + const std::string & name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const std::vector & minimum_values, + const std::vector & maximum_values, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const bool build_on_construct, + const std::vector & output_scaling_factors) + : LibtorchActionDistribution( + name, num_inputs, num_outputs, device_type, data_type, output_scaling_factors), + _minimum_values(minimum_values), + _maximum_values(maximum_values) +{ + if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs) + mooseError("The number of minimum_values and maximum_values entries must match the number " + "of action outputs."); + + for (const auto i : make_range(_minimum_values.size())) + if (!(_maximum_values[i] > _minimum_values[i])) + mooseError("maximum_values entries must be strictly greater than minimum_values entries."); + + auto min_value = _minimum_values; + LibtorchUtils::vectorToTensor(min_value, _min_tensor); + _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type); + auto max_value = _maximum_values; + LibtorchUtils::vectorToTensor(max_value, _max_tensor); + _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type); + + if (build_on_construct) + constructDistribution(); +} + +void +LibtorchBetaActionDistribution::constructDistribution() +{ + _alpha_module = register_module( + "alpha", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + _beta_module = register_module( + "beta", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true))); + + _alpha_module->to(_device_type, _data_type); + _beta_module->to(_device_type, _data_type); +} + +void +LibtorchBetaActionDistribution::initialize(const c10::optional generator) +{ + Moose::orthogonalInitializeTensor(_alpha_module->weight, 1.0, generator); + torch::nn::init::zeros_(_alpha_module->bias); + + Moose::orthogonalInitializeTensor(_beta_module->weight, 1.0, generator); + torch::nn::init::zeros_(_beta_module->bias); +} + +void +LibtorchBetaActionDistribution::reset(const torch::Tensor & input) +{ + const auto features = prepareFeatures(input); + const auto alpha = _alpha_module->forward(features); + _alpha_tensor = torch::log(torch::exp(alpha) + 1.0) + 1.0; + const auto beta = _beta_module->forward(features); + _beta_tensor = torch::log(torch::exp(beta) + 1.0) + 1.0; + + _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8); + _mean = _alpha_tensor / _alpha_beta_tensor; + _log_norm = at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor); +} + +torch::Tensor +LibtorchBetaActionDistribution::sample(const c10::optional generator) const +{ + const auto alpha_sample = at::_standard_gamma(_alpha_tensor, generator); + const auto beta_sample = at::_standard_gamma(_beta_tensor, generator); + const auto sampled = alpha_sample / (alpha_sample + beta_sample); + return (_min_tensor + (_max_tensor - _min_tensor) * sampled) * actionScaleTensor(); +} + +torch::Tensor +LibtorchBetaActionDistribution::deterministicAction() const +{ + return (_min_tensor + (_max_tensor - _min_tensor) * _mean) * actionScaleTensor(); +} + +torch::Tensor +LibtorchBetaActionDistribution::logProbability(const torch::Tensor & action) const +{ + const auto scaled_action = prepareAction(action); + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + const auto unscaled_action = scaled_action / actionScaleTensor(); + const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); + const auto normalized = (unscaled_action - _min_tensor) / scale; + const auto clipped = torch::clamp(normalized, 1e-8, 1.0 - 1e-8); + auto log_prob = (_alpha_tensor - 1.0) * torch::log(clipped) + + (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale) - + log_action_scale; + + const auto out_of_bounds = (normalized < 0.0) | (normalized > 1.0); + if (out_of_bounds.any().item()) + log_prob = torch::where(out_of_bounds, + torch::full_like(log_prob, -std::numeric_limits::infinity()), + log_prob); + + return log_prob; +} + +torch::Tensor +LibtorchBetaActionDistribution::entropy() const +{ + const auto log_action_scale = torch::log(torch::abs(actionScaleTensor())); + const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8); + return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) - + (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) + + (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale) + + log_action_scale; +} + +} // namespace Moose + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C new file mode 100644 index 000000000000..036517259627 --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C @@ -0,0 +1,348 @@ +//* This file is part of the MOOSE framework +//* https://www.mooseframework.org +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchActorNeuralNet.h" +#include "LibtorchRandomUtils.h" +#include "MooseError.h" +#include "libmesh/utility.h" + +namespace +{ + +template +auto +captureTensorShapes(const NamedTensorList & tensors) +{ + std::vector>> shapes; + shapes.reserve(tensors.size()); + + for (const auto & tensor : tensors) + shapes.emplace_back( + tensor.key(), + std::vector(tensor.value().sizes().begin(), tensor.value().sizes().end())); + + return shapes; +} + +template +void +verifyTensorShapes(const NamedTensorList & tensors, + const std::vector>> & expected, + const char * tensor_kind) +{ + if (tensors.size() != expected.size()) + mooseError("The loaded DRL actor ", tensor_kind, " count does not match the generated schema."); + + for (const auto tensor_i : make_range(tensors.size())) + { + const auto actual_shape = std::vector(tensors[tensor_i].value().sizes().begin(), + tensors[tensor_i].value().sizes().end()); + + if (tensors[tensor_i].key() != expected[tensor_i].first || + actual_shape != expected[tensor_i].second) + mooseError("The loaded DRL actor ", + tensor_kind, + " '", + tensors[tensor_i].key(), + "' does not match the generated schema."); + } +} + +} // namespace + +namespace Moose +{ + +LibtorchActorNeuralNet::LibtorchActorNeuralNet( + const std::string name, + const unsigned int num_inputs, + const unsigned int num_outputs, + const std::vector & num_neurons_per_layer, + const std::vector & activation_function, + const std::vector & minimum_values, + const std::vector & maximum_values, + const torch::DeviceType device_type, + const torch::ScalarType data_type, + const bool build_on_construct, + const std::vector & input_shift_factors, + const std::vector & input_scaling_factors, + const std::vector & output_scaling_factors, + const bool state_independent_std) + : LibtorchArtificialNeuralNet(name, + num_inputs, + num_outputs, + num_neurons_per_layer, + activation_function, + device_type, + data_type, + false, + input_shift_factors, + input_scaling_factors, + output_scaling_factors), + _minimum_values(minimum_values), + _maximum_values(maximum_values), + _state_independent_std(state_independent_std) +{ + const bool has_minimum_values = !_minimum_values.empty(); + const bool has_maximum_values = !_maximum_values.empty(); + if (has_minimum_values != has_maximum_values) + mooseError("Bounded action distributions require both minimum_values and maximum_values."); + + if (has_minimum_values) + { + if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs) + mooseError("The number of minimum_values and maximum_values entries must match the number " + "of action outputs."); + + for (const auto i : make_range(_minimum_values.size())) + if (!(_maximum_values[i] > _minimum_values[i])) + mooseError("maximum_values entries must be strictly greater than minimum_values entries."); + } + + if (build_on_construct) + constructNeuralNetwork(); +} + +LibtorchActorNeuralNet::LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn, + const bool build_on_construct) + : LibtorchArtificialNeuralNet(dynamic_cast(nn), false), + _minimum_values(nn.minValues()), + _maximum_values(nn.maxValues()), + _state_independent_std(nn.stateIndependentStd()) +{ + // We construct the NN architecture + if (build_on_construct) + { + constructNeuralNetwork(); + // We fill it up with the current parameter values + const auto & from_params = nn.named_parameters(); + auto to_params = this->named_parameters(); + for (unsigned int param_i : make_range(from_params.size())) + to_params[param_i].value().data() = from_params[param_i].value().data().clone(); + + const auto & from_buffers = nn.named_buffers(); + auto to_buffers = this->named_buffers(); + for (unsigned int buffer_i : make_range(from_buffers.size())) + to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone(); + } +} + +void +LibtorchActorNeuralNet::initializeNeuralNetwork(const c10::optional generator) +{ + for (unsigned int i = 0; i < numHiddenLayers(); ++i) + { + const auto & activation = + _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; + const Real gain = determineGain(activation); + Moose::orthogonalInitializeTensor(_weights[i]->weight, gain, generator); + torch::nn::init::zeros_(_weights[i]->bias); + } + + _action_distribution->initialize(generator); +} + +void +LibtorchActorNeuralNet::constructNeuralNetwork() +{ + // Adding hidden layers + unsigned int inp_neurons = _num_inputs; + for (unsigned int i = 0; i < numHiddenLayers(); ++i) + { + std::unordered_map parameters = { + {"inp_neurons", inp_neurons}, {"out_neurons", _num_neurons_per_layer[i]}}; + addLayer("hidden_layer_" + std::to_string(i + 1), parameters); + + // Necessary to retain double precision (and error-free runs) + _weights[i]->to(_device_type, _data_type); + inp_neurons = _num_neurons_per_layer[i]; + } + + if (_minimum_values.empty() && _maximum_values.empty()) + _action_distribution = + std::make_shared("action_distribution", + inp_neurons, + _num_outputs, + _device_type, + _data_type, + true, + _output_scaling_factors, + _state_independent_std); + else + _action_distribution = + std::make_shared("action_distribution", + inp_neurons, + _num_outputs, + _minimum_values, + _maximum_values, + _device_type, + _data_type, + true, + _output_scaling_factors); + + // Keep the serialized module name stable so existing checkpoints continue to load. + register_module("action_head", _action_distribution); +} + +torch::Tensor +LibtorchActorNeuralNet::entropy() +{ + return _action_distribution->entropy(); +} + +const LibtorchGaussianActionDistribution * +LibtorchActorNeuralNet::gaussianActionDistributionPtr() const +{ + return dynamic_cast(_action_distribution.get()); +} + +LibtorchGaussianActionDistribution * +LibtorchActorNeuralNet::gaussianActionDistributionPtr() +{ + return dynamic_cast(_action_distribution.get()); +} + +const LibtorchGaussianActionDistribution & +LibtorchActorNeuralNet::gaussianActionDistribution() const +{ + const auto * distribution = gaussianActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Gaussian action distribution from a bounded actor."); + return *distribution; +} + +LibtorchGaussianActionDistribution & +LibtorchActorNeuralNet::gaussianActionDistribution() +{ + auto * distribution = gaussianActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Gaussian action distribution from a bounded actor."); + return *distribution; +} + +const LibtorchBetaActionDistribution * +LibtorchActorNeuralNet::betaActionDistributionPtr() const +{ + return dynamic_cast(_action_distribution.get()); +} + +LibtorchBetaActionDistribution * +LibtorchActorNeuralNet::betaActionDistributionPtr() +{ + return dynamic_cast(_action_distribution.get()); +} + +const LibtorchBetaActionDistribution & +LibtorchActorNeuralNet::betaActionDistribution() const +{ + const auto * distribution = betaActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Beta action distribution from an unbounded actor."); + return *distribution; +} + +LibtorchBetaActionDistribution & +LibtorchActorNeuralNet::betaActionDistribution() +{ + auto * distribution = betaActionDistributionPtr(); + if (!distribution) + mooseError("Requested a Beta action distribution from an unbounded actor."); + return *distribution; +} + +void +LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input) +{ + _action_distribution->reset(input); +} + +torch::Tensor +LibtorchActorNeuralNet::forward(const torch::Tensor & x) +{ + torch::Tensor output = preprocessInput(x); + + for (unsigned int i = 0; i < _weights.size(); ++i) + { + std::string activation = + _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0]; + if (activation == "relu") + output = torch::relu(_weights[i]->forward(output)); + else if (activation == "sigmoid") + output = torch::sigmoid(_weights[i]->forward(output)); + else if (activation == "tanh") + output = torch::tanh(_weights[i]->forward(output)); + else if (activation == "elu") + output = torch::elu(_weights[i]->forward(output)); + else if (activation == "gelu") + output = torch::gelu(_weights[i]->forward(output)); + else if (activation == "linear") + output = _weights[i]->forward(output); + } + + return output; +} + +torch::Tensor +LibtorchActorNeuralNet::evaluate(torch::Tensor & x, + const bool sampled, + const c10::optional generator) +{ + torch::Tensor output = forward(x); + + resetDistributionParams(output); + + if (sampled) + return sample(generator); + + return _action_distribution->deterministicAction(); +} + +torch::Tensor +LibtorchActorNeuralNet::sample(const c10::optional generator) +{ + return _action_distribution->sample(generator); +} + +torch::Tensor +LibtorchActorNeuralNet::logProbability(const torch::Tensor & action) +{ + return _action_distribution->logProbability(action); +} + +void +loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename) +{ + try + { + const auto expected_parameters = captureTensorShapes(nn.named_parameters()); + const auto expected_buffers = captureTensorShapes(nn.named_buffers()); + + torch::serialize::InputArchive archive; + archive.load_from(filename); + nn.load(archive); + + verifyTensorShapes(nn.named_parameters(), expected_parameters, "parameter"); + verifyTensorShapes(nn.named_buffers(), expected_buffers, "buffer"); + + nn.synchronizeAffineFactorsFromBuffers(); + nn.actionDistribution().synchronizeScalingFactorsFromBuffer(); + } + catch (const c10::Error & e) + { + mooseError("The requested DRL actor checkpoint could not be loaded as a native libtorch " + "archive. Make sure the file exists and matches the generated actor schema.\n", + e.msg()); + } +} + +} + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C new file mode 100644 index 000000000000..f25b6fefe9d5 --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C @@ -0,0 +1,92 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRLMiniBatchSampler.h" + +#include "MooseError.h" + +#include +#include + +std::vector +LibtorchRLMiniBatchSampler::sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, + const unsigned int batch_size, + const bool standardize_advantage, + const c10::optional generator) const +{ + std::vector mini_batches; + + if (!batch.size()) + return mini_batches; + + validateBatch(batch); + + const auto effective_batch_size = std::max(1, batch_size); + auto permutation = + at::randperm(batch.size(), generator, torch::TensorOptions().dtype(torch::kLong)); + + for (std::int64_t batch_begin = 0; batch_begin < batch.size(); + batch_begin += effective_batch_size) + { + const auto batch_end = std::min( + batch.size(), batch_begin + static_cast(effective_batch_size)); + const auto indices = permutation.narrow(0, batch_begin, batch_end - batch_begin); + mini_batches.push_back(makeMiniBatch(batch, indices, standardize_advantage)); + } + + return mini_batches; +} + +void +LibtorchRLMiniBatchSampler::validateBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch) +{ + if (!batch.actions.defined() || !batch.log_probabilities.defined() || + !batch.value_targets.defined() || !batch.advantages.defined()) + mooseError("RL tensor batches must define observations, actions, log probabilities, value " + "targets, and advantages before mini-batch sampling."); + + const auto batch_size = batch.size(); + const auto validate_rows = [batch_size](const torch::Tensor & tensor, std::string_view name) + { + if (!tensor.defined() || tensor.size(0) != batch_size) + mooseError( + "RL tensor batch field ", name, " must have the same number of rows as observations."); + }; + + validate_rows(batch.actions, "actions"); + validate_rows(batch.log_probabilities, "log_probabilities"); + validate_rows(batch.value_targets, "value_targets"); + validate_rows(batch.advantages, "advantages"); +} + +LibtorchRLMiniBatch +LibtorchRLMiniBatchSampler::makeMiniBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch, + const torch::Tensor & indices, + const bool standardize_advantage) +{ + LibtorchRLMiniBatch mini_batch; + + mini_batch.observations = batch.observations.index({indices}); + mini_batch.actions = batch.actions.index({indices}); + mini_batch.old_log_probabilities = batch.log_probabilities.index({indices}); + mini_batch.value_targets = batch.value_targets.index({indices}); + mini_batch.advantages = batch.advantages.index({indices}); + + if (standardize_advantage) + { + const auto std = mini_batch.advantages.std(false); + mini_batch.advantages = (mini_batch.advantages - mini_batch.advantages.mean()) / (std + 1e-10); + } + + return mini_batch; +} + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C new file mode 100644 index 000000000000..51d36135aaaa --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C @@ -0,0 +1,60 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRLPPOLoss.h" + +LibtorchRLPPOLoss::LibtorchRLPPOLoss(const Real clip_parameter, const Real entropy_coeff) + : _clip_parameter(clip_parameter), _entropy_coeff(entropy_coeff) +{ +} + +LibtorchRLPPOLossOutput +LibtorchRLPPOLoss::compute(Moose::LibtorchActorNeuralNet & policy_network, + Moose::LibtorchArtificialNeuralNet & value_network, + const LibtorchRLMiniBatch & batch) const +{ + auto observations = batch.observations; + policy_network.evaluate(observations, false); + + const auto current_log_probability = + reduceActionDimension(policy_network.logProbability(batch.actions)); + const auto previous_log_probability = reduceActionDimension(batch.old_log_probabilities); + const auto entropy = reduceActionDimension(policy_network.entropy()); + + const auto ratio = (current_log_probability - previous_log_probability).exp(); + const auto surr1 = ratio * batch.advantages; + const auto surr2 = + torch::clamp(ratio, 1.0 - _clip_parameter, 1.0 + _clip_parameter) * batch.advantages; + + LibtorchRLPPOLossOutput output; + output.actor_loss = -(torch::min(surr1, surr2) + _entropy_coeff * entropy).mean(); + output.critic_loss = + torch::mse_loss(value_network.forward(batch.observations), batch.value_targets); + output.entropy = entropy.mean(); + return output; +} + +torch::Tensor +LibtorchRLPPOLoss::reduceActionDimension(const torch::Tensor & tensor) +{ + if (!tensor.defined()) + return tensor; + + if (tensor.dim() == 1) + return tensor.unsqueeze(1); + + if (tensor.size(-1) == 1) + return tensor; + + return tensor.sum(-1, true); +} + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C new file mode 100644 index 000000000000..d6ee0a60da81 --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C @@ -0,0 +1,166 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRLTrajectoryBuffer.h" + +#include "MooseError.h" + +#include "libmesh/utility.h" + +namespace +{ + +torch::Tensor +bufferMatrixToTensor(const std::vector> & rows) +{ + if (rows.empty()) + return {}; + + const auto num_columns = rows.front().size(); + auto tensor = torch::zeros({static_cast(rows.size()), static_cast(num_columns)}, + torch::TensorOptions().dtype(torch::kDouble)); + auto accessor = tensor.accessor(); + + for (const auto row_i : make_range(rows.size())) + { + if (rows[row_i].size() != num_columns) + mooseError("All rows must have the same number of entries when flattening an RL batch."); + + for (const auto column_i : make_range(num_columns)) + accessor[row_i][column_i] = rows[row_i][column_i]; + } + + return tensor; +} + +torch::Tensor +bufferVectorToColumnTensor(const std::vector & values) +{ + if (values.empty()) + return {}; + + auto tensor = torch::zeros({static_cast(values.size()), 1}, + torch::TensorOptions().dtype(torch::kDouble)); + auto accessor = tensor.accessor(); + + for (const auto value_i : make_range(values.size())) + accessor[value_i][0] = values[value_i]; + + return tensor; +} + +} // namespace + +void +LibtorchRLTrajectoryBuffer::addTrajectory(Trajectory trajectory) +{ + validateTrajectory(trajectory); + _trajectories.push_back(std::move(trajectory)); +} + +void +LibtorchRLTrajectoryBuffer::clear() +{ + _trajectories.clear(); +} + +std::size_t +LibtorchRLTrajectoryBuffer::numTransitions() const +{ + std::size_t transitions = 0; + for (const auto & trajectory : _trajectories) + transitions += trajectory.rewards.size(); + return transitions; +} + +LibtorchRLTrajectoryBuffer::TensorBatch +LibtorchRLTrajectoryBuffer::flatten() const +{ + TensorBatch batch; + + if (_trajectories.empty()) + return batch; + + std::vector> observations; + std::vector> next_observations; + std::vector> actions; + std::vector> log_probabilities; + std::vector rewards; + std::vector value_targets; + std::vector advantages; + + observations.reserve(numTransitions()); + next_observations.reserve(numTransitions()); + actions.reserve(numTransitions()); + log_probabilities.reserve(numTransitions()); + rewards.reserve(numTransitions()); + value_targets.reserve(numTransitions()); + advantages.reserve(numTransitions()); + + for (const auto & trajectory : _trajectories) + { + if (!trajectory.value_targets.empty() && + trajectory.value_targets.size() != trajectory.rewards.size()) + mooseError("Value targets must match the reward length of the trajectory."); + if (!trajectory.advantages.empty() && trajectory.advantages.size() != trajectory.rewards.size()) + mooseError("Advantages must match the reward length of the trajectory."); + + observations.insert( + observations.end(), trajectory.observations.begin(), trajectory.observations.end()); + next_observations.insert(next_observations.end(), + trajectory.next_observations.begin(), + trajectory.next_observations.end()); + actions.insert(actions.end(), trajectory.actions.begin(), trajectory.actions.end()); + log_probabilities.insert(log_probabilities.end(), + trajectory.log_probabilities.begin(), + trajectory.log_probabilities.end()); + rewards.insert(rewards.end(), trajectory.rewards.begin(), trajectory.rewards.end()); + value_targets.insert( + value_targets.end(), trajectory.value_targets.begin(), trajectory.value_targets.end()); + advantages.insert(advantages.end(), trajectory.advantages.begin(), trajectory.advantages.end()); + } + + batch.observations = bufferMatrixToTensor(observations); + batch.next_observations = bufferMatrixToTensor(next_observations); + batch.actions = bufferMatrixToTensor(actions); + batch.log_probabilities = bufferMatrixToTensor(log_probabilities); + batch.rewards = bufferVectorToColumnTensor(rewards); + batch.value_targets = bufferVectorToColumnTensor(value_targets); + batch.advantages = bufferVectorToColumnTensor(advantages); + + return batch; +} + +void +LibtorchRLTrajectoryBuffer::validateTrajectory(const Trajectory & trajectory) +{ + const auto num_steps = trajectory.rewards.size(); + + if (trajectory.observations.size() != num_steps) + mooseError("RL trajectory observations must match the reward sequence length."); + + if (trajectory.next_observations.size() != num_steps) + mooseError("RL trajectory next observations must match the reward sequence length."); + + if (trajectory.actions.size() != num_steps) + mooseError("RL trajectory actions must match the reward sequence length."); + + if (trajectory.log_probabilities.size() != num_steps) + mooseError("RL trajectory log probabilities must match the reward sequence length."); + + if (!trajectory.value_targets.empty() && trajectory.value_targets.size() != num_steps) + mooseError("RL trajectory value targets must match the reward sequence length."); + + if (!trajectory.advantages.empty() && trajectory.advantages.size() != num_steps) + mooseError("RL trajectory advantages must match the reward sequence length."); +} + +#endif diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C new file mode 100644 index 000000000000..dd87f584cf29 --- /dev/null +++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C @@ -0,0 +1,104 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "LibtorchRLValueEstimator.h" + +#include "LibtorchUtils.h" + +#include "libmesh/utility.h" + +namespace +{ + +torch::Tensor +valueEstimatorMatrixToTensor(const std::vector> & rows) +{ + if (rows.empty()) + return {}; + + const auto num_columns = rows.front().size(); + auto tensor = torch::zeros({static_cast(rows.size()), static_cast(num_columns)}, + torch::TensorOptions().dtype(torch::kDouble)); + auto accessor = tensor.accessor(); + + for (const auto row_i : make_range(rows.size())) + for (const auto column_i : make_range(num_columns)) + accessor[row_i][column_i] = rows[row_i][column_i]; + + return tensor; +} + +} // namespace + +LibtorchRLValueEstimator::LibtorchRLValueEstimator(const Real discount_factor, + const Real lambda_factor) + : _discount_factor(discount_factor), _lambda_factor(lambda_factor) +{ +} + +void +LibtorchRLValueEstimator::computeValueTargets( + LibtorchRLTrajectoryBuffer & buffer, Moose::LibtorchArtificialNeuralNet & value_network) const +{ + for (auto & trajectory : buffer.trajectories()) + { + const auto targets = estimate(trajectory, value_network); + trajectory.advantages = targets.advantages; + trajectory.value_targets = targets.value_targets; + } +} + +LibtorchRLValueEstimator::Targets +LibtorchRLValueEstimator::estimate(const LibtorchRLTrajectoryBuffer::Trajectory & trajectory, + Moose::LibtorchArtificialNeuralNet & value_network) const +{ + Targets targets; + + const auto values = evaluate(trajectory.observations, value_network); + const auto next_values = evaluate(trajectory.next_observations, value_network); + + const auto num_steps = trajectory.rewards.size(); + targets.advantages.resize(num_steps, 0.0); + targets.value_targets.resize(num_steps, 0.0); + + Real gae = 0.0; + for (const auto reverse_step : make_range(num_steps)) + { + const auto step = num_steps - reverse_step - 1; + const auto delta = + trajectory.rewards[step] + _discount_factor * next_values[step] - values[step]; + gae = delta + _discount_factor * _lambda_factor * gae; + targets.advantages[step] = gae; + targets.value_targets[step] = gae + values[step]; + } + + return targets; +} + +std::vector +LibtorchRLValueEstimator::evaluate(const std::vector> & observations, + Moose::LibtorchArtificialNeuralNet & value_network) const +{ + if (observations.empty()) + return {}; + + torch::NoGradGuard no_grad; + + auto tensor = valueEstimatorMatrixToTensor(observations); + auto value_tensor = value_network.forward(tensor); + auto flattened_value_tensor = value_tensor.reshape({-1}); + + std::vector values; + LibtorchUtils::tensorToVector(flattened_value_tensor, values); + return values; +} + +#endif diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv index 454a22e116dc..cd7202f5163b 100644 --- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv +++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv @@ -1,12 +1,12 @@ time,center_temp,center_temp_tend,env_temp,left_flux,log_prob_left_flux 0,300,300,273,0,0 -1800,300,270.35724380953,273.98104693845,-0.13793264034647,1.1115979988596 -3600,270.35724380953,274.51295145974,274.9578928833,-0.016781115951807,-0.50967346391778 -5400,274.51295145974,270.06590276071,275.92635483024,-0.22228241099481,1.1389673700336 -7200,270.06590276071,269.76297060624,276.88228567654,-0.26991716569164,0.53565739363602 -9000,269.76297060624,272.93107942871,277.82159197955,-0.18534271617753,1.2519275533428 -10800,272.93107942871,274.5965017921,278.74025148548,-0.15706446553999,1.3472992379419 -12600,274.5965017921,276.7588668075,279.63433035329,-0.10897031971508,1.353343147668 -14400,276.7588668075,278.0802103911,280.5,-0.091713226427664,1.2665547503991 -16200,278.0802103911,278.66431523589,281.33355349529,-0.10118636668795,1.2822084987121 -18000,278.66431523589,275.59443714345,282.13142143513,-0.24790007933615,0.92588151005597 +1800,300,283.69849346609,273.98104693845,73.630502862132,-5.8011541981082 +3600,283.69849346609,300.67435328161,274.9578928833,195.06575617849,-7.4224257273223 +5400,300.67435328161,267.4643690803,275.92635483024,-64.308701931811,-5.7737848258126 +7200,267.4643690803,259.55941422837,276.88228567654,-131.38405718619,-6.3770948269327 +9000,259.55941422837,272.14524323322,277.82159197955,-42.985660495371,-5.6608246378745 +10800,272.14524323322,275.68007219983,278.74025148548,-23.188639562158,-5.5654529493672 +12600,275.68007219983,282.82714752336,279.63433035329,24.239841099254,-5.5594090393934 +14400,282.82714752336,286.84577029599,280.5,48.132997254754,-5.6461974402188 +16200,286.84577029599,287.23729606816,281.33355349529,44.765462438375,-5.6305436912643 +18000,287.23729606816,269.39326109815,282.13142143513,-96.664605550218,-5.9868706945221 diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i index e4621ac75239..e85f3d7fc132 100644 --- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i +++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i @@ -93,17 +93,15 @@ seed = 11 type = LibtorchDRLControl parameters = "BCs/left_flux/value" - responses = 'center_temp env_temp' + observations = 'center_temp env_temp' input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '270 270' - action_standard_deviations = '0.1' - action_scaling_factors = 200 + observation_scaling_factors = '0.03 0.03' + observation_shift_factors = '270 270' + action_scaling_factors = 100 filename = 'mynet_control.net' - torch_script_format = false - num_neurons_per_layer = '16 6' + num_neurons_per_layer = '4 2' activation_function = 'relu' execute_on = 'TIMESTEP_BEGIN' @@ -111,12 +109,11 @@ [src_control_empty] type = LibtorchDRLControl parameters = "BCs/left_flux/value" - responses = 'center_temp env_temp' + observations = 'center_temp env_temp' input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '270 270' - action_standard_deviations = '0.1' + observation_scaling_factors = '0.03 0.03' + observation_shift_factors = '270 270' action_scaling_factors = 100 execute_on = 'TIMESTEP_BEGIN' diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net index 66f9c6f3bd3e..ea50e03c46ef 100644 Binary files a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net and b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net differ diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests index 551b53122de5..add6e3b26270 100644 --- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests +++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests @@ -10,6 +10,31 @@ "it to control a transient process." capabilities = 'libtorch' [] + [read-parameters-half-transient] + type = RunApp + input = libtorch_drl_control.i + cli_args = "Outputs/file_base='parameter_read' Outputs/checkpoint=true " + "--test-checkpoint-half-transient" + recover = false + restep = false + prereq = read-parameters + requirement = "The system shall be able to recover a seeded DRL controller from a checkpointed " + "transient." + capabilities = 'libtorch' + [] + [read-parameters-recover] + type = CSVDiff + input = libtorch_drl_control.i + csvdiff = parameter_read.csv + cli_args = "Outputs/file_base='parameter_read' --recover parameter_read_cp/LATEST" + recover = false + restep = false + delete_output_before_running = false + prereq = read-parameters-half-transient + requirement = "The system shall reproduce the same transient control history after recovering " + "a seeded DRL controller from a checkpoint." + capabilities = 'libtorch' + [] [without-nn] type = CSVDiff input = libtorch_drl_control.i @@ -21,4 +46,12 @@ "initialized in it." capabilities = 'libtorch' [] + [invalid-execute-on] + type = RunException + input = libtorch_drl_control.i + cli_args = "Controls/src_control/execute_on='TIMESTEP_END'" + expect_err = "LibtorchDRLControl only supports 'TIMESTEP_BEGIN' for 'execute_on'." + requirement = "The system shall reject DRL controller execute flags other than timestep begin." + capabilities = 'libtorch' + [] [] diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/control.net_best b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/control.net_best new file mode 100644 index 000000000000..45792df0dce1 Binary files /dev/null and b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/control.net_best differ diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i index 0352b4cea3a1..0efff026d328 100644 --- a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i +++ b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i @@ -121,13 +121,12 @@ [src_control] type = LibtorchDRLControl parameters = "BCs/left_flux/value" - responses = 'center_temp env_temp' + observations = 'center_temp env_temp' # keep consistent with LibtorchDRLControlTrainer input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '270 270' - action_standard_deviations = '0.1' + observation_scaling_factors = '0.03 0.03' + observation_shift_factors = '270 270' action_scaling_factors = 100 execute_on = 'TIMESTEP_BEGIN' diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i index 1660e9ed3aa1..f13f07fa8565 100644 --- a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i +++ b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i @@ -24,20 +24,21 @@ control_name = src_control [] [r_transfer] - type = MultiAppReporterTransfer + type = SamplerReporterTransfer from_multi_app = runner - to_reporters = 'results/center_temp results/env_temp results/reward results/left_flux results/log_prob_left_flux' - from_reporters = 'T_reporter/center_temp_tend:value T_reporter/env_temp:value T_reporter/reward:value T_reporter/left_flux:value T_reporter/log_prob_left_flux:value' + sampler = dummy + stochastic_reporter = storage + from_reporter = 'T_reporter/center_temp_tend:value T_reporter/env_temp:value T_reporter/reward:value T_reporter/left_flux:value T_reporter/log_prob_left_flux:value' [] [] [Trainers] [nn_trainer] type = LibtorchDRLControlTrainer - response = 'results/center_temp results/env_temp' - control = 'results/left_flux' - log_probability = 'results/log_prob_left_flux' - reward = 'results/reward' + observation = 'storage/r_transfer:T_reporter:center_temp_tend:value storage/r_transfer:T_reporter:env_temp:value' + control = 'storage/r_transfer:T_reporter:left_flux:value' + log_probability = 'storage/r_transfer:T_reporter:log_prob_left_flux:value' + reward = 'storage/r_transfer:T_reporter:reward:value' num_epochs = 10 update_frequency = 2 @@ -53,21 +54,20 @@ # keep consistent with LibtorchNeuralNetControl input_timesteps = 2 - response_scaling_factors = '0.03 0.03' - response_shift_factors = '270 270' - action_standard_deviations = '0.1' + observation_scaling_factors = '0.03 0.03' + observation_shift_factors = '270 270' + action_scaling_factors = 100 read_from_file = false + shift_outputs = false [] [] [Reporters] - [results] - type = ConstantReporter - real_vector_names = 'center_temp env_temp reward left_flux log_prob_left_flux' - real_vector_values = '0; 0; 0; 0; 0' - outputs = 'csv_out' - execute_on = timestep_begin + [storage] + type = StochasticReporter + parallel_type = ROOT + outputs = none [] [nn_parameters] type = DRLControlNeuralNetParameters diff --git a/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C new file mode 100644 index 000000000000..a5a2b77a5f30 --- /dev/null +++ b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C @@ -0,0 +1,421 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "gtest/gtest.h" +#include "LibtorchActorNeuralNet.h" +#include "LibtorchRandomUtils.h" +#include "MooseUnitUtils.h" + +#include + +namespace +{ + +class TestableLibtorchArtificialNeuralNet : public Moose::LibtorchArtificialNeuralNet +{ +public: + using Moose::LibtorchArtificialNeuralNet::_weights; + using Moose::LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet; +}; + +class TestableLibtorchActorNeuralNet : public Moose::LibtorchActorNeuralNet +{ +public: + using Moose::LibtorchActorNeuralNet::_weights; + using Moose::LibtorchActorNeuralNet::LibtorchActorNeuralNet; +}; + +Real +inverseSoftplusPlusOne(const Real target) +{ + return std::log(std::exp(target - 1.0) - 1.0); +} + +} // namespace + +TEST(LibtorchActorNeuralNetTest, artificialNetAppliesAffineInputAndOutputScaling) +{ + TestableLibtorchArtificialNeuralNet network("test_ann", + 2, + 1, + {}, + {"linear"}, + torch::kCPU, + torch::kDouble, + true, + {1.0, 2.0}, + {2.0, 3.0}, + {10.0}); + + ASSERT_EQ(network._weights.size(), 1u); + + network._weights[0]->weight.data().fill_(0.0); + network._weights[0]->weight.data()[0][0] = 1.0; + network._weights[0]->weight.data()[0][1] = -1.0; + network._weights[0]->bias.data().fill_(0.0); + + auto input = torch::tensor({{2.0, 6.0}}, at::kDouble); + const Real actual = network.forward(input).item(); + + EXPECT_NEAR(actual, -100.0, 1e-12); +} + +TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability) +{ + constexpr Real min_value = -2.0; + constexpr Real max_value = 4.0; + constexpr Real alpha_target = 2.3; + constexpr Real beta_target = 3.7; + constexpr Real action_value = 1.2; + + TestableLibtorchActorNeuralNet network( + "test_beta", 1, 1, {1}, {"linear"}, {min_value}, {max_value}); + + ASSERT_EQ(network._weights.size(), 1u); + + network._weights[0]->weight.data().fill_(0.0); + network._weights[0]->bias.data().fill_(1.0); + network.betaActionDistribution().alphaModule()->weight.data().fill_( + inverseSoftplusPlusOne(alpha_target)); + network.betaActionDistribution().betaModule()->weight.data().fill_( + inverseSoftplusPlusOne(beta_target)); + + auto input = torch::zeros({1, 1}, at::kDouble); + network.evaluate(input, false); + + const Real alpha = network.betaActionDistribution().alphaTensor().item(); + const Real beta = network.betaActionDistribution().betaTensor().item(); + const Real normalized = (action_value - min_value) / (max_value - min_value); + const Real log_norm = std::lgamma(alpha) + std::lgamma(beta) - std::lgamma(alpha + beta); + const Real expected = (alpha - 1.0) * std::log(normalized) + + (beta - 1.0) * std::log1p(-normalized) - log_norm - + std::log(max_value - min_value); + + auto action = torch::tensor({{action_value}}, at::kDouble); + const Real actual = network.logProbability(action).item(); + + EXPECT_NEAR(actual, expected, 1e-12); +} + +TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScalingAndStateIndependentStd) +{ + constexpr Real input_shift = 1.0; + constexpr Real input_scale = 2.0; + constexpr Real action_scale = 5.0; + const Real log_std = std::log(2.0); + constexpr Real physical_action = 20.0; + constexpr Real expected_deterministic_action = 15.0; + + TestableLibtorchActorNeuralNet network("test_gaussian", + 1, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {input_shift}, + {input_scale}, + {action_scale}); + + network.gaussianActionDistribution().meanModule()->weight.data().fill_(1.5); + network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); + network.gaussianActionDistribution().stdModule()->weight.data().fill_(123.0); + network.gaussianActionDistribution().stdModule()->bias.data().fill_(log_std); + + auto input = torch::tensor({{2.0}}, at::kDouble); + const Real deterministic_action = network.evaluate(input, false).item(); + EXPECT_NEAR(deterministic_action, expected_deterministic_action, 1e-12); + EXPECT_NEAR( + network.gaussianActionDistribution().stdTensor().item(), std::exp(log_std), 1e-12); + + const Real unscaled_mean = expected_deterministic_action / action_scale; + const Real unscaled_action = physical_action / action_scale; + const Real expected_log_probability = + -std::pow(unscaled_action - unscaled_mean, 2) / (2.0 * 4.0) - log_std - + 0.5 * std::log(2.0 * libMesh::pi) - std::log(action_scale); + + auto action = torch::tensor({{physical_action}}, at::kDouble); + const Real actual_log_probability = network.logProbability(action).item(); + + EXPECT_NEAR(actual_log_probability, expected_log_probability, 1e-12); + + auto second_input = torch::tensor({{4.0}}, at::kDouble); + network.evaluate(second_input, false); + EXPECT_NEAR( + network.gaussianActionDistribution().stdTensor().item(), std::exp(log_std), 1e-12); +} + +TEST(LibtorchActorNeuralNetTest, gaussianActorCanUseStateDependentStdWhenRequested) +{ + TestableLibtorchActorNeuralNet network("test_state_dependent_gaussian", + 1, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0}, + {2.0}, + {1.0}, + false); + + network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0); + network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); + network.gaussianActionDistribution().stdModule()->weight.data().fill_(0.5); + network.gaussianActionDistribution().stdModule()->bias.data().fill_(0.0); + + auto first_input = torch::tensor({{2.0}}, at::kDouble); + network.evaluate(first_input, false); + const Real first_std = network.gaussianActionDistribution().stdTensor().item(); + + auto second_input = torch::tensor({{4.0}}, at::kDouble); + network.evaluate(second_input, false); + const Real second_std = network.gaussianActionDistribution().stdTensor().item(); + + EXPECT_NEAR(first_std, std::exp(1.0), 1e-12); + EXPECT_NEAR(second_std, std::exp(3.0), 1e-12); + EXPECT_GT(second_std, first_std); +} + +TEST(LibtorchActorNeuralNetTest, explicitGeneratorKeepsGaussianSamplingStableAcrossCopies) +{ + TestableLibtorchActorNeuralNet original("test_gaussian", + 1, + 1, + {}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {0.0}, + {1.0}, + {1.0}); + + original.gaussianActionDistribution().meanModule()->weight.data().fill_(0.75); + original.gaussianActionDistribution().meanModule()->bias.data().fill_(1.25); + original.gaussianActionDistribution().stdModule()->weight.data().fill_(0.0); + original.gaussianActionDistribution().stdModule()->bias.data().fill_(std::log(0.5)); + + TestableLibtorchActorNeuralNet copied(original); + + auto original_input = torch::tensor({{2.0}}, at::kDouble); + auto copied_input = torch::tensor({{2.0}}, at::kDouble); + auto original_generator = Moose::makeLibtorchCPUGenerator(12345); + auto copied_generator = Moose::makeLibtorchCPUGenerator(12345); + + const auto original_action = original.evaluate(original_input, true, original_generator); + const auto copied_action = copied.evaluate(copied_input, true, copied_generator); + + EXPECT_TRUE(torch::allclose(original_action, copied_action, /* rtol = */ 0.0, /* atol = */ 0.0)); +} + +TEST(LibtorchActorNeuralNetTest, explicitGeneratorMakesInitializationIndependentOfConstructionOrder) +{ + TestableLibtorchActorNeuralNet first("first_actor", + 2, + 1, + {3}, + {"relu"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {0.0, 0.0}, + {1.0, 1.0}, + {1.0}); + TestableLibtorchActorNeuralNet second("second_actor", + 2, + 1, + {3}, + {"relu"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {0.0, 0.0}, + {1.0, 1.0}, + {1.0}); + + first.initializeNeuralNetwork(Moose::makeLibtorchCPUGenerator(2468)); + second.initializeNeuralNetwork(Moose::makeLibtorchCPUGenerator(2468)); + + const auto first_parameters = first.named_parameters(); + const auto second_parameters = second.named_parameters(); + ASSERT_EQ(first_parameters.size(), second_parameters.size()); + for (const auto i : index_range(first_parameters)) + { + EXPECT_EQ(first_parameters[i].key(), second_parameters[i].key()); + EXPECT_TRUE(torch::allclose(first_parameters[i].value(), + second_parameters[i].value(), + /* rtol = */ 0.0, + /* atol = */ 0.0)); + } +} + +TEST(LibtorchActorNeuralNetTest, loadActorStateAcceptsTorchSaveArchive) +{ + TestableLibtorchActorNeuralNet saved("saved_actor", + 2, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + saved._weights[0]->weight.data() = torch::tensor({{1.0, 2.0}, {3.0, 4.0}}, at::kDouble); + saved._weights[0]->bias.data() = torch::tensor({5.0, 6.0}, at::kDouble); + saved.gaussianActionDistribution().meanModule()->weight.data() = + torch::tensor({{7.0, 8.0}}, at::kDouble); + saved.gaussianActionDistribution().meanModule()->bias.data() = torch::tensor({9.0}, at::kDouble); + saved.gaussianActionDistribution().stdModule()->weight.data() = + torch::tensor({{-1.5, 2.5}}, at::kDouble); + saved.gaussianActionDistribution().stdModule()->bias.data() = torch::tensor({-3.5}, at::kDouble); + + Moose::UnitUtils::TempFile archive; + torch::save(std::make_shared(saved), archive.path().string()); + + TestableLibtorchActorNeuralNet restored("restored_actor", + 2, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string()); + + const auto saved_parameters = saved.named_parameters(); + const auto restored_parameters = restored.named_parameters(); + ASSERT_EQ(saved_parameters.size(), restored_parameters.size()); + for (std::size_t i = 0; i < saved_parameters.size(); ++i) + { + EXPECT_EQ(saved_parameters[i].key(), restored_parameters[i].key()); + EXPECT_TRUE(torch::allclose(saved_parameters[i].value(), + restored_parameters[i].value(), + /*rtol=*/0.0, + /*atol=*/0.0)); + } + + const auto saved_buffers = saved.named_buffers(); + const auto restored_buffers = restored.named_buffers(); + ASSERT_EQ(saved_buffers.size(), restored_buffers.size()); + for (std::size_t i = 0; i < saved_buffers.size(); ++i) + { + EXPECT_EQ(saved_buffers[i].key(), restored_buffers[i].key()); + EXPECT_TRUE(torch::allclose(saved_buffers[i].value(), + restored_buffers[i].value(), + /*rtol=*/0.0, + /*atol=*/0.0)); + } + + auto saved_input = torch::tensor({{3.0, -1.0}}, at::kDouble); + auto restored_input = saved_input.clone(); + EXPECT_TRUE(torch::allclose(saved.evaluate(saved_input, false), + restored.evaluate(restored_input, false), + /*rtol=*/0.0, + /*atol=*/0.0)); +} + +TEST(LibtorchActorNeuralNetTest, loadActorStateRejectsHiddenLayerMismatch) +{ + TestableLibtorchActorNeuralNet saved("saved_actor", + 2, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + Moose::UnitUtils::TempFile archive; + torch::save(std::make_shared(saved), archive.path().string()); + + TestableLibtorchActorNeuralNet restored("restored_actor", + 2, + 1, + {3}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0, -2.0}, + {0.5, 3.0}, + {4.0}); + + EXPECT_ANY_THROW(Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string())); +} + +TEST(LibtorchActorNeuralNetTest, loadActorStateRejectsBoundednessMismatch) +{ + TestableLibtorchActorNeuralNet saved("saved_actor", + 1, + 1, + {2}, + {"linear"}, + {}, + {}, + torch::kCPU, + torch::kDouble, + true, + {1.0}, + {2.0}, + {1.0}); + + Moose::UnitUtils::TempFile archive; + torch::save(std::make_shared(saved), archive.path().string()); + + TestableLibtorchActorNeuralNet restored("restored_actor", + 1, + 1, + {2}, + {"linear"}, + {-2.0}, + {2.0}, + torch::kCPU, + torch::kDouble, + true, + {1.0}, + {2.0}, + {1.0}); + + EXPECT_ANY_THROW(Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string())); +} + +#endif diff --git a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C new file mode 100644 index 000000000000..a45a16455d33 --- /dev/null +++ b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C @@ -0,0 +1,202 @@ +//* This file is part of the MOOSE framework +//* https://mooseframework.inl.gov +//* +//* All rights reserved, see COPYRIGHT for full restrictions +//* https://github.com/idaholab/moose/blob/master/COPYRIGHT +//* +//* Licensed under LGPL 2.1, please see LICENSE for details +//* https://www.gnu.org/licenses/lgpl-2.1.html + +#ifdef MOOSE_LIBTORCH_ENABLED + +#include "gtest/gtest.h" + +#include "LibtorchActorNeuralNet.h" +#include "LibtorchArtificialNeuralNet.h" +#include "LibtorchObservationHistoryHelper.h" +#include "LibtorchRandomUtils.h" +#include "LibtorchRLMiniBatchSampler.h" +#include "LibtorchRLPPOLoss.h" +#include "LibtorchRLTrajectoryBuffer.h" +#include "LibtorchRLValueEstimator.h" + +#include + +namespace +{ + +TEST(LibtorchRLCoreTest, ObservationHistoryStacksCurrentAndTrajectoryData) +{ + LibtorchObservationHistoryHelper history(3); + + const auto expanded_observation_factors = history.expandObservationFactors({0.5, 2.0}); + EXPECT_EQ(expanded_observation_factors, std::vector({0.5, 2.0, 0.5, 2.0, 0.5, 2.0})); + + std::vector> old_observations; + history.initializeHistory({1.0, 6.0}, old_observations); + + const auto stacked_current = history.stackCurrentObservation({3.0, 2.0}, old_observations); + EXPECT_EQ(stacked_current, std::vector({3.0, 2.0, 1.0, 6.0, 1.0, 6.0})); + + std::vector> component_trajectories = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; + const auto stacked_trajectory = history.stackTrajectoryObservation(component_trajectories, 2); + EXPECT_EQ(stacked_trajectory, std::vector({3.0, 6.0, 2.0, 5.0, 1.0, 4.0})); +} + +TEST(LibtorchRLCoreTest, ObservationHistoryRejectsMalformedStoredHistory) +{ + LibtorchObservationHistoryHelper history(3); + + EXPECT_THROW( + { + try + { + history.stackCurrentObservation({3.0, 2.0}, {{1.0, 6.0}}); + } + catch (const std::exception & e) + { + EXPECT_EQ(std::string(e.what()), + "Observation history must contain 2 stored entries, but got 1."); + throw; + } + }, + std::exception); +} + +TEST(LibtorchRLCoreTest, ObservationHistoryRejectsZeroTimesteps) +{ + EXPECT_THROW( + { + try + { + LibtorchObservationHistoryHelper history(0); + static_cast(history); + } + catch (const std::exception & e) + { + EXPECT_EQ(std::string(e.what()), + "Observation history requires at least one input timestep."); + throw; + } + }, + std::exception); +} + +TEST(LibtorchRLCoreTest, ValueEstimatorComputesGAETargets) +{ + Moose::LibtorchArtificialNeuralNet value_network("value", 1, 1, {}, {"linear"}); + auto value_params = value_network.named_parameters(); + value_params[0].value().data().fill_(1.0); + value_params[1].value().data().fill_(0.0); + + LibtorchRLTrajectoryBuffer::Trajectory trajectory; + trajectory.observations = {{1.0}, {2.0}}; + trajectory.next_observations = {{2.0}, {3.0}}; + trajectory.rewards = {0.5, 1.0}; + + LibtorchRLValueEstimator estimator(0.9, 0.95); + const auto targets = estimator.estimate(trajectory, value_network); + + ASSERT_EQ(targets.advantages.size(), 2u); + ASSERT_EQ(targets.value_targets.size(), 2u); + EXPECT_NEAR(targets.advantages[0], 2.7535, 1e-12); + EXPECT_NEAR(targets.advantages[1], 1.7, 1e-12); + EXPECT_NEAR(targets.value_targets[0], 3.7535, 1e-12); + EXPECT_NEAR(targets.value_targets[1], 3.7, 1e-12); +} + +TEST(LibtorchRLCoreTest, PPOLossUsesStoredLogProbabilityAndValueTarget) +{ + Moose::LibtorchActorNeuralNet policy_network("policy", 1, 1, {}, {"linear"}); + policy_network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0); + policy_network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0); + policy_network.gaussianActionDistribution().stdModule()->weight.data().fill_(0.0); + policy_network.gaussianActionDistribution().stdModule()->bias.data().fill_(0.0); + + Moose::LibtorchArtificialNeuralNet value_network("value", 1, 1, {}, {"linear"}); + auto value_params = value_network.named_parameters(); + value_params[0].value().data().fill_(0.0); + value_params[1].value().data().fill_(1.0); + + LibtorchRLMiniBatch batch; + batch.observations = torch::zeros({1, 1}, torch::TensorOptions().dtype(torch::kDouble)); + batch.actions = torch::zeros({1, 1}, torch::TensorOptions().dtype(torch::kDouble)); + batch.old_log_probabilities = + torch::tensor({{-0.5 * std::log(2.0 * libMesh::pi)}}, + torch::TensorOptions().dtype(torch::kDouble)); + batch.value_targets = torch::tensor({{1.5}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.advantages = torch::tensor({{2.0}}, torch::TensorOptions().dtype(torch::kDouble)); + + LibtorchRLPPOLoss loss(0.2, 0.01); + const auto loss_values = loss.compute(policy_network, value_network, batch); + + const Real expected_entropy = 0.5 * std::log(2.0 * libMesh::pi) + 0.5; + const Real expected_actor_loss = -(2.0 + 0.01 * expected_entropy); + const Real expected_critic_loss = 0.25; + + EXPECT_NEAR(loss_values.entropy.item(), expected_entropy, 1e-12); + EXPECT_NEAR(loss_values.actor_loss.item(), expected_actor_loss, 1e-12); + EXPECT_NEAR(loss_values.critic_loss.item(), expected_critic_loss, 1e-12); +} + +TEST(LibtorchRLCoreTest, MiniBatchSamplerStandardizesAdvantagesPerBatch) +{ + LibtorchRLTrajectoryBuffer::TensorBatch batch; + batch.observations = + torch::tensor({{0.0}, {1.0}, {2.0}, {3.0}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.actions = + torch::tensor({{0.1}, {0.2}, {0.3}, {0.4}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.log_probabilities = + torch::tensor({{-1.0}, {-1.1}, {-1.2}, {-1.3}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.value_targets = + torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.advantages = + torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble)); + + LibtorchRLMiniBatchSampler sampler; + const auto mini_batches = sampler.sample(batch, 2, true); + + ASSERT_EQ(mini_batches.size(), 2u); + for (const auto & mini_batch : mini_batches) + { + ASSERT_EQ(mini_batch.size(), 2); + EXPECT_NEAR(mini_batch.advantages.mean().item(), 0.0, 1e-12); + } +} + +TEST(LibtorchRLCoreTest, MiniBatchSamplerUsesExplicitGeneratorForDeterministicShuffling) +{ + LibtorchRLTrajectoryBuffer::TensorBatch batch; + batch.observations = + torch::tensor({{0.0}, {1.0}, {2.0}, {3.0}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.actions = + torch::tensor({{0.1}, {0.2}, {0.3}, {0.4}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.log_probabilities = + torch::tensor({{-1.0}, {-1.1}, {-1.2}, {-1.3}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.value_targets = + torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble)); + batch.advantages = + torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble)); + + LibtorchRLMiniBatchSampler sampler; + const auto first_batches = sampler.sample(batch, 2, false, Moose::makeLibtorchCPUGenerator(9876)); + const auto second_batches = + sampler.sample(batch, 2, false, Moose::makeLibtorchCPUGenerator(9876)); + + ASSERT_EQ(first_batches.size(), second_batches.size()); + for (const auto i : index_range(first_batches)) + { + EXPECT_TRUE(torch::allclose(first_batches[i].observations, + second_batches[i].observations, + /* rtol = */ 0.0, + /* atol = */ 0.0)); + EXPECT_TRUE(torch::allclose(first_batches[i].actions, + second_batches[i].actions, + /* rtol = */ 0.0, + /* atol = */ 0.0)); + } +} + +} // namespace + +#endif diff --git a/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json b/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json index f04acdb645fe..5f2e6e84d21e 100644 --- a/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json +++ b/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json @@ -112,12 +112,23 @@ 0.3051220898124913, 0.2652967572212219 ], + "input_scaling_factors": [ + 1.0, + 1.0 + ], + "input_shift_factors": [ + 0.0, + 0.0 + ], "output_layer_.bias": [ 0.4794089532666399 ], "output_layer_.weight": [ 0.47736589074351915, 0.11757504436858923 + ], + "output_scaling_factors": [ + 1.0 ] } }, diff --git a/test/tests/controls/libtorch_nn_control/read_control.i b/test/tests/controls/libtorch_nn_control/read_control.i index 5c8aff04a390..ac9a711a9ac6 100644 --- a/test/tests/controls/libtorch_nn_control/read_control.i +++ b/test/tests/controls/libtorch_nn_control/read_control.i @@ -97,7 +97,7 @@ cp = 1.0 [src_control] type = LibtorchNeuralNetControl parameters = "Kernels/anti_source/value" - responses = 'T_max' + observations = 'T_max' execute_on = 'TIMESTEP_BEGIN' [] []