Changes from all commits (51 commits)
8103262
Add source modifications to the trainer to be hable to handle stochas…
grmnptr Jan 8, 2025
5b72067
Move examples to stochastic reporters.
grmnptr Jan 8, 2025
67231ed
Fix indexing issue in reward to go.
grmnptr Jan 14, 2025
bfa7fa6
Add sampler transfer.
grmnptr Jan 14, 2025
74f5e12
Transition toward sampler and change seeds per application in this case.
grmnptr Jan 14, 2025
4442115
Fix normalization problem.
grmnptr Jan 15, 2025
8ad6850
Check sensitivity to inputs, understand multi worker behavior.
grmnptr Jan 22, 2025
ace272b
Add reward pp
grmnptr Feb 2, 2025
abe70b9
Add files for vortex shedding example.
grmnptr Feb 2, 2025
a45d92c
Add option for smoothing signal.
grmnptr Feb 2, 2025
4f06dcd
Adopt training for vortex shedding.
grmnptr Feb 2, 2025
3a0a78d
Add run-ready files.
grmnptr Feb 5, 2025
b17f831
Add actor network
grmnptr Feb 16, 2025
e950f37
Add different distributions.
grmnptr Feb 19, 2025
2abba0d
Add control option without sampling.
grmnptr Feb 28, 2025
fbac7cd
Extend reward PP
grmnptr Mar 1, 2025
e16279b
Add plotting script.
grmnptr Mar 2, 2025
d6990d0
Save what happens.
grmnptr Nov 26, 2025
b0a9231
Adopt to new changes.
grmnptr Apr 21, 2026
77baa1b
Apply formatting, add beta logprob fix.
grmnptr Apr 21, 2026
5226140
Split out action distribution (close to policy) from the drltrainer.
grmnptr Apr 21, 2026
4f75b66
Split out observation history from the drl trainer.
grmnptr Apr 21, 2026
6ac3663
Simplify the actor neural net.
grmnptr Apr 21, 2026
baf2a56
Simplify the control neural net in the framework.
grmnptr Apr 21, 2026
68da054
Add loss object, minibatch selector, and buffer for organized data co…
grmnptr Apr 21, 2026
5dcb70b
Simplify the drl control object.
grmnptr Apr 21, 2026
10b2974
Simplify the trainer.
grmnptr Apr 21, 2026
559a99d
Add unit tests for the controller.
grmnptr Apr 21, 2026
661d053
Move the scaling to the network itself instead of always doing it on …
grmnptr Apr 21, 2026
36c4eeb
Fix transfer update and shift.
grmnptr Apr 22, 2026
10ef125
Add state-independent variance. Fix restart.
grmnptr Apr 22, 2026
d0fc79a
Separate Beta and Gaussian distributions.
grmnptr Apr 22, 2026
38db960
Move action distribution to STM.
grmnptr Apr 22, 2026
563c6c2
Remove old framework implementation for distribution heads.
grmnptr Apr 23, 2026
c8f445c
Remove meltpool example, remove unused postprocessors.
grmnptr Apr 23, 2026
5e28ce3
remive min max parameters from the basic ann.
grmnptr Apr 23, 2026
408dc12
Remove the vortex control example.
grmnptr Apr 23, 2026
c9c637a
Make loading nicer, move setup to initial setup.
grmnptr Apr 23, 2026
30b7146
Remove normalization for the initialization.
grmnptr Apr 24, 2026
3007e7d
Add docstrings.
grmnptr Apr 24, 2026
2c32e39
Make the random number generation consistent in DRL.
grmnptr Apr 24, 2026
0af111c
Restrict sampler controller transfer.
grmnptr Apr 24, 2026
b2e8646
Rename response to observation, make sure we can recover a DRL contro…
grmnptr Apr 24, 2026
c295c2f
Add more docstrings.
grmnptr Apr 25, 2026
199386e
Simplify observationhistory.
grmnptr Apr 25, 2026
5458d59
Rename some object to make more sense.
grmnptr Apr 28, 2026
1dad4e1
Remove LiftDragRewardPostprocessor from drl-mods
grmnptr Apr 28, 2026
e17d7f8
More docstrings, cleanup remove testing PPs.
grmnptr Apr 28, 2026
3df5d51
Finish first round of cleanup.
grmnptr Apr 28, 2026
62dbc5e
Add modification for the documentation as well.
grmnptr Apr 28, 2026
ef3ee6e
Add proper format.
grmnptr Apr 28, 2026
7 changes: 0 additions & 7 deletions framework/doc/content/bib/moose.bib
@@ -45,13 +45,6 @@ @book{muller1995neural
publisher={Springer Science \& Business Media}
}

@article{schulman2017proximal,
title={Proximal policy optimization algorithms},
author={Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
journal={arXiv preprint arXiv:1707.06347},
year={2017}
}

@article{tonks2012object,
title={An object-oriented finite element framework for multiphysics phase field simulations},
author={Tonks, Michael R and Gaston, Derek and Millett, Paul C and Andr\v{s}, David and Talbot, Paul},
46 changes: 29 additions & 17 deletions framework/include/libtorch/controls/LibtorchNeuralNetControl.h
@@ -12,6 +12,7 @@
#pragma once

#include "LibtorchArtificialNeuralNet.h"
#include "LibtorchObservationHistoryHelper.h"
#include "Control.h"

/**
Expand All @@ -30,6 +31,9 @@ class LibtorchNeuralNetControl : public Control
/// Construct using input parameters
LibtorchNeuralNetControl(const InputParameters & parameters);

/// Load any file-backed controller state after full object construction
virtual void initialSetup() override;

/// Execute neural network to determine the controllable parameter values
virtual void execute() override;

@@ -48,13 +52,18 @@ class LibtorchNeuralNetControl : public Control
* when copying the neural network from a main app which trains it.
* @param input_nn Reference to a neural network which will be copied into this object
*/
void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn);
virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn);

/**
* Load the controller neural network from the configured checkpoint file.
*/
virtual void loadControlNeuralNetFromFile();

/// Return a reference to the stored neural network
const Moose::LibtorchNeuralNetBase & controlNeuralNet() const;

/// Return true if the object already has a neural netwok
bool hasControlNeuralNet() const { return (_nn != NULL); };
bool hasControlNeuralNet() const { return _nn != nullptr; };

protected:
/**
@@ -68,41 +77,44 @@
const std::vector<std::string> & conditional_param,
bool should_be_defined = true);

/// Function that updates the values of the current response
void updateCurrentResponse();
/// Refresh the current observation values from the linked postprocessors.
void updateCurrentObservation();

/// Function that prepares the input tensor for the controller neural network
torch::Tensor prepareInputTensor();

/// The values of the current observed postprocessor values
std::vector<Real> _current_response;
/// This variable is populated if the controller needs acess to older values of the
std::vector<Real> _current_observation;
/// This variable is populated if the controller needs access to older values of the
/// observed postprocessor values
std::vector<std::vector<Real>> & _old_responses;
std::vector<std::vector<Real>> & _old_observations;

/// The names of the controllable parameters
const std::vector<std::string> & _control_names;
/// The control signals from the last evaluation of the controller
std::vector<Real> _current_control_signals;
/// The control signals from the last evaluation of the controller, saved for recover/restart.
std::vector<Real> & _current_control_signals;

/// Names of the postprocessors which contain the observations of the system
const std::vector<PostprocessorName> & _response_names;
const std::vector<PostprocessorName> & _observation_names;

/// Links to the current response postprocessor values. This is necessary so that we can check
/// Links to the current observation postprocessor values. This is necessary so that we can check
/// if the postprocessors exist.
std::vector<const Real *> _response_values;
std::vector<const Real *> _observation_values;

/// Number of timesteps to use as input data from the reporters (this influences how many past
/// results are used, e.g. the size of _old_responses)
/// results are used, e.g. the size of _old_observations)
const unsigned int _input_timesteps;

/// Shifting constants for the responses
const std::vector<Real> _response_shift_factors;
/// Scaling constants (multipliers) for the responses
const std::vector<Real> _response_scaling_factors;
/// Shifting constants for the observations
const std::vector<Real> _observation_shift_factors;
/// Scaling constants (multipliers) for the observations
const std::vector<Real> _observation_scaling_factors;
/// Multipliers for the actions
const std::vector<Real> _action_scaling_factors;

/// Shared observation history stacking and factor-expansion helper
const LibtorchObservationHistoryHelper _observation_history;

/// Pointer to the neural net object which is supposed to be used to control
/// the parameter values. The controller owns this object, but it can be read
/// from file or copied by a transfer.
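The renamed `_observation_shift_factors` and `_observation_scaling_factors` members above apply a per-entry affine transform to the observed postprocessor values before they reach the network. A minimal sketch of that preprocessing, assuming a `(value + shift) * scale` convention; `preprocessObservation` is an illustrative name, not part of the MOOSE API:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical sketch: shift each raw observation entry, then multiply by
// its scaling factor, mirroring _observation_shift_factors and
// _observation_scaling_factors. The sign convention is an assumption.
std::vector<double> preprocessObservation(const std::vector<double> & raw,
                                          const std::vector<double> & shift,
                                          const std::vector<double> & scale)
{
  assert(raw.size() == shift.size() && raw.size() == scale.size());
  std::vector<double> out(raw.size());
  for (std::size_t i = 0; i < raw.size(); ++i)
    out[i] = (raw[i] + shift[i]) * scale[i]; // affine transform per entry
  return out;
}
```

For example, with shifts `{-1, 0}` and scales `{2, 1}`, the raw pair `{1, 2}` maps to `{0, 2}`.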
112 changes: 94 additions & 18 deletions framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h
@@ -13,6 +13,7 @@

#include <torch/torch.h>
#include <torch/script.h>
#include <torch/serialize/archive.h>
#include "LibtorchNeuralNetBase.h"
#include "MooseError.h"
#include "DataIO.h"
@@ -22,44 +23,57 @@
namespace Moose
{

// A class that describes a simple feed-forward neural net.
/**
* Simple feed-forward neural net with optional affine input and output scaling.
*/
class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeuralNetBase
{
public:
/**
* Construct using input parameters
* @param name Name of the neural network
* @param num_inputs The number of input neurons/parameters
* @param num_neurons_per_layer Number of neurons per hidden layer
* @param num_outputs The number of output neurons
* Build a plain feed-forward neural network.
* @param name Name of the neural network module.
* @param num_inputs Number of input neurons or parameters.
* @param num_outputs Number of output neurons.
* @param num_neurons_per_layer Hidden-layer widths.
* @param activation_function Hidden-layer activation names.
* @param device_type Torch device used by the module.
* @param scalar_type Torch scalar type used by the module.
* @param build_on_construct Whether to build the torch modules right away.
* @param input_shift_factors Optional affine input shifts.
* @param input_scaling_factors Optional affine input scales.
* @param output_scaling_factors Optional output scaling factors.
*/
LibtorchArtificialNeuralNet(const std::string name,
const unsigned int num_inputs,
const unsigned int num_outputs,
const std::vector<unsigned int> & num_neurons_per_layer,
const std::vector<std::string> & activation_function = {"relu"},
const torch::DeviceType device_type = torch::kCPU,
const torch::ScalarType scalar_type = torch::kDouble);
const torch::ScalarType scalar_type = torch::kDouble,
const bool build_on_construct = true,
const std::vector<Real> & input_shift_factors = {},
const std::vector<Real> & input_scaling_factors = {},
const std::vector<Real> & output_scaling_factors = {});

/**
* Copy construct an artificial neural network
* @param nn The neural network which needs to be copied
* Copy-construct a feed-forward neural network.
* @param nn Neural network to copy.
* @param build_on_construct Whether to rebuild the module structure during the copy.
*/
LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn);
LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn,
const bool build_on_construct = true);

/**
* Add layers to the neural network
* @param layer_name The name of the layer to be added
* @param parameters A map of parameter names and the corresponding values which
* describe the neural net layer architecture
* Add one linear layer to the network.
* @param layer_name Name of the layer to add.
* @param parameters Small parameter map that describes the layer shape.
*/
virtual void addLayer(const std::string & layer_name,
const std::unordered_map<std::string, unsigned int> & parameters);

/**
* Overriding the forward substitution function for the neural network, unfortunately
* this cannot be const since it creates a graph in the background
* @param x Input tensor for the evaluation
* Run a forward pass through the network.
* @param x Input tensor for the evaluation.
*/
virtual torch::Tensor forward(const torch::Tensor & x) override;

@@ -79,13 +93,61 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu
torch::DeviceType deviceType() const { return _device_type; }
/// Return the data type which is used by this neural network
torch::ScalarType dataType() const { return _data_type; }
/// Return the affine input shift factors used before evaluation
const std::vector<Real> & inputShiftFactors() const { return _input_shift_factors; }
/// Return the affine input scaling factors used before evaluation
const std::vector<Real> & inputScalingFactors() const { return _input_scaling_factors; }
/// Return the output scaling factors applied after evaluation
const std::vector<Real> & outputScalingFactors() const { return _output_scaling_factors; }
/// Construct the neural network
void constructNeuralNetwork();
virtual void constructNeuralNetwork();

/// Update cached affine metadata vectors from the registered libtorch buffers.
void synchronizeAffineFactorsFromBuffers();

/**
* Map an activation name to the orthogonal-initialization gain we want to use.
* @param activation Activation name to look up.
*/
Comment on lines +105 to +111

Member: It's the wild-west for doxygen comment structure. We should get something in our style guide about this at some point.

Contributor Author: In general, I try to do the slashes for short comments and the asterisk for longer ones. But I never thought about defining what is short and what is long.

Contributor Author: I'll make this a bit more uniform.

Member: I don't blame you. It's a reasonable heuristic and maybe that's the one we'll end up putting in the style guide. Generally I've always done the block comment structure for methods and then /// for data. But as this is not in the style guide, I can't say what I do is the right way.

Real determineGain(const std::string & activation);

/**
* Initialize the trainable weights and biases.
* @param generator Optional torch random-number generator used for reproducible initialization.
Member: @zachmprince should we just use libtorch for random number generation? This is in reference to your recent PR. The cost would be a no-longer-optional dependency. Possible gain would be reduced code maintenance and overall less code duplication across the OSS ecosystem? I defer to you two on this. I'm not an expert in this area.
*/
virtual void initializeNeuralNetwork(c10::optional<at::Generator> generator = c10::nullopt);

/// Store the network architecture in a json file (for debugging, visualization)
void store(nlohmann::json & json) const;

protected:
/**
* Set affine metadata by either accepting the user values or filling defaults.
* @param factors User-provided affine factors.
* @param expected_size Expected number of entries.
* @param default_value Default value used when the vector is empty.
* @param factor_name Name used in error messages.
*/
static std::vector<Real> setAffineFactors(const std::vector<Real> & factors,
unsigned int expected_size,
Real default_value,
const std::string & factor_name);

/// Initialize the registered affine metadata buffers used by serialization.
void initializeAffineBuffers();

/**
* Apply affine preprocessing to the raw input tensor.
* @param x Raw input tensor.
*/
virtual torch::Tensor preprocessInput(const torch::Tensor & x) const;

/**
* Apply the configured output scaling to a network output tensor.
* @param y Raw network output tensor.
*/
virtual torch::Tensor scaleOutput(const torch::Tensor & y) const;

/// Name of the neural network
const std::string _name;
/// Submodules that hold linear operations and the corresponding
@@ -104,10 +166,24 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu
const torch::DeviceType _device_type;
/// The data type used in this neural network
const torch::ScalarType _data_type;
/// Affine preprocessing applied to the flattened input
std::vector<Real> _input_shift_factors;
/// Multiplicative affine preprocessing applied after shifting the input
std::vector<Real> _input_scaling_factors;
/// Multiplicative scaling applied after the network output is formed
std::vector<Real> _output_scaling_factors;
/// Registered libtorch buffer holding the affine input shifts
torch::Tensor _input_shift_tensor;
/// Registered libtorch buffer holding the affine input scaling factors
torch::Tensor _input_scale_tensor;
/// Registered libtorch buffer holding the output scaling factors
torch::Tensor _output_scale_tensor;
};

void to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * const & network);

void loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn,
Member: doxygen please
const std::string & filename);
}

template <>
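The new `determineGain` declaration above maps an activation name to the gain used for orthogonal initialization. A plausible sketch, using the standard gain values from `torch::nn::init::calculate_gain`; the exact set of activation names the MOOSE implementation accepts is an assumption:

```cpp
#include <cmath>
#include <stdexcept>
#include <string>

// Hypothetical sketch of determineGain(): return the orthogonal-init gain
// commonly associated with each activation (values match
// torch::nn::init::calculate_gain). Unknown names raise an error.
double determineGain(const std::string & activation)
{
  if (activation == "relu")
    return std::sqrt(2.0); // sqrt(2) for ReLU
  if (activation == "tanh")
    return 5.0 / 3.0; // standard tanh gain
  if (activation == "sigmoid" || activation == "linear")
    return 1.0; // identity gain
  throw std::invalid_argument("Unknown activation: " + activation);
}
```

The gain multiplies the orthogonal weight matrix at initialization so that activations keep roughly unit variance layer to layer.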
@@ -0,0 +1,85 @@
//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#ifdef MOOSE_LIBTORCH_ENABLED

#pragma once

#include "MooseTypes.h"

#include <vector>

/**
* Shared observation history stacking and factor-expansion logic for libtorch-based controls and
* trainers.
*/
class LibtorchObservationHistoryHelper
{
public:
/**
* Build an observation-history helper for libtorch inputs.
* @param input_timesteps Number of timesteps to stack into each flattened input.
*/
LibtorchObservationHistoryHelper(unsigned int input_timesteps);

/// Return the number of timesteps stacked into each flattened input.
unsigned int inputTimesteps() const { return _input_timesteps; }

/**
* Fill the history buffer with copies of the current observation.
* @param observation Current observation.
* @param old_observations History buffer that stores previous observations.
*/
void initializeHistory(const std::vector<Real> & observation,
std::vector<std::vector<Real>> & old_observations) const;

/**
* Advance the history buffer by inserting the latest observation.
* @param observation Current observation.
* @param old_observations History buffer ordered from newest to oldest.
*/
void advanceHistory(const std::vector<Real> & observation,
std::vector<std::vector<Real>> & old_observations) const;

/**
* Repeat per-observation-entry factors across all stacked timesteps.
* @param observation_factors Per-entry factors for one observation vector.
*/
std::vector<Real> expandObservationFactors(const std::vector<Real> & observation_factors) const;

/**
* Flatten the current observation together with its stored history.
* @param observation Current observation.
* @param old_observations History buffer ordered from newest to oldest.
*/
std::vector<Real>
stackCurrentObservation(const std::vector<Real> & observation,
const std::vector<std::vector<Real>> & old_observations) const;

/**
* Flatten one time slice of observation-component trajectories with causal history.
* This uses [component][time] because the trainer receives reporter data one observation
* component at a time, so keeping that layout avoids building an extra transposed
* [time][component] container before stacking.
* @param component_trajectories Observation trajectories indexed as [component][time].
* @param time_index Time index to stack.
*/
std::vector<Real>
stackTrajectoryObservation(const std::vector<std::vector<Real>> & component_trajectories,
unsigned int time_index) const;

private:
/// Check that all observation-component trajectories have a consistent shape.
void validateTrajectoryShape(const std::vector<std::vector<Real>> & component_trajectories) const;

/// Number of timesteps stacked into each flattened observation.
const unsigned int _input_timesteps;
};

#endif
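The stacking and factor-expansion behavior documented above can be sketched without libtorch. This is a guess at the semantics implied by the doxygen (newest-to-oldest flattening, per-entry factors repeated once per stacked timestep), not the MOOSE implementation:

```cpp
#include <vector>

// Sketch: flatten the newest observation followed by the stored history
// (ordered newest to oldest) into one vector of length
// input_timesteps * observation_size. The ordering is an assumption.
std::vector<double>
stackCurrentObservation(const std::vector<double> & observation,
                        const std::vector<std::vector<double>> & old_observations,
                        unsigned int input_timesteps)
{
  std::vector<double> stacked;
  stacked.reserve(input_timesteps * observation.size());
  stacked.insert(stacked.end(), observation.begin(), observation.end());
  // Append input_timesteps - 1 history slices after the current observation.
  for (unsigned int t = 0; t + 1 < input_timesteps; ++t)
    stacked.insert(stacked.end(), old_observations[t].begin(), old_observations[t].end());
  return stacked;
}

// Sketch: repeat per-entry factors once per stacked timestep so the factor
// vector matches the flattened input length.
std::vector<double> expandObservationFactors(const std::vector<double> & factors,
                                             unsigned int input_timesteps)
{
  std::vector<double> expanded;
  expanded.reserve(input_timesteps * factors.size());
  for (unsigned int t = 0; t < input_timesteps; ++t)
    expanded.insert(expanded.end(), factors.begin(), factors.end());
  return expanded;
}
```

With `input_timesteps = 3`, a current observation `{1, 2}` and history `{{3, 4}, {5, 6}}` would flatten to `{1, 2, 3, 4, 5, 6}`, and per-entry factors `{0.5, 2}` would expand to `{0.5, 2, 0.5, 2, 0.5, 2}`.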