diff --git a/framework/doc/content/bib/moose.bib b/framework/doc/content/bib/moose.bib
index 003a179ae1da..25a3b2529b42 100644
--- a/framework/doc/content/bib/moose.bib
+++ b/framework/doc/content/bib/moose.bib
@@ -45,13 +45,6 @@ @book{muller1995neural
   publisher={Springer Science \& Business Media}
 }
 
-@article{schulman2017proximal,
-  title={Proximal policy optimization algorithms},
-  author={Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
-  journal={arXiv preprint arXiv:1707.06347},
-  year={2017}
-}
-
 @article{tonks2012object,
   title={An object-oriented finite element framework for multiphysics phase field simulations},
   author={Tonks, Michael R and Gaston, Derek and Millett, Paul C and Andr\v{s}, David and Talbot, Paul},
diff --git a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h
index b6229008cce3..db05d7d8d254 100644
--- a/framework/include/libtorch/controls/LibtorchNeuralNetControl.h
+++ b/framework/include/libtorch/controls/LibtorchNeuralNetControl.h
@@ -12,6 +12,7 @@
 #pragma once
 
 #include "LibtorchArtificialNeuralNet.h"
+#include "LibtorchObservationHistoryHelper.h"
 #include "Control.h"
 
 /**
@@ -30,6 +31,9 @@ class LibtorchNeuralNetControl : public Control
   /// Construct using input parameters
   LibtorchNeuralNetControl(const InputParameters & parameters);
 
+  /// Load any file-backed controller state after full object construction
+  virtual void initialSetup() override;
+
   /// Execute neural network to determine the controllable parameter values
   virtual void execute() override;
 
@@ -48,13 +52,18 @@ class LibtorchNeuralNetControl : public Control
    * when copying the neural network from a main app which trains it.
    * @param input_nn Reference to a neural network which will be copied into this object
    */
-  void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn);
+  virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn);
+
+  /**
+   * Load the controller neural network from the configured checkpoint file.
+   */
+  virtual void loadControlNeuralNetFromFile();
 
   /// Return a reference to the stored neural network
   const Moose::LibtorchNeuralNetBase & controlNeuralNet() const;
 
   /// Return true if the object already has a neural netwok
-  bool hasControlNeuralNet() const { return (_nn != NULL); };
+  bool hasControlNeuralNet() const { return _nn != nullptr; };
 
 protected:
   /**
@@ -68,41 +77,44 @@ class LibtorchNeuralNetControl : public Control
                                  const std::vector<std::string> & conditional_param,
                                  bool should_be_defined = true);
 
-  /// Function that updates the values of the current response
-  void updateCurrentResponse();
+  /// Refresh the current observation values from the linked postprocessors.
+  void updateCurrentObservation();
 
   /// Function that prepares the input tensor for the controller neural network
   torch::Tensor prepareInputTensor();
 
   /// The values of the current observed postprocessor values
-  std::vector<Real> _current_response;
-  /// This variable is populated if the controller needs acess to older values of the
+  std::vector<Real> _current_observation;
+  /// This variable is populated if the controller needs access to older values of the
   /// observed postprocessor values
-  std::vector<std::vector<Real>> & _old_responses;
+  std::vector<std::vector<Real>> & _old_observations;
 
   /// The names of the controllable parameters
   const std::vector<std::string> & _control_names;
-  /// The control signals from the last evaluation of the controller
-  std::vector<Real> _current_control_signals;
+  /// The control signals from the last evaluation of the controller, saved for recover/restart.
+  std::vector<Real> & _current_control_signals;
 
   /// Names of the postprocessors which contain the observations of the system
-  const std::vector<PostprocessorName> & _response_names;
+  const std::vector<PostprocessorName> & _observation_names;
 
-  /// Links to the current response postprocessor values. This is necessary so that we can check
+  /// Links to the current observation postprocessor values. This is necessary so that we can check
   /// if the postprocessors exist.
-  std::vector<const Real *> _response_values;
+  std::vector<const Real *> _observation_values;
 
   /// Number of timesteps to use as input data from the reporters (this influences how many past
-  /// results are used, e.g. the size of _old_responses)
+  /// results are used, e.g. the size of _old_observations)
   const unsigned int _input_timesteps;
 
-  /// Shifting constants for the responses
-  const std::vector<Real> _response_shift_factors;
-  /// Scaling constants (multipliers) for the responses
-  const std::vector<Real> _response_scaling_factors;
+  /// Shifting constants for the observations
+  const std::vector<Real> _observation_shift_factors;
+  /// Scaling constants (multipliers) for the observations
+  const std::vector<Real> _observation_scaling_factors;
   /// Multipliers for the actions
   const std::vector<Real> _action_scaling_factors;
 
+  /// Shared observation history stacking and factor-expansion helper
+  const LibtorchObservationHistoryHelper _observation_history;
+
   /// Pointer to the neural net object which is supposed to be used to control
   /// the parameter values. The controller owns this object, but it can be read
   /// from file or copied by a transfer.
diff --git a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h
index c995d61ed7d1..aa2a40eb25d4 100644
--- a/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h
+++ b/framework/include/libtorch/utils/LibtorchArtificialNeuralNet.h
@@ -13,6 +13,7 @@
 
 #include <torch/torch.h>
 #include <torch/script.h>
+#include <torch/serialize/archive.h>
 #include "LibtorchNeuralNetBase.h"
 #include "MooseError.h"
 #include "DataIO.h"
@@ -22,16 +23,25 @@
 namespace Moose
 {
 
-// A class that describes a simple feed-forward neural net.
+/**
+ * Simple feed-forward neural net with optional affine input and output scaling.
+ */
 class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeuralNetBase
 {
 public:
   /**
-   * Construct using input parameters
-   * @param name Name of the neural network
-   * @param num_inputs The number of input neurons/parameters
-   * @param num_neurons_per_layer Number of neurons per hidden layer
-   * @param num_outputs The number of output neurons
+   * Build a plain feed-forward neural network.
+   * @param name Name of the neural network module.
+   * @param num_inputs Number of input neurons or parameters.
+   * @param num_outputs Number of output neurons.
+   * @param num_neurons_per_layer Hidden-layer widths.
+   * @param activation_function Hidden-layer activation names.
+   * @param device_type Torch device used by the module.
+   * @param scalar_type Torch scalar type used by the module.
+   * @param build_on_construct Whether to build the torch modules right away.
+   * @param input_shift_factors Optional affine input shifts.
+   * @param input_scaling_factors Optional affine input scales.
+   * @param output_scaling_factors Optional output scaling factors.
    */
   LibtorchArtificialNeuralNet(const std::string name,
                               const unsigned int num_inputs,
@@ -39,27 +49,31 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu
                               const std::vector<unsigned int> & num_neurons_per_layer,
                               const std::vector<std::string> & activation_function = {"relu"},
                               const torch::DeviceType device_type = torch::kCPU,
-                              const torch::ScalarType scalar_type = torch::kDouble);
+                              const torch::ScalarType scalar_type = torch::kDouble,
+                              const bool build_on_construct = true,
+                              const std::vector<Real> & input_shift_factors = {},
+                              const std::vector<Real> & input_scaling_factors = {},
+                              const std::vector<Real> & output_scaling_factors = {});
 
   /**
-   * Copy construct an artificial neural network
-   * @param nn The neural network which needs to be copied
+   * Copy-construct a feed-forward neural network.
+   * @param nn Neural network to copy.
+   * @param build_on_construct Whether to rebuild the module structure during the copy.
    */
-  LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn);
+  LibtorchArtificialNeuralNet(const Moose::LibtorchArtificialNeuralNet & nn,
+                              const bool build_on_construct = true);
 
   /**
-   * Add layers to the neural network
-   * @param layer_name The name of the layer to be added
-   * @param parameters A map of parameter names and the corresponding values which
-   *                   describe the neural net layer architecture
+   * Add one linear layer to the network.
+   * @param layer_name Name of the layer to add.
+   * @param parameters Small parameter map that describes the layer shape.
    */
   virtual void addLayer(const std::string & layer_name,
                         const std::unordered_map<std::string, unsigned int> & parameters);
 
   /**
-   * Overriding the forward substitution function for the neural network, unfortunately
-   * this cannot be const since it creates a graph in the background
-   * @param x Input tensor for the evaluation
+   * Run a forward pass through the network.
+   * @param x Input tensor for the evaluation.
    */
   virtual torch::Tensor forward(const torch::Tensor & x) override;
 
@@ -79,13 +93,61 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu
   torch::DeviceType deviceType() const { return _device_type; }
   /// Return the data type which is used by this neural network
   torch::ScalarType dataType() const { return _data_type; }
+  /// Return the affine input shift factors used before evaluation
+  const std::vector<Real> & inputShiftFactors() const { return _input_shift_factors; }
+  /// Return the affine input scaling factors used before evaluation
+  const std::vector<Real> & inputScalingFactors() const { return _input_scaling_factors; }
+  /// Return the output scaling factors applied after evaluation
+  const std::vector<Real> & outputScalingFactors() const { return _output_scaling_factors; }
   /// Construct the neural network
-  void constructNeuralNetwork();
+  virtual void constructNeuralNetwork();
+
+  /// Update cached affine metadata vectors from the registered libtorch buffers.
+  void synchronizeAffineFactorsFromBuffers();
+
+  /**
+   * Map an activation name to the orthogonal-initialization gain we want to use.
+   * @param activation Activation name to look up.
+   */
+  Real determineGain(const std::string & activation);
+
+  /**
+   * Initialize the trainable weights and biases.
+   * @param generator Optional torch random-number generator used for reproducible initialization.
+   */
+  virtual void initializeNeuralNetwork(c10::optional<at::Generator> generator = c10::nullopt);
 
   /// Store the network architecture in a json file (for debugging, visualization)
   void store(nlohmann::json & json) const;
 
 protected:
+  /**
+   * Set affine metadata by either accepting the user values or filling defaults.
+   * @param factors User-provided affine factors.
+   * @param expected_size Expected number of entries.
+   * @param default_value Default value used when the vector is empty.
+   * @param factor_name Name used in error messages.
+   */
+  static std::vector<Real> setAffineFactors(const std::vector<Real> & factors,
+                                            unsigned int expected_size,
+                                            Real default_value,
+                                            const std::string & factor_name);
+
+  /// Initialize the registered affine metadata buffers used by serialization.
+  void initializeAffineBuffers();
+
+  /**
+   * Apply affine preprocessing to the raw input tensor.
+   * @param x Raw input tensor.
+   */
+  virtual torch::Tensor preprocessInput(const torch::Tensor & x) const;
+
+  /**
+   * Apply the configured output scaling to a network output tensor.
+   * @param y Raw network output tensor.
+   */
+  virtual torch::Tensor scaleOutput(const torch::Tensor & y) const;
+
   /// Name of the neural network
   const std::string _name;
   /// Submodules that hold linear operations and the corresponding
@@ -104,10 +166,24 @@ class LibtorchArtificialNeuralNet : public torch::nn::Module, public LibtorchNeu
   const torch::DeviceType _device_type;
   /// The data type used in this neural network
   const torch::ScalarType _data_type;
+  /// Affine preprocessing applied to the flattened input
+  std::vector<Real> _input_shift_factors;
+  /// Multiplicative affine preprocessing applied after shifting the input
+  std::vector<Real> _input_scaling_factors;
+  /// Multiplicative scaling applied after the network output is formed
+  std::vector<Real> _output_scaling_factors;
+  /// Registered libtorch buffer holding the affine input shifts
+  torch::Tensor _input_shift_tensor;
+  /// Registered libtorch buffer holding the affine input scaling factors
+  torch::Tensor _input_scale_tensor;
+  /// Registered libtorch buffer holding the output scaling factors
+  torch::Tensor _output_scale_tensor;
 };
 
 void to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * const & network);
 
+void loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn,
+                                          const std::string & filename);
 }
 
 template <>
diff --git a/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h b/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h
new file mode 100644
index 000000000000..97a19521a2db
--- /dev/null
+++ b/framework/include/libtorch/utils/LibtorchObservationHistoryHelper.h
@@ -0,0 +1,85 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#pragma once
+
+#include "MooseTypes.h"
+
+#include <vector>
+
+/**
+ * Shared observation history stacking and factor-expansion logic for libtorch-based controls and
+ * trainers.
+ */
+class LibtorchObservationHistoryHelper
+{
+public:
+  /**
+   * Build an observation-history helper for libtorch inputs.
+   * @param input_timesteps Number of timesteps to stack into each flattened input.
+   */
+  LibtorchObservationHistoryHelper(unsigned int input_timesteps);
+
+  /// Return the number of timesteps stacked into each flattened input.
+  unsigned int inputTimesteps() const { return _input_timesteps; }
+
+  /**
+   * Fill the history buffer with copies of the current observation.
+   * @param observation Current observation.
+   * @param old_observations History buffer that stores previous observations.
+   */
+  void initializeHistory(const std::vector<Real> & observation,
+                         std::vector<std::vector<Real>> & old_observations) const;
+
+  /**
+   * Advance the history buffer by inserting the latest observation.
+   * @param observation Current observation.
+   * @param old_observations History buffer ordered from newest to oldest.
+   */
+  void advanceHistory(const std::vector<Real> & observation,
+                      std::vector<std::vector<Real>> & old_observations) const;
+
+  /**
+   * Repeat per-observation-entry factors across all stacked timesteps.
+   * @param observation_factors Per-entry factors for one observation vector.
+   */
+  std::vector<Real> expandObservationFactors(const std::vector<Real> & observation_factors) const;
+
+  /**
+   * Flatten the current observation together with its stored history.
+   * @param observation Current observation.
+   * @param old_observations History buffer ordered from newest to oldest.
+   */
+  std::vector<Real>
+  stackCurrentObservation(const std::vector<Real> & observation,
+                          const std::vector<std::vector<Real>> & old_observations) const;
+
+  /**
+   * Flatten one time slice of observation-component trajectories with causal history.
+   * This uses [component][time] because the trainer receives reporter data one observation
+   * component at a time, so keeping that layout avoids building an extra transposed
+   * [time][component] container before stacking.
+   * @param component_trajectories Observation trajectories indexed as [component][time].
+   * @param time_index Time index to stack.
+   */
+  std::vector<Real>
+  stackTrajectoryObservation(const std::vector<std::vector<Real>> & component_trajectories,
+                             unsigned int time_index) const;
+
+private:
+  /// Check that all observation-component trajectories have a consistent shape.
+  void validateTrajectoryShape(const std::vector<std::vector<Real>> & component_trajectories) const;
+
+  /// Number of timesteps stacked into each flattened observation.
+  const unsigned int _input_timesteps;
+};
+
+#endif
diff --git a/framework/include/libtorch/utils/LibtorchRandomUtils.h b/framework/include/libtorch/utils/LibtorchRandomUtils.h
new file mode 100644
index 000000000000..9ac469c57ced
--- /dev/null
+++ b/framework/include/libtorch/utils/LibtorchRandomUtils.h
@@ -0,0 +1,43 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#pragma once
+
+#include <torch/torch.h>
+
+#include "MooseTypes.h"
+
+namespace Moose
+{
+
+/// Create an owned CPU generator using libtorch's default seed behavior.
+at::Generator makeLibtorchCPUGenerator();
+
+/**
+ * Create an owned CPU generator with an explicit seed.
+ * @param seed Seed value passed to the libtorch CPU generator.
+ */
+at::Generator makeLibtorchCPUGenerator(uint64_t seed);
+
+/**
+ * Fill a tensor with a (semi) orthogonal matrix using the provided generator.
+ * This mirrors torch::nn::init::orthogonal_, but avoids the ambient default RNG.
+ * @param tensor Tensor to initialize in place.
+ * @param gain Scaling factor applied after the orthogonal initialization.
+ * @param generator Optional torch random-number generator used to sample the initialization.
+ */
+void orthogonalInitializeTensor(torch::Tensor & tensor,
+                                Real gain = 1.0,
+                                c10::optional<at::Generator> generator = c10::nullopt);
+
+} // namespace Moose
+
+#endif
diff --git a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C
index 5502ce168647..9f3c8a39ce2f 100644
--- a/framework/src/libtorch/controls/LibtorchNeuralNetControl.C
+++ b/framework/src/libtorch/controls/LibtorchNeuralNetControl.C
@@ -23,59 +23,61 @@ LibtorchNeuralNetControl::validParams()
   InputParameters params = Control::validParams();
   params.addClassDescription("Controls the value of multiple controllable input parameters using a "
                              "Libtorch-based neural network.");
-  params.addRequiredParam<std::vector<std::string>>("parameters",
-                                                    "The input parameter(s) to control.");
+  params.addRequiredParam<std::vector<std::string>>(
+      "parameters", "Controllable input parameters driven by the network.");
   params.addRequiredParam<std::vector<PostprocessorName>>(
-      "responses", "The responses (prostprocessors) which are used for the control.");
+      "observations", "Postprocessors used as the current observation vector.");
   params.addParam<std::vector<Real>>(
-      "response_shift_factors",
-      "Constants which will be used to shift the response values. This is used for the "
-      "manipulation of the neural net inputs for better training efficiency.");
+      "observation_shift_factors",
+      {},
+      "Optional offsets applied to the observation values before scaling.");
   params.addParam<std::vector<Real>>(
-      "response_scaling_factors",
-      "Constants which will be used to multiply the shifted response values. This is used for "
-      "the manipulation of the neural net inputs for better training efficiency.");
-  params.addParam<std::string>("filename",
-                               "Define if the neural net is supposed to be loaded from a file.");
+      "observation_scaling_factors",
+      {},
+      "Optional multipliers applied after shifting the observation values.");
+  params.addParam<std::string>("filename", "Checkpoint file to load for the controller network.");
   params.addParam<bool>("torch_script_format",
                         false,
-                        "If we want to load the neural net using the torch-script format.");
-  params.addParam<unsigned int>(
+                        "Whether the checkpoint should be read as a scripted Torch module.");
+  params.addRangeCheckedParam<unsigned int>(
       "input_timesteps",
       1,
-      "Number of time steps to use in the input data, if larger than 1, "
-      "data from the previous timesteps will be used as well as inputs in the training.");
-  params.addParam<std::vector<unsigned int>>("num_neurons_per_layer",
-                                             "The number of neurons on each hidden layer.");
+      "1<=input_timesteps",
+      "Number of recent timesteps to stack into each network input.");
+  params.addParam<std::vector<unsigned int>>(
+      "num_neurons_per_layer", "Hidden-layer widths used when constructing the controller.");
   params.addParam<std::vector<std::string>>(
       "activation_function",
       std::vector<std::string>({"relu"}),
-      "The type of activation functions to use. It is either one value "
-      "or one value per hidden layer.");
+      "Activation name for each hidden layer, or one shared value for all layers.");
 
   params.addParam<std::vector<Real>>(
       "action_scaling_factors",
-      "Scale factor that multiplies the NN output to obtain a physically meaningful value.");
+      {},
+      "Per-action scaling embedded in the controller outputs so saved checkpoints stay in "
+      "physical units.");
 
   return params;
 }
 
 LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & parameters)
   : Control(parameters),
-    _old_responses(declareRestartableData<std::vector<std::vector<Real>>>("old_responses")),
+    _old_observations(declareRestartableData<std::vector<std::vector<Real>>>("old_observations")),
     _control_names(getParam<std::vector<std::string>>("parameters")),
-    _current_control_signals(std::vector<Real>(_control_names.size(), 0.0)),
-    _response_names(getParam<std::vector<PostprocessorName>>("responses")),
+    _current_control_signals(declareRestartableData<std::vector<Real>>(
+        "current_control_signals", std::vector<Real>(_control_names.size(), 0.0))),
+    _observation_names(getParam<std::vector<PostprocessorName>>("observations")),
     _input_timesteps(getParam<unsigned int>("input_timesteps")),
-    _response_shift_factors(isParamValid("response_shift_factors")
-                                ? getParam<std::vector<Real>>("response_shift_factors")
-                                : std::vector<Real>(_response_names.size(), 0.0)),
-    _response_scaling_factors(isParamValid("response_scaling_factors")
-                                  ? getParam<std::vector<Real>>("response_scaling_factors")
-                                  : std::vector<Real>(_response_names.size(), 1.0)),
-    _action_scaling_factors(isParamValid("action_scaling_factors")
+    _observation_shift_factors(isParamSetByUser("observation_shift_factors")
+                                   ? getParam<std::vector<Real>>("observation_shift_factors")
+                                   : std::vector<Real>(_observation_names.size(), 0.0)),
+    _observation_scaling_factors(isParamSetByUser("observation_scaling_factors")
+                                     ? getParam<std::vector<Real>>("observation_scaling_factors")
+                                     : std::vector<Real>(_observation_names.size(), 1.0)),
+    _action_scaling_factors(isParamSetByUser("action_scaling_factors")
                                 ? getParam<std::vector<Real>>("action_scaling_factors")
-                                : std::vector<Real>(_control_names.size(), 1.0))
+                                : std::vector<Real>(_control_names.size(), 1.0)),
+    _observation_history(_input_timesteps)
 {
   // We first check if the input parameters make sense and throw errors if different parameter
   // combinations are not allowed
@@ -83,14 +85,14 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param
                             {"num_neurons_per_layer", "activation_function"},
                             !getParam<bool>("torch_script_format"));
 
-  if (_response_names.size() != _response_shift_factors.size())
-    paramError("response_shift_factors",
-               "The number of shift factors is not the same as the number of responses!");
+  if (_observation_names.size() != _observation_shift_factors.size())
+    paramError("observation_shift_factors",
+               "The number of shift factors is not the same as the number of observations!");
 
-  if (_response_names.size() != _response_scaling_factors.size())
-    paramError(
-        "response_scaling_factors",
-        "The number of normalization coefficients is not the same as the number of responses!");
+  if (_observation_names.size() != _observation_scaling_factors.size())
+    paramError("observation_scaling_factors",
+               "The number of normalization coefficients is not the same as the number of "
+               "observations!");
 
   if (_control_names.size() != _action_scaling_factors.size())
     paramError("action_scaling_factors",
@@ -99,45 +101,68 @@ LibtorchNeuralNetControl::LibtorchNeuralNetControl(const InputParameters & param
 
   // We link to the postprocessor values so that we can fetch them any time. This also raises
   // errors if we don't have the postprocessors requested in the input.
-  for (unsigned int resp_i = 0; resp_i < _response_names.size(); ++resp_i)
-    _response_values.push_back(&getPostprocessorValueByName(_response_names[resp_i]));
+  for (unsigned int obs_i = 0; obs_i < _observation_names.size(); ++obs_i)
+    _observation_values.push_back(&getPostprocessorValueByName(_observation_names[obs_i]));
+}
+
+void
+LibtorchNeuralNetControl::initialSetup()
+{
+  // File-backed controllers are loaded after full construction so derived controls can override
+  // the loader without constructor-time type checks.
+  if (isParamSetByUser("filename"))
+    loadControlNeuralNetFromFile();
+}
 
-  // If the user wants to read the neural net from file, we do it. We can read it from a
-  // torchscript file, or we can create a shell and read back the parameters
-  if (isParamValid("filename"))
+void
+LibtorchNeuralNetControl::loadControlNeuralNetFromFile()
+{
+  const auto & filename = getParam<std::string>("filename");
+  if (getParam<bool>("torch_script_format"))
+    _nn = std::make_shared<Moose::TorchScriptModule>(filename);
+  else
   {
-    std::string filename = getParam<std::string>("filename");
-    if (getParam<bool>("torch_script_format"))
-      _nn = std::make_shared<Moose::TorchScriptModule>(filename);
-    else
+    unsigned int num_inputs = _observation_names.size() * _input_timesteps;
+    unsigned int num_outputs = _control_names.size();
+    std::vector<unsigned int> num_neurons_per_layer =
+        getParam<std::vector<unsigned int>>("num_neurons_per_layer");
+    std::vector<std::string> activation_functions =
+        isParamSetByUser("activation_function")
+            ? getParam<std::vector<std::string>>("activation_function")
+            : std::vector<std::string>({"relu"});
+    const auto input_shift_factors =
+        _observation_history.expandObservationFactors(_observation_shift_factors);
+    const auto input_scaling_factors =
+        _observation_history.expandObservationFactors(_observation_scaling_factors);
+    auto nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(filename,
+                                                                   num_inputs,
+                                                                   num_outputs,
+                                                                   num_neurons_per_layer,
+                                                                   activation_functions,
+                                                                   torch::kCPU,
+                                                                   torch::kDouble,
+                                                                   true,
+                                                                   input_shift_factors,
+                                                                   input_scaling_factors,
+                                                                   _action_scaling_factors);
+
+    try
     {
-      unsigned int num_inputs = _response_names.size() * _input_timesteps;
-      unsigned int num_outputs = _control_names.size();
-      std::vector<unsigned int> num_neurons_per_layer =
-          getParam<std::vector<unsigned int>>("num_neurons_per_layer");
-      std::vector<std::string> activation_functions =
-          parameters.isParamSetByUser("activation_function")
-              ? getParam<std::vector<std::string>>("activation_function")
-              : std::vector<std::string>({"relu"});
-      auto nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(
-          filename, num_inputs, num_outputs, num_neurons_per_layer, activation_functions);
-
-      try
-      {
-        torch::load(nn, filename);
-        _nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(*nn);
-      }
-      catch (const c10::Error & e)
-      {
-        mooseError(
-            "The requested pytorch parameter file could not be loaded. This can either be the"
-            "result of the file not existing or a misalignment in the generated container and"
-            "the data in the file. Make sure the dimensions of the generated neural net are the"
-            "same as the dimensions of the parameters in the input file!\n",
-            e.msg());
-      }
+      Moose::loadLibtorchArtificialNeuralNetState(*nn, filename);
+      _nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(*nn);
+    }
+    catch (const c10::Error & e)
+    {
+      mooseError(
+          "The requested pytorch parameter file could not be loaded. This can either be the"
+          "result of the file not existing or a misalignment in the generated container and"
+          "the data in the file. Make sure the dimensions of the generated neural net are the"
+          "same as the dimensions of the parameters in the input file!\n",
+          e.msg());
     }
   }
+
+  execute();
 }
 
 void
@@ -146,14 +171,13 @@ LibtorchNeuralNetControl::execute()
   if (_nn)
   {
     const unsigned int n_controls = _control_names.size();
-    const unsigned int num_old_timesteps = _input_timesteps - 1;
 
-    // Fetch current reporter values and populate _current_response
-    updateCurrentResponse();
+    // Fetch current observation values from the linked postprocessors.
+    updateCurrentObservation();
 
     // If this is the first timestep, we fill up the old values with the initial value
-    if (_old_responses.empty())
-      _old_responses.assign(num_old_timesteps, _current_response);
+    if (_old_observations.empty())
+      _observation_history.initializeHistory(_current_observation, _old_observations);
 
     // Organize the old an current solution into a tensor so we can evaluate the neural net
     torch::Tensor input_tensor = prepareInputTensor();
@@ -164,16 +188,14 @@ LibtorchNeuralNetControl::execute()
     _current_control_signals = {action.data_ptr<Real>(), action.data_ptr<Real>() + action.size(1)};
     for (unsigned int control_i = 0; control_i < n_controls; ++control_i)
     {
-      // We scale the controllable value for physically meaningful control action
       setControllableValueByName<Real>(_control_names[control_i],
-                                       _current_control_signals[control_i] *
-                                           _action_scaling_factors[control_i]);
+                                       _current_control_signals[control_i]);
     }
 
     // We add the curent solution to the old solutions and move everything in there one step
     // backward
-    std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend());
-    _old_responses[0] = _current_response;
+    if (_old_observations.size())
+      _observation_history.advanceHistory(_current_observation, _old_observations);
   }
 }
 
@@ -204,14 +226,11 @@ LibtorchNeuralNetControl::conditionalParameterError(
 }
 
 void
-LibtorchNeuralNetControl::updateCurrentResponse()
+LibtorchNeuralNetControl::updateCurrentObservation()
 {
-  // Gather the current response values from the reporters
-  _current_response.clear();
-
-  for (const auto & resp_i : index_range(_response_names))
-    _current_response.push_back((*_response_values[resp_i] - _response_shift_factors[resp_i]) *
-                                _response_scaling_factors[resp_i]);
+  _current_observation.resize(_observation_names.size());
+  for (const auto obs_i : index_range(_observation_names))
+    _current_observation[obs_i] = *_observation_values[obs_i];
 }
 
 void
@@ -223,14 +242,8 @@ LibtorchNeuralNetControl::loadControlNeuralNet(const Moose::LibtorchArtificialNe
 torch::Tensor
 LibtorchNeuralNetControl::prepareInputTensor()
 {
-  const unsigned int num_old_timesteps = _input_timesteps - 1;
-
-  // We convert the standard vectors to libtorch tensors
-  std::vector<Real> raw_input(_current_response);
-
-  for (const auto & step_i : make_range(num_old_timesteps))
-    raw_input.insert(raw_input.end(), _old_responses[step_i].begin(), _old_responses[step_i].end());
-
+  auto raw_input =
+      _observation_history.stackCurrentObservation(_current_observation, _old_observations);
   torch::Tensor input_tensor;
   LibtorchUtils::vectorToTensor(raw_input, input_tensor);
 
diff --git a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C
index 58fb40e33cf6..99175551d987 100644
--- a/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C
+++ b/framework/src/libtorch/utils/LibtorchArtificialNeuralNet.C
@@ -10,7 +10,187 @@
 #ifdef MOOSE_LIBTORCH_ENABLED
 
 #include "LibtorchArtificialNeuralNet.h"
+#include "LibtorchRandomUtils.h"
 #include "MooseError.h"
+#include "LibtorchUtils.h"
+
+#include <cmath>
+
+namespace
+{
+
+/**
+ * Try to read one tensor from a plain libtorch archive.
+ * @param archive Archive being read.
+ * @param key Serialized tensor name.
+ * @param tensor Tensor that receives the loaded data.
+ * @return True when the tensor was found and loaded.
+ */
+bool
+readArchiveTensor(torch::serialize::InputArchive & archive,
+                  const std::string & key,
+                  torch::Tensor & tensor)
+{
+  try
+  {
+    archive.read(key, tensor);
+    return true;
+  }
+  catch (const c10::Error &)
+  {
+    return false;
+  }
+}
+
+/**
+ * Copy a stored tensor into an existing parameter or buffer.
+ * @param destination Tensor owned by the current module.
+ * @param source Tensor read from disk.
+ */
+void
+copyTensor(torch::Tensor & destination, const torch::Tensor & source)
+{
+  destination.data().copy_(source.to(destination.options()));
+}
+
+/// Return true for affine-metadata buffers that older checkpoints may omit.
+bool
+isOptionalArtificialNeuralNetBuffer(const std::string & key)
+{
+  return key == "input_shift" || key == "input_scale" || key == "output_scale";
+}
+
+/**
+ * Look up one named tensor in a torch named-parameter or named-buffer list.
+ * @param tensors Torch named tensor list.
+ * @param key Tensor name to search for.
+ * @param tensor Tensor that receives the match.
+ * @return True when the requested tensor exists.
+ */
+template <typename NamedTensorList>
+bool
+findNamedTensor(const NamedTensorList & tensors, const std::string & key, torch::Tensor & tensor)
+{
+  for (const auto & entry : tensors)
+    if (entry.name == key)
+    {
+      tensor = entry.value;
+      return true;
+    }
+
+  return false;
+}
+
+/**
+ * Load ANN parameters and buffers from a plain libtorch archive.
+ * @param nn Neural network that receives the loaded state.
+ * @param filename Checkpoint file to read.
+ * @param error Human-readable error string filled on failure.
+ * @return True when the network was loaded successfully.
+ */
+bool
+loadArtificialNeuralNetStateFromArchive(Moose::LibtorchArtificialNeuralNet & nn,
+                                        const std::string & filename,
+                                        std::string & error)
+{
+  try
+  {
+    torch::serialize::InputArchive archive;
+    archive.load_from(filename);
+
+    for (auto & parameter : nn.named_parameters())
+    {
+      torch::Tensor stored_tensor;
+      if (!readArchiveTensor(archive, parameter.key(), stored_tensor))
+      {
+        error = "Missing serialized parameter: " + parameter.key();
+        return false;
+      }
+
+      copyTensor(parameter.value(), stored_tensor);
+    }
+
+    for (auto & buffer : nn.named_buffers())
+    {
+      torch::Tensor stored_tensor;
+      if (!readArchiveTensor(archive, buffer.key(), stored_tensor))
+      {
+        if (isOptionalArtificialNeuralNetBuffer(buffer.key()))
+          continue;
+
+        error = "Missing serialized buffer: " + buffer.key();
+        return false;
+      }
+
+      copyTensor(buffer.value(), stored_tensor);
+    }
+
+    nn.synchronizeAffineFactorsFromBuffers();
+    return true;
+  }
+  catch (const c10::Error & e)
+  {
+    error = e.msg();
+    return false;
+  }
+}
+
+/**
+ * Load ANN parameters and buffers from a scripted Torch module.
+ * @param nn Neural network that receives the loaded state.
+ * @param filename Checkpoint file to read.
+ * @param error Human-readable error string filled on failure.
+ * @return True when the network was loaded successfully.
+ */
+bool
+loadArtificialNeuralNetStateFromTorchScript(Moose::LibtorchArtificialNeuralNet & nn,
+                                            const std::string & filename,
+                                            std::string & error)
+{
+  try
+  {
+    const auto scripted = torch::jit::load(filename);
+    const auto scripted_parameters = scripted.named_parameters();
+    const auto scripted_buffers = scripted.named_buffers();
+
+    for (auto & parameter : nn.named_parameters())
+    {
+      torch::Tensor stored_tensor;
+      if (!findNamedTensor(scripted_parameters, parameter.key(), stored_tensor))
+      {
+        error = "Missing scripted parameter: " + parameter.key();
+        return false;
+      }
+
+      copyTensor(parameter.value(), stored_tensor);
+    }
+
+    for (auto & buffer : nn.named_buffers())
+    {
+      torch::Tensor stored_tensor;
+      if (!findNamedTensor(scripted_buffers, buffer.key(), stored_tensor))
+      {
+        if (isOptionalArtificialNeuralNetBuffer(buffer.key()))
+          continue;
+
+        error = "Missing scripted buffer: " + buffer.key();
+        return false;
+      }
+
+      copyTensor(buffer.value(), stored_tensor);
+    }
+
+    nn.synchronizeAffineFactorsFromBuffers();
+    return true;
+  }
+  catch (const c10::Error & e)
+  {
+    error = e.msg();
+    return false;
+  }
+}
+
+} // namespace
 
 namespace Moose
 {
@@ -22,27 +202,40 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet(
     const std::vector<unsigned int> & num_neurons_per_layer,
     const std::vector<std::string> & activation_function,
     const torch::DeviceType device_type,
-    const torch::ScalarType data_type)
+    const torch::ScalarType data_type,
+    const bool build_on_construct,
+    const std::vector<Real> & input_shift_factors,
+    const std::vector<Real> & input_scaling_factors,
+    const std::vector<Real> & output_scaling_factors)
   : _name(name),
     _num_inputs(num_inputs),
     _num_outputs(num_outputs),
     _num_neurons_per_layer(num_neurons_per_layer),
-    _activation_function(MultiMooseEnum("relu sigmoid elu gelu linear", "relu")),
+    _activation_function(MultiMooseEnum("relu sigmoid elu gelu linear tanh", "relu")),
     _device_type(device_type),
-    _data_type(data_type)
+    _data_type(data_type),
+    _input_shift_factors(
+        setAffineFactors(input_shift_factors, num_inputs, 0.0, "input_shift_factors")),
+    _input_scaling_factors(
+        setAffineFactors(input_scaling_factors, num_inputs, 1.0, "input_scaling_factors")),
+    _output_scaling_factors(
+        setAffineFactors(output_scaling_factors, num_outputs, 1.0, "output_scaling_factors"))
 {
   _activation_function = activation_function;
+  initializeAffineBuffers();
 
   // Check if the number of activation functions matches the number of hidden layers
   if ((_activation_function.size() != 1) &&
       (_activation_function.size() != _num_neurons_per_layer.size()))
     mooseError("The number of activation functions should be either one or the same as the number "
                "of hidden layers");
-  constructNeuralNetwork();
+
+  if (build_on_construct)
+    constructNeuralNetwork();
 }
 
 LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet(
-    const Moose::LibtorchArtificialNeuralNet & nn)
+    const Moose::LibtorchArtificialNeuralNet & nn, const bool build_on_construct)
   : torch::nn::Module(),
     _name(nn.name()),
     _num_inputs(nn.numInputs()),
@@ -50,16 +243,103 @@ LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet(
     _num_neurons_per_layer(nn.numNeuronsPerLayer()),
     _activation_function(nn.activationFunctions()),
     _device_type(nn.deviceType()),
-    _data_type(nn.dataType())
+    _data_type(nn.dataType()),
+    _input_shift_factors(nn.inputShiftFactors()),
+    _input_scaling_factors(nn.inputScalingFactors()),
+    _output_scaling_factors(nn.outputScalingFactors())
 {
+  initializeAffineBuffers();
 
   // We construct the NN architecture
-  constructNeuralNetwork();
-  // We fill it up with the current parameter values
-  const auto & from_params = nn.named_parameters();
-  auto to_params = this->named_parameters();
-  for (unsigned int param_i : make_range(from_params.size()))
-    to_params[param_i].value().data() = from_params[param_i].value().data().clone();
+  if (build_on_construct)
+  {
+    constructNeuralNetwork();
+    // We fill it up with the current parameter values
+    const auto & from_params = nn.named_parameters();
+    auto to_params = this->named_parameters();
+    for (unsigned int param_i : make_range(from_params.size()))
+      to_params[param_i].value().data() = from_params[param_i].value().data().clone();
+
+    const auto & from_buffers = nn.named_buffers();
+    auto to_buffers = this->named_buffers();
+    for (unsigned int buffer_i : make_range(from_buffers.size()))
+      to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone();
+  }
+}
+
+Real
+LibtorchArtificialNeuralNet::determineGain(const std::string & activation)
+{
+  if (activation == "relu")
+    return std::sqrt(2);
+  if (activation == "tanh")
+    return 5.0 / 3.0;
+
+  return 1.0;
+}
+
+void
+LibtorchArtificialNeuralNet::initializeNeuralNetwork(const c10::optional<at::Generator> generator)
+{
+  for (unsigned int i = 0; i < numHiddenLayers(); ++i)
+  {
+    const auto & activation =
+        _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0];
+    const Real gain = determineGain(activation);
+    Moose::orthogonalInitializeTensor(_weights[i]->weight, gain, generator);
+    torch::nn::init::zeros_(_weights[i]->bias);
+  }
+
+  Moose::orthogonalInitializeTensor(_weights.back()->weight, 1.0, generator);
+  torch::nn::init::zeros_(_weights.back()->bias);
+}
+
+std::vector<Real>
+LibtorchArtificialNeuralNet::setAffineFactors(const std::vector<Real> & factors,
+                                              const unsigned int expected_size,
+                                              const Real default_value,
+                                              const std::string & factor_name)
+{
+  const auto resolved_factors =
+      factors.empty() ? std::vector<Real>(expected_size, default_value) : factors;
+
+  if (resolved_factors.size() != expected_size)
+    mooseError("The number of ", factor_name, " entries must match ", expected_size, ".");
+
+  return resolved_factors;
+}
+
+void
+LibtorchArtificialNeuralNet::initializeAffineBuffers()
+{
+  auto input_shift = _input_shift_factors;
+  LibtorchUtils::vectorToTensor(input_shift, _input_shift_tensor);
+  _input_shift_tensor = register_buffer(
+      "input_shift", _input_shift_tensor.transpose(0, 1).to(_data_type).to(_device_type));
+
+  auto input_scale = _input_scaling_factors;
+  LibtorchUtils::vectorToTensor(input_scale, _input_scale_tensor);
+  _input_scale_tensor = register_buffer(
+      "input_scale", _input_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type));
+
+  auto output_scale = _output_scaling_factors;
+  LibtorchUtils::vectorToTensor(output_scale, _output_scale_tensor);
+  _output_scale_tensor = register_buffer(
+      "output_scale", _output_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type));
+}
+
+void
+LibtorchArtificialNeuralNet::synchronizeAffineFactorsFromBuffers()
+{
+  auto input_shift = _input_shift_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble);
+  LibtorchUtils::tensorToVector(input_shift, _input_shift_factors);
+
+  auto input_scale = _input_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble);
+  LibtorchUtils::tensorToVector(input_scale, _input_scaling_factors);
+
+  auto output_scale =
+      _output_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble);
+  LibtorchUtils::tensorToVector(output_scale, _output_scaling_factors);
 }
 
 void
@@ -84,14 +364,28 @@ LibtorchArtificialNeuralNet::constructNeuralNetwork()
   _weights.back()->to(_device_type, _data_type);
 }
 
+torch::Tensor
+LibtorchArtificialNeuralNet::preprocessInput(const torch::Tensor & x) const
+{
+  torch::Tensor input(x);
+  if (_data_type != input.scalar_type())
+    input = input.to(_data_type);
+  if (_device_type != input.device().type())
+    input = input.to(_device_type);
+
+  return (input - _input_shift_tensor) * _input_scale_tensor;
+}
+
+torch::Tensor
+LibtorchArtificialNeuralNet::scaleOutput(const torch::Tensor & y) const
+{
+  return y * _output_scale_tensor;
+}
+
 torch::Tensor
 LibtorchArtificialNeuralNet::forward(const torch::Tensor & x)
 {
-  torch::Tensor output(x);
-  if (_data_type != output.scalar_type())
-    output.to(_data_type);
-  if (_device_type != output.device().type())
-    output.to(_device_type);
+  torch::Tensor output = preprocessInput(x);
 
   for (unsigned int i = 0; i < _weights.size() - 1; ++i)
   {
@@ -101,6 +395,8 @@ LibtorchArtificialNeuralNet::forward(const torch::Tensor & x)
       output = torch::relu(_weights[i]->forward(output));
     else if (activation == "sigmoid")
       output = torch::sigmoid(_weights[i]->forward(output));
+    else if (activation == "tanh")
+      output = torch::tanh(_weights[i]->forward(output));
     else if (activation == "elu")
       output = torch::elu(_weights[i]->forward(output));
     else if (activation == "gelu")
@@ -109,9 +405,7 @@ LibtorchArtificialNeuralNet::forward(const torch::Tensor & x)
       output = _weights[i]->forward(output);
   }
 
-  output = _weights[_weights.size() - 1]->forward(output);
-
-  return output;
+  return scaleOutput(_weights[_weights.size() - 1]->forward(output));
 }
 
 void
@@ -145,6 +439,10 @@ LibtorchArtificialNeuralNet::store(nlohmann::json & json) const
         named_params[param_i].value().data_ptr<Real>(),
         named_params[param_i].value().data_ptr<Real>() + named_params[param_i].value().numel());
   }
+
+  json["input_shift_factors"] = _input_shift_factors;
+  json["input_scaling_factors"] = _input_scaling_factors;
+  json["output_scaling_factors"] = _output_scaling_factors;
 }
 
 void
@@ -154,6 +452,26 @@ to_json(nlohmann::json & json, const Moose::LibtorchArtificialNeuralNet * const
     network->store(json);
 }
 
+void
+loadLibtorchArtificialNeuralNetState(Moose::LibtorchArtificialNeuralNet & nn,
+                                     const std::string & filename)
+{
+  std::string archive_error;
+  if (loadArtificialNeuralNetStateFromArchive(nn, filename, archive_error))
+    return;
+
+  std::string scripted_error;
+  if (loadArtificialNeuralNetStateFromTorchScript(nn, filename, scripted_error))
+    return;
+
+  mooseError("The requested pytorch parameter file could not be loaded. This can either be the "
+             "result of the file not existing or a misalignment in the generated container and "
+             "the data in the file. Make sure the dimensions of the generated neural net are the "
+             "same as the dimensions of the parameters in the input file!\nArchive load error: ",
+             archive_error,
+             "\nTorchScript load error: ",
+             scripted_error);
+}
 }
 
 template <>
@@ -233,7 +551,7 @@ dataLoad<Moose::LibtorchArtificialNeuralNet>(
   nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(
       name, num_inputs, num_outputs, num_neurons_per_layer, activation_functions, divt, datt);
 
-  torch::load(nn, name);
+  Moose::loadLibtorchArtificialNeuralNetState(*nn, name);
 }
 
 template <>
diff --git a/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C b/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C
new file mode 100644
index 000000000000..a98d931d71e1
--- /dev/null
+++ b/framework/src/libtorch/utils/LibtorchObservationHistoryHelper.C
@@ -0,0 +1,133 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "LibtorchObservationHistoryHelper.h"
+
+#include "MooseError.h"
+
+#include <algorithm>
+#include "libmesh/utility.h"
+
+LibtorchObservationHistoryHelper::LibtorchObservationHistoryHelper(
+    const unsigned int input_timesteps)
+  : _input_timesteps(input_timesteps)
+{
+  if (_input_timesteps == 0)
+    mooseError("Observation history requires at least one input timestep.");
+}
+
+void
+LibtorchObservationHistoryHelper::validateTrajectoryShape(
+    const std::vector<std::vector<Real>> & component_trajectories) const
+{
+  if (component_trajectories.empty())
+    return;
+
+  const auto trajectory_size = component_trajectories.front().size();
+  for (const auto & trajectory : component_trajectories)
+    if (trajectory.size() != trajectory_size)
+      mooseError("Observation trajectories must all have the same number of timesteps.");
+}
+
+void
+LibtorchObservationHistoryHelper::initializeHistory(
+    const std::vector<Real> & observation, std::vector<std::vector<Real>> & old_observations) const
+{
+  old_observations.assign(_input_timesteps - 1, observation);
+}
+
+void
+LibtorchObservationHistoryHelper::advanceHistory(
+    const std::vector<Real> & observation, std::vector<std::vector<Real>> & old_observations) const
+{
+  if (old_observations.empty())
+    return;
+
+  std::rotate(old_observations.rbegin(), old_observations.rbegin() + 1, old_observations.rend());
+  old_observations[0] = observation;
+}
+
+std::vector<Real>
+LibtorchObservationHistoryHelper::expandObservationFactors(
+    const std::vector<Real> & observation_factors) const
+{
+  if (observation_factors.empty())
+    return {};
+
+  std::vector<Real> expanded;
+  expanded.reserve(observation_factors.size() * _input_timesteps);
+
+  for (const auto lag : make_range(_input_timesteps))
+  {
+    libmesh_ignore(lag);
+    expanded.insert(expanded.end(), observation_factors.begin(), observation_factors.end());
+  }
+
+  return expanded;
+}
+
+std::vector<Real>
+LibtorchObservationHistoryHelper::stackCurrentObservation(
+    const std::vector<Real> & observation,
+    const std::vector<std::vector<Real>> & old_observations) const
+{
+  const auto expected_history_size = _input_timesteps - 1;
+  if (old_observations.size() != expected_history_size)
+    mooseError("Observation history must contain ",
+               expected_history_size,
+               " stored entries, but got ",
+               old_observations.size(),
+               ".");
+
+  std::vector<Real> stacked;
+  stacked.reserve(observation.size() * _input_timesteps);
+
+  stacked.insert(stacked.end(), observation.begin(), observation.end());
+
+  for (const auto & history_entry : old_observations)
+  {
+    if (history_entry.size() != observation.size())
+      mooseError("Observation history entries must have the same size as the current "
+                 "observation.");
+    stacked.insert(stacked.end(), history_entry.begin(), history_entry.end());
+  }
+
+  return stacked;
+}
+
+std::vector<Real>
+LibtorchObservationHistoryHelper::stackTrajectoryObservation(
+    const std::vector<std::vector<Real>> & component_trajectories,
+    const unsigned int time_index) const
+{
+  validateTrajectoryShape(component_trajectories);
+
+  if (component_trajectories.empty())
+    return {};
+
+  const auto trajectory_size = component_trajectories.front().size();
+  if (time_index >= trajectory_size)
+    mooseError("Requested observation time index is out of range.");
+
+  std::vector<Real> stacked;
+  stacked.reserve(component_trajectories.size() * _input_timesteps);
+
+  for (const auto lag : make_range(_input_timesteps))
+  {
+    const auto source_index = time_index > lag ? time_index - lag : 0;
+    for (const auto component_i : make_range(component_trajectories.size()))
+      stacked.push_back(component_trajectories[component_i][source_index]);
+  }
+
+  return stacked;
+}
+
+#endif
diff --git a/framework/src/libtorch/utils/LibtorchRandomUtils.C b/framework/src/libtorch/utils/LibtorchRandomUtils.C
new file mode 100644
index 000000000000..f1c18b349dc1
--- /dev/null
+++ b/framework/src/libtorch/utils/LibtorchRandomUtils.C
@@ -0,0 +1,71 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "LibtorchRandomUtils.h"
+
+#include <ATen/CPUGeneratorImpl.h>
+#include <ATen/ops/linalg_qr.h>
+#include <ATen/ops/normal.h>
+
+#include "MooseError.h"
+
+namespace Moose
+{
+
+at::Generator
+makeLibtorchCPUGenerator()
+{
+  return at::detail::createCPUGenerator();
+}
+
+at::Generator
+makeLibtorchCPUGenerator(const uint64_t seed)
+{
+  return at::detail::createCPUGenerator(seed);
+}
+
+void
+orthogonalInitializeTensor(torch::Tensor & tensor,
+                           const Real gain,
+                           const c10::optional<at::Generator> generator)
+{
+  if (tensor.ndimension() < 2)
+    mooseError("Only tensors with 2 or more dimensions are supported for orthogonal "
+               "initialization.");
+
+  if (!tensor.numel())
+    return;
+
+  torch::NoGradGuard no_grad;
+
+  const auto rows = tensor.size(0);
+  const auto cols = tensor.numel() / rows;
+  auto flattened = torch::empty({rows, cols}, tensor.options());
+  at::normal_out(flattened, 0.0, 1.0, {rows, cols}, generator);
+
+  if (rows < cols)
+    flattened = flattened.transpose(0, 1);
+
+  auto qr = at::linalg_qr(flattened, "reduced");
+  auto q = std::get<0>(qr);
+  const auto phases = torch::diag(std::get<1>(qr), 0).sign();
+  q = q * phases;
+
+  if (rows < cols)
+    q = q.transpose(0, 1);
+
+  tensor.view_as(q).copy_(q);
+  tensor.mul_(gain);
+}
+
+} // namespace Moose
+
+#endif
diff --git a/framework/src/postprocessors/PointValue.C b/framework/src/postprocessors/PointValue.C
index 8abe943b9f4c..3b9cf803f65e 100644
--- a/framework/src/postprocessors/PointValue.C
+++ b/framework/src/postprocessors/PointValue.C
@@ -50,7 +50,6 @@ void
 PointValue::execute()
 {
   _value = _system.point_value(_var_number, _point, false);
-
   /**
    * If we get exactly zero, we don't know if the locator couldn't find an element, or
    * if the solution is truly zero, more checking is needed.
diff --git a/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib b/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib
index 9af1f07f1888..c321338e7187 100644
--- a/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib
+++ b/modules/stochastic_tools/doc/content/bib/stochastic_tools.bib
@@ -135,6 +135,20 @@ @article{kingma2014adam
   url={https://doi.org/10.48550/arXiv.1412.6980}
 }
 
+@article{schulman2015gae,
+  title={High-dimensional continuous control using generalized advantage estimation},
+  author={Schulman, John and Moritz, Philipp and Levine, Sergey and Jordan, Michael and Abbeel, Pieter},
+  journal={arXiv preprint arXiv:1506.02438},
+  year={2015}
+}
+
+@article{schulman2017proximal,
+  title={Proximal policy optimization algorithms},
+  author={Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
+  journal={arXiv preprint arXiv:1707.06347},
+  year={2017}
+}
+
 @article{li2016pss,
   title={Matlab codes of Subset Simulation for reliability analysis and structural optimization},
   author={Li, H. S. and Cao, Z. J.},
@@ -363,4 +377,4 @@ @article{calderhead2014general
     pages = {17408--17413},
     year  = {2014},
     doi = {10.1073/pnas.1408184111}
-}
\ No newline at end of file
+}
diff --git a/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md b/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md
index 71e7a0517bfe..3bf2772eac80 100644
--- a/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md
+++ b/modules/stochastic_tools/doc/content/modules/stochastic_tools/examples/libtorch_drl_control.md
@@ -5,6 +5,17 @@ The following example demonstrates how to set up a Proximal Policy Optimization
 Deep Reinforcement Learning (DRL) training sequence for neural-net-based controllers of MOOSE simulations.
 See [!cite](schulman2017proximal) for a more theoretical background on the PPO algorithm.
 
+The detailed PPO and GAE equations used by the trainer are documented on
+[LibtorchDRLControlTrainer](source/libtorch/trainers/LibtorchDRLControlTrainer.md).
+At a high level, the trainer collects on-policy tuples
+$(o_t, a_t, \log \pi_{\mathrm{old}}(a_t|o_t), r_t, o_{t+1})$, computes GAE
+advantages [!cite](schulman2015gae), and updates separate actor and critic
+networks with the PPO clipped objective [!cite](schulman2017proximal). The
+paired [LibtorchDRLControl](source/libtorch/controls/LibtorchDRLControl.md)
+executes the actor at `TIMESTEP_BEGIN`, stacks `input_timesteps` observations,
+and can optionally reuse or smooth sampled actions before they are applied to
+the physics model.
+
 ## Problem Statement
 
 In this example we would like to design a DRL-based controller for the air conditioning of a
@@ -80,9 +91,10 @@ using [LibtorchControlValuePostprocessor](source/libtorch/postprocessors/Libtorc
 [LibtorchDRLLogProbabilityPostprocessor](source/libtorch/postprocessors/LibtorchDRLLogProbabilityPostprocessor.md) as
 shown above.
 Furthermore, the additional [LibtorchNeuralNetControl](LibtorchNeuralNetControl.md) (`src_control_final`)
-can be used to evaluate the neural network without the additional random
-sampling process needed for the training process. In other words, this object will evaluate the
-final product of this training process.
+is optional and can be used to evaluate the trained network with a plain
+deterministic control object. The same trained actor could also be executed
+through [LibtorchDRLControl.md] with
+[!param](/Controls/LibtorchDRLControl/stochastic) set to `false`.
 
 ### Main Application
 
@@ -157,10 +169,12 @@ to balance these two factors by tuning the parameters in the `Trainer` and `Cont
   layout={'xaxis':{'type':'linear', 'title':'Number of simulations'},
           'yaxis':{'type':'linear','title':'Average Episodic Reward'}}
 
-Following the training procedure, we can replace the [LibtorchDRLControl.md] object with
+Following the training procedure, we can either set
+[!param](/Controls/LibtorchDRLControl/stochastic) to `false` on
+[LibtorchDRLControl.md] or replace it with
 [LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md)
-to evaluate the final version of the neural network
-without the additional randomization. By doing this, the following results are obtained:
+for plain deterministic inference. In this example we use the latter, and the
+following results are obtained:
 
 !plot scatter
   id=results caption=The evolution of the room temperature at the sensor over the day.
diff --git a/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md b/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md
index 1a1ad7c057a3..ef7038e3d0c5 100644
--- a/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md
+++ b/modules/stochastic_tools/doc/content/source/libtorch/controls/LibtorchDRLControl.md
@@ -5,10 +5,44 @@
 
 ## Overview
 
-This object controls a physical process using a neural network, just like [LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md),
-with an additional functionality of randomizing the action values to avoid overfitting in the control process.
-This control object is supposed to be used in conjunction with [LibtorchDRLControlTrainer.md]. In other
-cases when the neural network needs to be simply evaluated, the user is encouraged to use [LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md).
+This object is the runtime policy executor paired with
+[LibtorchDRLControlTrainer](source/libtorch/trainers/LibtorchDRLControlTrainer.md).
+It extends
+[LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md)
+with stochastic policy sampling, action reuse, optional smoothing, and
+restartable policy state. For deterministic execution of the same actor, set
+[!param](/Controls/LibtorchDRLControl/stochastic) to `false`. Use
+[LibtorchNeuralNetControl](source/libtorch/controls/LibtorchNeuralNetControl.md)
+instead when a plain deterministic neural-net control object is preferred
+without the DRL-specific execution features.
+
+## Execution Model
+
+`LibtorchDRLControl` only supports `TIMESTEP_BEGIN`. On each execution it reads
+the current observation values, combines them with the stored history implied by
+[!param](/Controls/LibtorchDRLControl/input_timesteps), and evaluates the actor
+when a new policy action is needed.
+
+If [!param](/Controls/LibtorchDRLControl/stochastic) is `true`, the action is
+sampled from the actor distribution and the corresponding log probabilities are
+stored for PPO training. If it is `false`, the deterministic actor output is
+used instead. The policy evaluation can be reused across multiple controller
+executions with [!param](/Controls/LibtorchDRLControl/num_steps_in_period), so a
+new action is only sampled every configured number of executions.
+
+The applied control can also be relaxed with
+[!param](/Controls/LibtorchDRLControl/smoother):
+\begin{equation}
+u_t^{\mathrm{applied}} = u_{t-1}^{\mathrm{applied}} +
+\alpha\left(u_t^{\mathrm{policy}} - u_{t-1}^{\mathrm{applied}}\right),
+\end{equation}
+where $\alpha$ is the `smoother` value. Setting `smoother = 1` applies the raw
+policy action directly.
+
+The controller stores the observation history, smoothed signal, and the libtorch
+CPU random-number-generator state as restartable data. This keeps stochastic
+recovered runs aligned with uninterrupted runs, provided the same controller
+state is recovered.
 
 !if! function=hasCapability('libtorch')
 
diff --git a/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md b/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md
index 7d5fcc1e1a54..659ed5b20691 100644
--- a/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md
+++ b/modules/stochastic_tools/doc/content/source/libtorch/trainers/LibtorchDRLControlTrainer.md
@@ -5,8 +5,78 @@
 
 ## Overview
 
-This object is supposed to train a Deep Reinforcement Learning (DRL) controller
-using the Proximal Policy Optimization (PPO) algorithm [!cite](schulman2017proximal).
+This object trains an on-policy actor-critic controller using Proximal Policy
+Optimization (PPO) [!cite](schulman2017proximal) together with generalized
+advantage estimation (GAE) [!cite](schulman2015gae). Reporter trajectories are
+assembled into transitions, flattened across samples, shuffled into mini-batches,
+and then used to update separate actor and critic neural networks.
+
+## Algorithm Summary
+
+For `input_timesteps = H`, the observation passed to the actor and critic is the
+stacked history
+\begin{equation}
+o_t = \left[s_t,\ s_{t-1},\ \ldots,\ s_{t-H+1}\right],
+\end{equation}
+where early missing history entries are filled with the earliest available
+reporter value.
+
+Each collected trajectory is converted into the tuples
+$(o_t, a_t, \log \pi_{\mathrm{old}}(a_t|o_t), r_t, o_{t+1})$. The
+[!param](/Trainers/LibtorchDRLControlTrainer/shift_outputs) option aligns the
+reported action and log-probability sequences with the reward sequence when the
+control is applied at the beginning of a time step and the reward is measured at
+the end of that step. The
+[!param](/Trainers/LibtorchDRLControlTrainer/timestep_window) option can be used
+to downsample long reporter trajectories before these transitions are assembled.
+
+The critic is first evaluated on $o_t$ and $o_{t+1}$, and the temporal-difference
+residual is computed as
+\begin{equation}
+\delta_t = r_t + \gamma V_{\phi}(o_{t+1}) - V_{\phi}(o_t).
+\end{equation}
+The trainer then computes the reverse-time GAE recursion
+\begin{equation}
+\hat{A}_t = \delta_t + \gamma \lambda \hat{A}_{t+1},
+\end{equation}
+with critic regression targets
+\begin{equation}
+\hat{V}_t = \hat{A}_t + V_{\phi}(o_t).
+\end{equation}
+
+For each mini-batch, PPO uses the probability ratio
+\begin{equation}
+r_t(\theta) = \exp\left(\log \pi_{\theta}(a_t|o_t) -
+\log \pi_{\mathrm{old}}(a_t|o_t)\right),
+\end{equation}
+and the actor objective implemented here is
+\begin{equation}
+L_{\mathrm{actor}} =
+-\frac{1}{N}\sum_t \left[
+\min\left(r_t(\theta)\hat{A}_t,
+\mathrm{clip}\left(r_t(\theta), 1-\epsilon, 1+\epsilon\right)\hat{A}_t\right)
++ c_H \mathcal{H}\left[\pi_{\theta}(\cdot|o_t)\right]
+\right].
+\end{equation}
+The critic is trained with a mean-squared-error loss,
+\begin{equation}
+L_{\mathrm{critic}} = \frac{1}{N}\sum_t
+\left(V_{\phi}(o_t) - \hat{V}_t\right)^2.
+\end{equation}
+
+If [!param](/Trainers/LibtorchDRLControlTrainer/standardize_advantage) is set,
+each sampled mini-batch uses zero-mean, unit-variance advantages before the PPO
+loss is evaluated.
+
+## Notes
+
+The actor embeds the configured input shifting, input scaling, and action scaling
+directly into the neural-network module so that transferred and checkpointed
+controllers operate in the same normalized coordinates used during training.
+
+The trainer updates the actor and critic with separate Adam optimizers and then
+broadcasts the updated parameters so all MPI ranks hold the same networks after
+each PPO update.
 
 ## Example Input File Syntax
 
diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i
index 2dad8e5f504f..ff4f127ef0af 100644
--- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i
+++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_sub.i
@@ -10,8 +10,8 @@ air_effective_k = 0.5 # W/(m K)
     xmax = 7.0
     ymin = 0.0
     ymax = 5.0
-    nx = 35
-    ny = 25
+    nx = 20
+    ny = 20
   []
 []
 
@@ -97,7 +97,7 @@ air_effective_k = 0.5 # W/(m K)
     type = FunctionValuePostprocessor
     function = reward_function
     execute_on = 'INITIAL TIMESTEP_END'
-    indirect_dependencies = 'center_temp_tend env_temp'
+    indirect_dependencies = 'center_temp_tend'
   []
   [top_flux]
     type = LibtorchControlValuePostprocessor
@@ -112,42 +112,22 @@ air_effective_k = 0.5 # W/(m K)
 [Reporters]
   [T_reporter]
     type = AccumulateReporter
-    reporters = 'center_temp_tend/value env_temp/value reward/value top_flux/value log_prob_top_flux/value'
+    reporters = 'center_temp_tend/value reward/value top_flux/value log_prob_top_flux/value'
   []
 []
 
 [Controls]
-  inactive = 'src_control_final'
   [src_control]
     type = LibtorchDRLControl
     parameters = "BCs/top_flux/value"
-    responses = 'center_temp_tend env_temp'
+    observations = 'center_temp_tend'
 
     # keep consistent with LibtorchDRLControlTrainer
-    input_timesteps = 2
-    response_scaling_factors = '0.03 0.03'
-    response_shift_factors = '290 290'
-    action_standard_deviations = '0.02'
-    action_scaling_factors = 200
-
-    execute_on = 'TIMESTEP_BEGIN'
-  []
-  [src_control_final]
-    type = LibtorchNeuralNetControl
-
-    filename = 'mynet_control.net'
-    num_neurons_per_layer = '16 6'
-    activation_function = 'relu'
-
-    parameters = "BCs/top_flux/value"
-    responses = 'center_temp_tend env_temp'
-
-    # keep consistent with LibtorchDRLControlTrainer
-    input_timesteps = 2
-    response_scaling_factors = '0.03 0.03'
-    response_shift_factors = '290 290'
-    action_standard_deviations = '0.02'
-    action_scaling_factors = 200
+    input_timesteps = 1
+    observation_scaling_factors = '0.03'
+    observation_shift_factors = '290'
+    action_scaling_factors = 20
+    stochastic = true
 
     execute_on = 'TIMESTEP_BEGIN'
   []
@@ -157,21 +137,17 @@ air_effective_k = 0.5 # W/(m K)
   type = Transient
   solve_type = 'NEWTON'
 
-  petsc_options_iname = '-pc_type -pc_factor_shift_type'
-  petsc_options_value = 'lu NONZERO'
+  petsc_options_iname = '-pc_type -pc_hypre_type'
+  petsc_options_value = 'hypre boomeramg'
   line_search = 'none'
 
-  nl_rel_tol = 1e-7
+  nl_rel_tol = 1e-6
 
   start_time = 0.0
   end_time = 86400
-  dt = 900.0
+  dt = ${fparse 86400/80}
 []
 
 [Outputs]
   console = false
-  [c]
-    type = CSV
-    execute_on = FINAL
-  []
 []
diff --git a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i
index 341a251214d3..e09ee3eb98c5 100644
--- a/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i
+++ b/modules/stochastic_tools/examples/libtorch_drl_control/libtorch_drl_control_trainer.i
@@ -13,63 +13,76 @@
     type = SamplerFullSolveMultiApp
     sampler = dummy
     input_files = 'libtorch_drl_control_sub.i'
+    mode = batch-reset
   []
 []
 
 [Transfers]
   [nn_transfer]
-    type = LibtorchNeuralNetControlTransfer
+    type = SamplerDRLControlTransfer
     to_multi_app = runner
     trainer_name = nn_trainer
     control_name = src_control
+    sampler = dummy
   []
   [r_transfer]
-    type = MultiAppReporterTransfer
+    type = SamplerReporterTransfer
     from_multi_app = runner
-    to_reporters = 'results/center_temp results/env_temp results/reward results/top_flux results/log_prob_top_flux'
-    from_reporters = 'T_reporter/center_temp_tend:value T_reporter/env_temp:value T_reporter/reward:value T_reporter/top_flux:value T_reporter/log_prob_top_flux:value'
+    sampler = dummy
+    stochastic_reporter = storage
+    from_reporter = 'T_reporter/center_temp_tend:value T_reporter/reward:value T_reporter/top_flux:value T_reporter/log_prob_top_flux:value'
   []
 []
 
 [Trainers]
   [nn_trainer]
     type = LibtorchDRLControlTrainer
-    response = 'results/center_temp results/env_temp'
-    control = 'results/top_flux'
-    log_probability = 'results/log_prob_top_flux'
-    reward = 'results/reward'
+    observation = 'storage/r_transfer:T_reporter:center_temp_tend:value'
+    control = 'storage/r_transfer:T_reporter:top_flux:value'
+    log_probability = 'storage/r_transfer:T_reporter:log_prob_top_flux:value'
+    reward = 'storage/r_transfer:T_reporter:reward:value'
 
-    num_epochs = 1000
-    update_frequency = 10
-    decay_factor = 0.0
+    num_epochs = 50
+    update_frequency = 1
+    decay_factor = 0.8
+    lambda_factor = 1.0
 
     loss_print_frequency = 10
 
-    critic_learning_rate = 0.0001
-    num_critic_neurons_per_layer = '64 27'
+    critic_learning_rate = 0.001
+    num_critic_neurons_per_layer = '32 16'
+    critic_activation_functions = 'relu relu'
+
+    control_learning_rate = 0.001
+    num_control_neurons_per_layer = '32 16'
+    control_activation_functions = 'relu relu'
 
-    control_learning_rate = 0.0005
-    num_control_neurons_per_layer = '16 6'
 
     # keep consistent with LibtorchNeuralNetControl
-    input_timesteps = 2
-    response_scaling_factors = '0.03 0.03'
-    response_shift_factors = '290 290'
-    action_standard_deviations = '0.02'
+    input_timesteps = 1
+    observation_scaling_factors = '0.03'
+    observation_shift_factors = '290'
+    action_scaling_factors = 20
 
     standardize_advantage = true
 
+    batch_size = 80
+
     read_from_file = false
+
+    entropy_coeff = 0.0
+
+    # min_control_value = ${fparse -0.1}
+    # max_control_value = ${fparse 0.1}
+
   []
 []
 
 [Reporters]
-  [results]
-    type = ConstantReporter
-    real_vector_names = 'center_temp env_temp reward top_flux log_prob_top_flux'
-    real_vector_values = '0; 0; 0; 0; 0'
-    outputs = csv
-    execute_on = timestep_begin
+  [storage]
+    type = StochasticReporter
+    parallel_type = ROOT
+    outputs = none
   []
   [reward]
     type = DRLRewardReporter
@@ -79,11 +92,12 @@
 
 [Executioner]
   type = Transient
-  num_steps = 440
+  num_steps = 200
 []
 
 [Outputs]
   file_base = output/train_out
-  csv = true
-  time_step_interval = 10
+  json = true
+  time_step_interval = 1
+  execute_on = TIMESTEP_END
 []
diff --git a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h
index fbfccd9eafcf..e6c8074935db 100644
--- a/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h
+++ b/modules/stochastic_tools/include/libtorch/controls/LibtorchDRLControl.h
@@ -11,7 +11,9 @@
 
 #pragma once
 
-#include "LibtorchArtificialNeuralNet.h"
+#include <cstdint>
+
+#include "LibtorchActorNeuralNet.h"
 #include "LibtorchNeuralNetControl.h"
 
 /**
@@ -29,6 +31,9 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl
   /// Construct using input parameters
   LibtorchDRLControl(const InputParameters & parameters);
 
+  /// Restore any restartable controller state after base setup completes.
+  virtual void initialSetup() override;
+
   /// We compute the actions in this function together with the corresponding logarithmic probabilities.
   virtual void execute() override;
 
@@ -39,25 +44,53 @@ class LibtorchDRLControl : public LibtorchNeuralNetControl
    */
   Real getSignalLogProbability(const unsigned int signal_index) const;
 
-protected:
   /**
-   * Function which computes the logarithmic probability of given actions.
-   * @param action The tensor containing the perturbed control signals (also known as the action of
-   * the controller)
-   * @param output_tensor The expected value of the signals predicted by the neural net
-   * @return The logarithmic probability of the action with respect to the neural net prediction
+   * Copy an actor network into this DRL controller.
+   * @param input_nn Actor network to copy into the controller.
    */
-  torch::Tensor computeLogProbability(const torch::Tensor & action,
-                                      const torch::Tensor & output_tensor);
+  void loadControlNeuralNet(const Moose::LibtorchActorNeuralNet & input_nn);
+
+  virtual void loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn) override;
+
+  virtual void loadControlNeuralNetFromFile() override;
+
+  /// Reset the owned policy-sampling generator to a known seed.
+  void setPolicySampleSeed(uint64_t seed);
 
+protected:
   /// The log probability of control signals from the last evaluation of the controller
-  std::vector<Real> _current_control_signal_log_probabilities;
+  std::vector<Real> & _current_control_signal_log_probabilities;
+
+  /// The smoothed control signal from the previous execution, saved for restart/recover.
+  std::vector<Real> & _previous_control_signal;
+  /// The current smoothed control signal applied to the controllable parameters.
+  std::vector<Real> & _current_smoothed_signal;
+
+  /// Actor network used when the controller operates as a stochastic policy.
+  std::shared_ptr<Moose::LibtorchActorNeuralNet> _actor_nn;
+  /// Owned libtorch CPU generator used for policy sampling.
+  at::Generator _policy_generator;
+  /// Restartable serialized state for the owned policy-sampling generator.
+  std::vector<std::uint8_t> & _policy_generator_state;
+
+  /// Number of controller executions remaining before the next policy evaluation.
+  unsigned int & _executions_until_next_policy_evaluation;
+  /// Number of controller executions between policy evaluations.
+  const unsigned int _num_steps_in_period;
+  /// Relaxation factor applied while smoothing control updates.
+  const Real _smoother;
+  /// Whether to sample actions stochastically instead of using the deterministic actor output.
+  const bool _stochastic;
+
+private:
+  /// Advance the reuse schedule and report whether this execution should evaluate the policy.
+  bool shouldEvaluatePolicy();
 
-  /// Standard deviation for the actions, supplied by the user
-  const std::vector<Real> _action_std;
+  /// Restore the owned libtorch generator state from restartable storage.
+  void restorePolicyGeneratorState();
 
-  /// Standard deviations converted to a 2D diagonal tensor that can be used by Libtorch routines.
-  torch::Tensor _std;
+  /// Mirror the owned libtorch generator state into restartable storage.
+  void savePolicyGeneratorState();
 };
 
 #endif
diff --git a/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h b/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h
index d9792772de3a..2482e7166a9d 100644
--- a/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h
+++ b/modules/stochastic_tools/include/libtorch/reporters/DRLRewardReporter.h
@@ -27,7 +27,17 @@ class DRLRewardReporter : public GeneralReporter, public SurrogateModelInterface
 
 protected:
   /// The reward values which will be saved
-  Real & _reward;
+  Real & _average_reward;
+
+  /// The standard deviation of the reward values which will be saved
+  Real & _std_reward;
+
+  /// The per-sample average reward values which will be saved
+  std::vector<Real> & _sample_average_reward;
+
+  /// The per-sample reward standard deviations which will be saved
+  std::vector<Real> & _sample_std_reward;
+
   /// The DRL trainer which computes the reward values
   LibtorchDRLControlTrainer & _trainer;
 };
diff --git a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h
index 93bb35e8c1db..30cf9041d316 100644
--- a/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h
+++ b/modules/stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h
@@ -12,105 +12,102 @@
 #pragma once
 
 #include <torch/torch.h>
-#include "LibtorchArtificialNeuralNet.h"
+#include "LibtorchActorNeuralNet.h"
+#include "LibtorchObservationHistoryHelper.h"
+#include "LibtorchRLMiniBatchSampler.h"
+#include "LibtorchRLPPOLoss.h"
+#include "LibtorchRLTrajectoryBuffer.h"
+#include "LibtorchRLValueEstimator.h"
 
 #include "libmesh/utility.h"
 #include "SurrogateTrainer.h"
 
 /**
- * This trainer is responsible for training neural networks that efficiently control
- * different processes. It utilizes the Proximal Policy Optimization algorithms. For more
- * information on the algorithm, see the following resources: Schulman, John, et al. "Proximal
- * policy optimization algorithms." arXiv preprint arXiv:1707.06347 (2017).
- * https://medium.com/analytics-vidhya/coding-ppo-from-scratch-with-pytorch-part-1-4-613dfc1b14c8
- * https://stable-baselines.readthedocs.io/en/master/modules/ppo2.html
+ * Fixed-horizon actor-critic trainer that collects trajectories from MOOSE reporters and runs a
+ * PPO update on top of reusable RL-core components (observation history, trajectory buffer,
+ * mini-batch sampler, value estimator, and PPO loss).
  */
 class LibtorchDRLControlTrainer : public SurrogateTrainerBase
 {
 public:
   static InputParameters validParams();
 
-  /// construct using input parameters
+  /**
+   * Build the PPO-based DRL trainer.
+   * @param parameters Input parameters for the trainer.
+   */
   LibtorchDRLControlTrainer(const InputParameters & parameters);
 
+  /// Pull fresh rollout data from the reporters and trigger training when ready.
   virtual void execute() override;
 
   /**
-   * Function which returns the current average episodic reward. It is only updated
-   * at the end of every episode.
+   * Return the current average episodic reward.
+   * @return Average episodic reward over the latest training window.
    */
   Real averageEpisodeReward() { return _average_episode_reward; }
+  /// Return the current episodic reward standard deviation.
+  Real stdEpisodeReward() { return _std_episode_reward; }
 
-  /// The condensed training function
-  void trainController();
-
-  const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const { return *_control_nn; }
-
-protected:
-  /// Compute the average eposiodic reward
-  void computeAverageEpisodeReward();
-
-  /**
-   * Function to convert input/output data from std::vector<std::vector> to torch::tensor
-   * @param vector_data The input data in vector-vectors format
-   * @param tensor_data The tensor where we would like to save the results
-   * @param detach If the gradient info needs to be detached from the tensor
-   */
-  void convertDataToTensor(std::vector<std::vector<Real>> & vector_data,
-                           torch::Tensor & tensor_data,
-                           const bool detach = false);
+  /// Return per-sample mean episodic rewards from the latest update window.
+  std::vector<Real> sampleAverageEpsiodeRewards() { return _sample_average_episode_reward; }
+  /// Return per-sample episodic reward standard deviations from the latest update window.
+  std::vector<Real> sampleStdEpsiodeRewards() { return _sample_std_episode_reward; }
 
   /**
-   * Function which evaluates the critic to get the value (discounter reward)
-   * @param input The observation values (responses)
-   * @return The estimated value
+   * Run the PPO update on a flattened on-policy batch.
+   * @param batch Flattened trajectory batch to train on.
    */
-  torch::Tensor evaluateValue(torch::Tensor & input);
+  void trainController(const LibtorchRLTrajectoryBuffer::TensorBatch & batch);
 
-  /**
-   * Function which evaluates the control net and then computes the logarithmic probability of the
-   * action
-   * @param input The observation values (responses)
-   * @param output The actions corresponding to the observations
-   * @return The estimated value for the logarithmic probability
-   */
-  torch::Tensor evaluateAction(torch::Tensor & input, torch::Tensor & output);
+  /// Return the current actor network.
+  const Moose::LibtorchActorNeuralNet & controlNeuralNet() const { return *_control_nn; }
+  /// Return the trainer seed used for sampling and shuffling.
+  unsigned int seed() const { return _seed; }
 
-  /// Compute the return value by discounting the rewards and summing them
-  void computeRewardToGo();
+protected:
+  /// Compute the average episodic reward statistics for the latest samples.
+  void computeEpisodeRewardStatistics();
 
-  /// Reset data after updating the neural network
+  /// Reset the stored rollout data after an update.
   void resetData();
 
-  /// Response reporter names
-  const std::vector<ReporterName> _response_names;
+  /// Observation reporter names
+  const std::vector<ReporterName> _state_names;
 
-  /// Pointers to the current values of the responses
-  std::vector<const std::vector<Real> *> _response_value_pointers;
+  /// Pointers to the current values of the observations
+  /// We can have multiple observations, multiple samples, multiple timesteps
+  std::vector<const std::vector<std::vector<Real>> *> _state_value_pointers;
 
-  /// Shifting constants for the responses
-  const std::vector<Real> _response_shift_factors;
+  /// Shifting constants for the observations
+  const std::vector<Real> _state_shift_factors;
 
-  /// Scaling constants for the responses
-  const std::vector<Real> _response_scaling_factors;
+  /// Scaling constants for the observations
+  const std::vector<Real> _state_scaling_factors;
 
   /// Control reporter names
-  const std::vector<ReporterName> _control_names;
+  const std::vector<ReporterName> _action_names;
+
+  /// Multiplicative action scaling embedded in the actor outputs
+  const std::vector<Real> _action_scaling_factors;
 
   /// Pointers to the current values of the control signals
-  std::vector<const std::vector<Real> *> _control_value_pointers;
+  /// We can have multiple control signals, multiple samples, multiple timesteps
+  std::vector<const std::vector<std::vector<Real>> *> _action_value_pointers;
 
   /// Log probability reporter names
   const std::vector<ReporterName> _log_probability_names;
 
   /// Pointers to the current values of the control log probabilities
-  std::vector<const std::vector<Real> *> _log_probability_value_pointers;
+  /// We can have multiple control signals, multiple samples, multiple timesteps
+  std::vector<const std::vector<std::vector<Real>> *> _log_probability_value_pointers;
 
   /// Reward reporter name
   const ReporterName _reward_name;
 
   /// Pointer to the current values of the reward
-  const std::vector<Real> * _reward_value_pointer;
+  /// We can have multiple samples, multiple timesteps
+  const std::vector<std::vector<Real>> * _reward_value_pointer;
 
   /// Number of timesteps to fetch from the reporters to be the input of then eural nets
   const unsigned int _input_timesteps;
@@ -120,19 +117,6 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase
   /// Number of outputs for the control neural network
   unsigned int _num_outputs;
 
-  ///@{
-  /// The gathered data from the reporters, each row represents one QoI, each column represents one time step
-  std::vector<std::vector<Real>> _input_data;
-  std::vector<std::vector<Real>> _output_data;
-  std::vector<std::vector<Real>> _log_probability_data;
-  ///@}
-
-  ///@{
-  /// The reward and return data. The return is calculated using the _reward_data
-  std::vector<Real> _reward_data;
-  std::vector<Real> _return_data;
-  ///@}
-
   /// Number of epochs for the training of the emulator
   const unsigned int _num_epochs;
 
@@ -156,9 +140,8 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase
 
   /// Decaying factor that is used when calculating the return from the reward
   const Real _decay_factor;
-
-  /// Standard deviation for the actions
-  const std::vector<Real> _action_std;
+  /// GAE lambda factor used while estimating advantages and returns.
+  const Real _lambda_factor;
 
   /// Name of the pytorch output file. This is used for loading and storing
   /// already existing data
@@ -176,6 +159,13 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase
 
   /// Storage for the current average episode reward
   Real _average_episode_reward;
+  /// Storage for the current episode reward standard deviation
+  Real _std_episode_reward;
+
+  /// Per-sample mean episodic rewards over the latest update window
+  std::vector<Real> _sample_average_episode_reward;
+  /// Per-sample episodic reward standard deviations over the latest update window
+  std::vector<Real> _sample_std_episode_reward;
 
   /// Switch to enable the standardization of the advantages
   const bool _standardize_advantage;
@@ -183,55 +173,75 @@ class LibtorchDRLControlTrainer : public SurrogateTrainerBase
   /// The frequency the loss should be printed
   const unsigned int _loss_print_frequency;
 
+  /// Base seed for stochastic optimizers and policy sampling.
+  const unsigned int _seed;
+
+  /// Optional lower bounds for each control signal
+  std::vector<Real> _min_values;
+  /// Optional upper bounds for each control signal
+  std::vector<Real> _max_values;
+
   /// Pointer to the control (or actor) neural net object
-  std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _control_nn;
+  std::shared_ptr<Moose::LibtorchActorNeuralNet> _control_nn;
   /// Pointer to the critic neural net object
   std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _critic_nn;
 
-  /// standard deviation in a tensor format for sampling the actual control value
-  torch::Tensor _std;
+  /// Best average episode reward seen so far while training
+  Real _highest_reward;
+  /// Entropy bonus coefficient used in the PPO actor loss
+  Real _entropy_coeff;
 
-  /// Torch::tensor version of the input and action data
-  torch::Tensor _input_tensor;
-  torch::Tensor _output_tensor;
-  torch::Tensor _return_tensor;
-  torch::Tensor _log_probability_tensor;
+  /// Adam optimizer used to update the actor network
+  std::unique_ptr<torch::optim::Adam> _actor_optimizer;
+  /// Adam optimizer used to update the critic network
+  std::unique_ptr<torch::optim::Adam> _critic_optimizer;
 
 private:
   /**
-   * Extract the response values from the postprocessors of the controlled system.
-   * This assumes that they are stored in an AccumulateReporter
-   * @param data The data where we would like to store the response values
-   * @param reporter_names The names of the reporters which need to be extracted
-   * @param num_timesteps The number of timesteps we want to use for training
+   * Resolve reporter names into cached pointer storage.
+   * @param reporter_names Reporter names to look up.
+   * @param pointer_storage Output vector that receives the reporter pointers.
    */
-  void getInputDataFromReporter(std::vector<std::vector<Real>> & data,
-                                const std::vector<const std::vector<Real> *> & reporter_links,
-                                const unsigned int num_timesteps);
+  void getReporterPointers(const std::vector<ReporterName> & reporter_names,
+                           std::vector<const std::vector<std::vector<Real>> *> & pointer_storage);
+
+  /// Pull trajectories out of the reporters and append them to the trajectory buffer.
+  void collectTrajectoriesFromReporters();
+
   /**
-   * Extract the output (actions, logarithmic probabilities) values from the postprocessors
-   * of the controlled system. This assumes that they are stored in an AccumulateReporter
-   * @param data The data where we would like to store the output values
-   * @param reporter_names The names of the reporters which need to be extracted
+   * Figure out how many aligned transitions a raw reporter sequence contains.
+   * @param raw_sequence_size Number of raw time entries in the reporter sequence.
+   * @return Number of valid transitions after history stacking and downsampling.
    */
-  void getOutputDataFromReporter(std::vector<std::vector<Real>> & data,
-                                 const std::vector<const std::vector<Real> *> & reporter_links);
+  unsigned int computeNumTransitions(std::size_t raw_sequence_size) const;
 
   /**
-   * Extract the reward values from the postprocessors of the controlled system
-   * This assumes that they are stored in an AccumulateReporter.
-   * @param data The data where we would like to store the reward values
-   * @param reporter_names The name of the reporter which need to be extracted
+   * Downsample one raw reporter sequence into the aligned rollout sequence we train on.
+   * @param sample Raw reporter sequence.
+   * @param offset Starting offset used for the aligned sequence.
+   * @param num_entries Number of aligned entries to extract.
+   * @return Downsampled sequence.
    */
-  void getRewardDataFromReporter(std::vector<Real> & data,
-                                 const std::vector<Real> * const reporter_link);
-
-  /// Getting reporter pointers with given names
-  void getReporterPointers(const std::vector<ReporterName> & reporter_names,
-                           std::vector<const std::vector<Real> *> & pointer_storage);
+  std::vector<Real> extractDownsampledSequence(const std::vector<Real> & sample,
+                                               unsigned int offset,
+                                               unsigned int num_entries) const;
 
   /// Counter for number of transient simulations that have been run before updating the controller
   unsigned int _update_counter;
+
+  /// Reporter downsampling stride used while assembling rollout trajectories
+  unsigned int _timestep_window;
+
+  /// Shared observation history stacking and factor-expansion helper
+  const LibtorchObservationHistoryHelper _observation_history;
+  /// Accumulated on-policy rollout data waiting to be flattened and trained on
+  LibtorchRLTrajectoryBuffer _trajectory_buffer;
+  /// Mini-batch sampler used to split flattened rollout data for PPO updates
+  const LibtorchRLMiniBatchSampler _sampler;
+  /// Helper that builds value targets and advantages from collected trajectories
+  const LibtorchRLValueEstimator _value_estimator;
+  /// PPO loss helper for the actor and critic updates
+  const LibtorchRLPPOLoss _ppo_loss;
 };
 
 #endif
diff --git a/modules/stochastic_tools/include/libtorch/transfers/SamplerDRLControlTransfer.h b/modules/stochastic_tools/include/libtorch/transfers/SamplerDRLControlTransfer.h
new file mode 100644
index 000000000000..0b97a480c159
--- /dev/null
+++ b/modules/stochastic_tools/include/libtorch/transfers/SamplerDRLControlTransfer.h
@@ -0,0 +1,56 @@
+//* This file is part of the MOOSE framework
+//* https://www.mooseframework.org
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#pragma once
+
+// torch-based includes
+#include "LibtorchDRLControlTrainer.h"
+
+#include "StochasticToolsTransfer.h"
+#include "SurrogateModelInterface.h"
+
+class SamplerDRLControlTransfer : public StochasticToolsTransfer, public SurrogateModelInterface
+{
+public:
+  static InputParameters validParams();
+
+  /**
+   * Build the transfer that pushes a trained controller into subapps.
+   * @param parameters Input parameters for the transfer.
+   */
+  SamplerDRLControlTransfer(const InputParameters & parameters);
+
+  /// Execute the transfer in the standard non-batch path.
+  virtual void execute() override;
+
+  ///@{
+  /**
+   * Methods used when running in batch mode (see SamplerFullSolveMultiApp).
+   */
+  virtual void initialSetup() override;
+  virtual void initializeFromMultiapp() override;
+  virtual void executeFromMultiapp() override;
+  virtual void finalizeFromMultiapp() override;
+
+  virtual void initializeToMultiapp() override;
+  virtual void executeToMultiapp() override;
+  virtual void finalizeToMultiapp() override;
+  ///@}
+
+protected:
+  /// The name of the control object on the other app where we want to copy our neural net
+  const std::string _control_name;
+
+  /// The trainer object which will contains the control neural net
+  const LibtorchDRLControlTrainer & _trainer;
+};
+
+#endif
diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h
new file mode 100644
index 000000000000..e1e8f04b4b55
--- /dev/null
+++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActionDistribution.h
@@ -0,0 +1,269 @@
+//* This file is part of the MOOSE framework
+//* https://www.mooseframework.org
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#pragma once
+
+#include <torch/torch.h>
+
+#include "MooseTypes.h"
+
+#include <string>
+#include <vector>
+
+namespace Moose
+{
+
+/**
+ * Reusable continuous-action distribution interface for actor policies.
+ */
+class LibtorchActionDistribution : public torch::nn::Module
+{
+public:
+  /**
+   * Build an action-distribution module for an actor network.
+   * @param name Module name used for registration and serialization.
+   * @param num_inputs Number of actor features feeding the distribution.
+   * @param num_outputs Number of action dimensions produced by the distribution.
+   * @param device_type Torch device used by the module.
+   * @param scalar_type Torch scalar type used by the module.
+   * @param output_scaling_factors Optional per-action scaling applied in physical units.
+   */
+  LibtorchActionDistribution(const std::string & name,
+                             unsigned int num_inputs,
+                             unsigned int num_outputs,
+                             torch::DeviceType device_type = torch::kCPU,
+                             torch::ScalarType scalar_type = torch::kDouble,
+                             const std::vector<Real> & output_scaling_factors = {});
+
+  /// Initialize the trainable distribution parameters.
+  virtual void initialize(c10::optional<at::Generator> generator = c10::nullopt) = 0;
+
+  /**
+   * Refresh cached distribution parameters from the latest actor features.
+   * @param input Feature tensor coming from the actor body.
+   */
+  virtual void reset(const torch::Tensor & input) = 0;
+
+  /**
+   * Draw a stochastic action sample in physical units.
+   * @param generator Optional random-number generator used for sampling.
+   */
+  virtual torch::Tensor sample(c10::optional<at::Generator> generator = c10::nullopt) const = 0;
+
+  /// Return the deterministic action used for evaluation.
+  virtual torch::Tensor deterministicAction() const = 0;
+
+  /**
+   * Evaluate the log-probability of an action under the current distribution.
+   * @param action Action tensor in physical units.
+   */
+  virtual torch::Tensor logProbability(const torch::Tensor & action) const = 0;
+
+  /// Compute the entropy of the current distribution.
+  virtual torch::Tensor entropy() const = 0;
+
+  /// Tell callers whether the distribution enforces explicit action bounds.
+  virtual bool isBounded() const = 0;
+
+  /// Sync cached scaling metadata from the registered buffers after loading state.
+  void synchronizeScalingFactorsFromBuffer();
+
+protected:
+  /**
+   * Convert actor features to the configured device and scalar type.
+   * @param input Raw actor feature tensor.
+   */
+  torch::Tensor prepareFeatures(const torch::Tensor & input) const;
+  /**
+   * Convert actions to the configured device and scalar type.
+   * @param action Raw action tensor.
+   */
+  torch::Tensor prepareAction(const torch::Tensor & action) const;
+  /// Return the registered tensor that stores per-action scaling factors.
+  const torch::Tensor & actionScaleTensor() const { return _action_scale_tensor; }
+
+  /// Module name used for registration and serialization.
+  const std::string _name;
+  /// Number of actor features feeding this distribution.
+  const unsigned int _num_inputs;
+  /// Number of action dimensions produced by this distribution.
+  const unsigned int _num_outputs;
+  /// Torch device used by this distribution.
+  const torch::DeviceType _device_type;
+  /// Torch scalar type used by this distribution.
+  const torch::ScalarType _data_type;
+  /// Cached per-action scaling factors applied in physical units.
+  std::vector<Real> _output_scaling_factors;
+
+  /// Registered libtorch buffer holding the per-action scaling factors.
+  torch::Tensor _action_scale_tensor;
+};
+
+/**
+ * Gaussian action distribution for unbounded action spaces.
+ */
+class LibtorchGaussianActionDistribution : public LibtorchActionDistribution
+{
+public:
+  /**
+   * Build the Gaussian action distribution used for unbounded controls.
+   * @param name Module name used for registration and serialization.
+   * @param num_inputs Number of actor features feeding the distribution.
+   * @param num_outputs Number of action dimensions produced by the distribution.
+   * @param device_type Torch device used by the module.
+   * @param scalar_type Torch scalar type used by the module.
+   * @param build_on_construct Whether to build the torch modules right away.
+   * @param output_scaling_factors Optional per-action scaling applied in physical units.
+   * @param state_independent_std Whether the learned std should ignore the current state.
+   */
+  LibtorchGaussianActionDistribution(const std::string & name,
+                                     unsigned int num_inputs,
+                                     unsigned int num_outputs,
+                                     torch::DeviceType device_type = torch::kCPU,
+                                     torch::ScalarType scalar_type = torch::kDouble,
+                                     bool build_on_construct = true,
+                                     const std::vector<Real> & output_scaling_factors = {},
+                                     bool state_independent_std = true);
+
+  virtual void initialize(c10::optional<at::Generator> generator = c10::nullopt) override;
+
+  virtual void reset(const torch::Tensor & input) override;
+
+  virtual torch::Tensor
+  sample(c10::optional<at::Generator> generator = c10::nullopt) const override;
+
+  virtual torch::Tensor deterministicAction() const override;
+
+  virtual torch::Tensor logProbability(const torch::Tensor & action) const override;
+
+  virtual torch::Tensor entropy() const override;
+
+  virtual bool isBounded() const override { return false; }
+
+  /// Return whether the Gaussian std ignores the current actor features.
+  bool stateIndependentStd() const { return _state_independent_std; }
+  /// Return the Gaussian mean head.
+  torch::nn::Linear & meanModule() { return _mean_module; }
+  /// Return the Gaussian mean head.
+  const torch::nn::Linear & meanModule() const { return _mean_module; }
+  /// Return the Gaussian std head.
+  torch::nn::Linear & stdModule() { return _std_module; }
+  /// Return the Gaussian std head.
+  const torch::nn::Linear & stdModule() const { return _std_module; }
+  /// Return the cached Gaussian standard deviation tensor.
+  const torch::Tensor & stdTensor() const { return _std_tensor; }
+
+private:
+  /// Build and register the Gaussian distribution heads.
+  void constructDistribution();
+
+  /// Whether the Gaussian std ignores the current actor features.
+  const bool _state_independent_std;
+  /// Linear head that produces the Gaussian action mean.
+  torch::nn::Linear _mean_module{nullptr};
+  /// Linear head that produces the Gaussian log-std inputs or bias-only std state.
+  torch::nn::Linear _std_module{nullptr};
+  /// Cached Gaussian action mean from the latest reset.
+  torch::Tensor _mean;
+  /// Cached Gaussian action standard deviation from the latest reset.
+  torch::Tensor _std_tensor;
+  /// Cached Gaussian action log standard deviation from the latest reset.
+  torch::Tensor _log_std_tensor;
+};
+
+/**
+ * Beta action distribution for bounded action spaces.
+ */
+class LibtorchBetaActionDistribution : public LibtorchActionDistribution
+{
+public:
+  /**
+   * Build the Beta action distribution used for bounded controls.
+   * @param name Module name used for registration and serialization.
+   * @param num_inputs Number of actor features feeding the distribution.
+   * @param num_outputs Number of action dimensions produced by the distribution.
+   * @param minimum_values Lower action bounds in physical units.
+   * @param maximum_values Upper action bounds in physical units.
+   * @param device_type Torch device used by the module.
+   * @param scalar_type Torch scalar type used by the module.
+   * @param build_on_construct Whether to build the torch modules right away.
+   * @param output_scaling_factors Optional extra per-action scaling in physical units.
+   */
+  LibtorchBetaActionDistribution(const std::string & name,
+                                 unsigned int num_inputs,
+                                 unsigned int num_outputs,
+                                 const std::vector<Real> & minimum_values,
+                                 const std::vector<Real> & maximum_values,
+                                 torch::DeviceType device_type = torch::kCPU,
+                                 torch::ScalarType scalar_type = torch::kDouble,
+                                 bool build_on_construct = true,
+                                 const std::vector<Real> & output_scaling_factors = {});
+
+  virtual void initialize(c10::optional<at::Generator> generator = c10::nullopt) override;
+
+  virtual void reset(const torch::Tensor & input) override;
+
+  virtual torch::Tensor
+  sample(c10::optional<at::Generator> generator = c10::nullopt) const override;
+
+  virtual torch::Tensor deterministicAction() const override;
+
+  virtual torch::Tensor logProbability(const torch::Tensor & action) const override;
+
+  virtual torch::Tensor entropy() const override;
+
+  virtual bool isBounded() const override { return true; }
+
+  /// Return the Beta alpha head.
+  torch::nn::Linear & alphaModule() { return _alpha_module; }
+  /// Return the Beta alpha head.
+  const torch::nn::Linear & alphaModule() const { return _alpha_module; }
+  /// Return the Beta beta head.
+  torch::nn::Linear & betaModule() { return _beta_module; }
+  /// Return the Beta beta head.
+  const torch::nn::Linear & betaModule() const { return _beta_module; }
+  /// Return the cached Beta alpha tensor.
+  const torch::Tensor & alphaTensor() const { return _alpha_tensor; }
+  /// Return the cached Beta beta tensor.
+  const torch::Tensor & betaTensor() const { return _beta_tensor; }
+
+private:
+  /// Build and register the Beta distribution heads.
+  void constructDistribution();
+
+  /// Lower action bounds in physical units.
+  const std::vector<Real> _minimum_values;
+  /// Upper action bounds in physical units.
+  const std::vector<Real> _maximum_values;
+
+  /// Linear head that produces the Beta alpha parameters.
+  torch::nn::Linear _alpha_module{nullptr};
+  /// Linear head that produces the Beta beta parameters.
+  torch::nn::Linear _beta_module{nullptr};
+  /// Tensor form of the lower action bounds.
+  torch::Tensor _min_tensor;
+  /// Tensor form of the upper action bounds.
+  torch::Tensor _max_tensor;
+  /// Cached Beta alpha parameters from the latest reset.
+  torch::Tensor _alpha_tensor;
+  /// Cached Beta beta parameters from the latest reset.
+  torch::Tensor _beta_tensor;
+  /// Cached sum of the alpha and beta parameters.
+  torch::Tensor _alpha_beta_tensor;
+  /// Cached log normalization factor for the latest Beta distribution state.
+  torch::Tensor _log_norm;
+  /// Cached normalized Beta mean from the latest reset.
+  torch::Tensor _mean;
+};
+
+} // namespace Moose
+
+#endif
diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h
new file mode 100644
index 000000000000..774cb198b051
--- /dev/null
+++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchActorNeuralNet.h
@@ -0,0 +1,210 @@
+//* This file is part of the MOOSE framework
+//* https://www.mooseframework.org
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#pragma once
+
+#include <torch/torch.h>
+#include "LibtorchActionDistribution.h"
+#include "LibtorchArtificialNeuralNet.h"
+
+namespace Moose
+{
+
+/**
+ * Feed-forward actor network coupled to a Gaussian or Beta action distribution.
+ */
+class LibtorchActorNeuralNet : public LibtorchArtificialNeuralNet
+{
+public:
+  /**
+   * Build an actor network with either a Gaussian or Beta action distribution.
+   * @param name Name of the neural network module.
+   * @param num_inputs Number of actor inputs.
+   * @param num_outputs Number of action outputs.
+   * @param num_neurons_per_layer Hidden-layer widths.
+   * @param activation_function Hidden-layer activation names.
+   * @param minimum_values Optional lower action bounds. Leave empty for Gaussian actions.
+   * @param maximum_values Optional upper action bounds. Leave empty for Gaussian actions.
+   * @param device_type Torch device used by the module.
+   * @param scalar_type Torch scalar type used by the module.
+   * @param build_on_construct Whether to build the torch modules right away.
+   * @param input_shift_factors Optional affine input shifts.
+   * @param input_scaling_factors Optional affine input scales.
+   * @param output_scaling_factors Optional per-action scaling in physical units.
+   * @param state_independent_std Whether the Gaussian std should ignore the current state.
+   */
+  LibtorchActorNeuralNet(const std::string name,
+                         const unsigned int num_inputs,
+                         const unsigned int num_outputs,
+                         const std::vector<unsigned int> & num_neurons_per_layer,
+                         const std::vector<std::string> & activation_function = {"relu"},
+                         const std::vector<Real> & minimum_values = {},
+                         const std::vector<Real> & maximum_values = {},
+                         const torch::DeviceType device_type = torch::kCPU,
+                         const torch::ScalarType scalar_type = torch::kDouble,
+                         const bool build_on_construct = true,
+                         const std::vector<Real> & input_shift_factors = {},
+                         const std::vector<Real> & input_scaling_factors = {},
+                         const std::vector<Real> & output_scaling_factors = {},
+                         const bool state_independent_std = true);
+
+  /**
+   * Copy-construct an actor network.
+   * @param nn Actor network to copy.
+   * @param build_on_construct Whether to rebuild the module structure during the copy.
+   */
+  LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn,
+                         const bool build_on_construct = true);
+
+  virtual torch::Tensor forward(const torch::Tensor & x) override;
+
+  /**
+   * Evaluate the actor and either sample from it or use its deterministic action.
+   * @param input Input tensor for the evaluation.
+   * @param sampled Whether to draw a stochastic sample.
+   * @param generator Optional random-number generator used for sampling.
+   */
+  virtual torch::Tensor evaluate(torch::Tensor & input,
+                                 bool sampled,
+                                 c10::optional<at::Generator> generator = c10::nullopt);
+
+  /**
+   * Sample an action from the already-reset distribution.
+   * @param generator Optional random-number generator used for sampling.
+   */
+  virtual torch::Tensor sample(c10::optional<at::Generator> generator = c10::nullopt);
+
+  virtual void constructNeuralNetwork() override;
+
+  /// Return the active action distribution as the common base type.
+  const LibtorchActionDistribution & actionDistribution() const { return *_action_distribution; }
+  /// Return the active action distribution as the common base type.
+  LibtorchActionDistribution & actionDistribution() { return *_action_distribution; }
+
+  /// Return the Gaussian action distribution pointer, or nullptr for bounded actors.
+  const LibtorchGaussianActionDistribution * gaussianActionDistributionPtr() const;
+  /// Return the Gaussian action distribution pointer, or nullptr for bounded actors.
+  LibtorchGaussianActionDistribution * gaussianActionDistributionPtr();
+  /// Return the Gaussian action distribution reference. Errors if the actor is bounded.
+  const LibtorchGaussianActionDistribution & gaussianActionDistribution() const;
+  /// Return the Gaussian action distribution reference. Errors if the actor is bounded.
+  LibtorchGaussianActionDistribution & gaussianActionDistribution();
+
+  /// Return the Beta action distribution pointer, or nullptr for Gaussian actors.
+  const LibtorchBetaActionDistribution * betaActionDistributionPtr() const;
+  /// Return the Beta action distribution pointer, or nullptr for Gaussian actors.
+  LibtorchBetaActionDistribution * betaActionDistributionPtr();
+  /// Return the Beta action distribution reference. Errors if the actor is unbounded.
+  const LibtorchBetaActionDistribution & betaActionDistribution() const;
+  /// Return the Beta action distribution reference. Errors if the actor is unbounded.
+  LibtorchBetaActionDistribution & betaActionDistribution();
+
+  /// Return whether the Gaussian std ignores the current actor features.
+  bool stateIndependentStd() const { return _state_independent_std; }
+  /// Return the configured lower action bounds.
+  const std::vector<Real> & minValues() const { return _minimum_values; }
+  /// Return the configured upper action bounds.
+  const std::vector<Real> & maxValues() const { return _maximum_values; }
+
+  /**
+   * Refresh the cached distribution parameters from a fresh input tensor.
+   * @param input Input tensor used to update the distribution.
+   */
+  void resetDistributionParams(torch::Tensor input);
+
+  /**
+   * Evaluate the log-probability of an action under the current actor state.
+   * @param other Action tensor in physical units.
+   */
+  torch::Tensor logProbability(const torch::Tensor & other);
+
+  /// Compute the entropy of the current action distribution.
+  torch::Tensor entropy();
+
+  virtual void
+  initializeNeuralNetwork(c10::optional<at::Generator> generator = c10::nullopt) override;
+
+protected:
+  /// Lower action bounds used by bounded actor distributions.
+  const std::vector<Real> _minimum_values;
+  /// Upper action bounds used by bounded actor distributions.
+  const std::vector<Real> _maximum_values;
+  /// Whether the Gaussian std ignores the current actor features.
+  const bool _state_independent_std;
+  /// Action-distribution module attached to the actor output.
+  std::shared_ptr<LibtorchActionDistribution> _action_distribution;
+};
+
+/**
+ * Dump an actor network into JSON for reporter output and debugging.
+ * @param json JSON object that receives the serialized state.
+ * @param network Actor network pointer to serialize.
+ */
+void to_json(nlohmann::json & json, const Moose::LibtorchActorNeuralNet * const & network);
+
+/**
+ * Load an actor checkpoint written as a native libtorch state archive.
+ * @param nn Actor network that receives the loaded state.
+ * @param filename Checkpoint file to read.
+ */
+void loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn,
+                                     const std::string & filename);
+
+}
+
+/**
+ * Serialize the actor-network metadata needed for restart.
+ * @param stream Stream that receives the serialized data.
+ * @param nn Actor network shared pointer to serialize.
+ * @param context Serialization context passed through MOOSE data I/O.
+ */
+template <>
+void dataStore<Moose::LibtorchActorNeuralNet>(std::ostream & stream,
+                                              std::shared_ptr<Moose::LibtorchActorNeuralNet> & nn,
+                                              void * context);
+
+/**
+ * Deserialize the actor-network metadata needed for restart.
+ * @param stream Stream that provides the serialized data.
+ * @param nn Actor network shared pointer to populate.
+ * @param context Serialization context passed through MOOSE data I/O.
+ */
+template <>
+void dataLoad<Moose::LibtorchActorNeuralNet>(std::istream & stream,
+                                             std::shared_ptr<Moose::LibtorchActorNeuralNet> & nn,
+                                             void * context);
+
+// This is needed because the reporter which is used to output the neural net parameters to JSON
+// requires a dataStore/dataLoad. However, these functions will be empty due to the fact that
+// we are only interested in the JSON output and we don't want to output everything
+/**
+ * Placeholder serializer for reporter-only actor pointers.
+ * @param stream Stream that would receive the serialized data.
+ * @param nn Reporter actor pointer.
+ * @param context Serialization context passed through MOOSE data I/O.
+ */
+template <>
+void dataStore<Moose::LibtorchActorNeuralNet const>(std::ostream & stream,
+                                                    Moose::LibtorchActorNeuralNet const *& nn,
+                                                    void * context);
+
+/**
+ * Placeholder deserializer for reporter-only actor pointers.
+ * @param stream Stream that would provide the serialized data.
+ * @param nn Reporter actor pointer.
+ * @param context Serialization context passed through MOOSE data I/O.
+ */
+template <>
+void dataLoad<Moose::LibtorchActorNeuralNet const>(std::istream & stream,
+                                                   Moose::LibtorchActorNeuralNet const *& nn,
+                                                   void * context);
+
+#endif
diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h
new file mode 100644
index 000000000000..8ea0ba65a1e0
--- /dev/null
+++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLMiniBatchSampler.h
@@ -0,0 +1,75 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#pragma once
+
+#include "LibtorchRLTrajectoryBuffer.h"
+
+#include <torch/torch.h>
+
+#include <cstdint>
+#include <vector>
+
+struct LibtorchRLMiniBatch
+{
+  /// Flattened observation rows for the mini-batch.
+  torch::Tensor observations;
+  /// Action rows that match the sampled observations.
+  torch::Tensor actions;
+  /// Behavior-policy log probabilities for the sampled actions.
+  torch::Tensor old_log_probabilities;
+  /// Critic targets aligned with the sampled observations.
+  torch::Tensor value_targets;
+  /// Advantage estimates aligned with the sampled observations.
+  torch::Tensor advantages;
+
+  /// Return the number of rows stored in the mini-batch.
+  std::int64_t size() const { return observations.defined() ? observations.size(0) : 0; }
+};
+
+/**
+ * Samples shuffled mini-batches from a flattened on-policy trajectory batch.
+ */
+class LibtorchRLMiniBatchSampler
+{
+public:
+  /**
+   * Shuffle a flattened rollout batch into PPO-sized chunks.
+   * @param batch Flattened rollout tensors ready for PPO updates.
+   * @param batch_size Preferred number of rows per mini-batch.
+   * @param standardize_advantage Whether to normalize the advantages inside each mini-batch.
+   * @param generator Optional random-number generator used for the row permutation.
+   */
+  std::vector<LibtorchRLMiniBatch>
+  sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch,
+         unsigned int batch_size,
+         bool standardize_advantage,
+         c10::optional<at::Generator> generator = c10::nullopt) const;
+
+private:
+  /**
+   * Sanity-check that the flattened rollout tensors all line up.
+   * @param batch Flattened rollout tensors to validate.
+   */
+  static void validateBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch);
+
+  /**
+   * Slice one shuffled mini-batch out of the flattened rollout tensors.
+   * @param batch Flattened rollout tensors.
+   * @param indices Row indices assigned to this mini-batch.
+   * @param standardize_advantage Whether to normalize the advantages in this slice.
+   */
+  static LibtorchRLMiniBatch makeMiniBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch,
+                                           const torch::Tensor & indices,
+                                           bool standardize_advantage);
+};
+
+#endif
diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h
new file mode 100644
index 000000000000..8e526138bfc8
--- /dev/null
+++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLPPOLoss.h
@@ -0,0 +1,64 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#pragma once
+
+#include "LibtorchActorNeuralNet.h"
+#include "LibtorchArtificialNeuralNet.h"
+#include "LibtorchRLMiniBatchSampler.h"
+
+struct LibtorchRLPPOLossOutput
+{
+  /// Clipped actor loss for the current mini-batch.
+  torch::Tensor actor_loss;
+  /// Critic regression loss for the current mini-batch.
+  torch::Tensor critic_loss;
+  /// Mean action-distribution entropy for the current mini-batch.
+  torch::Tensor entropy;
+};
+
+/**
+ * PPO clipped surrogate loss on top of the reusable RL buffer/value-estimation core.
+ */
+class LibtorchRLPPOLoss
+{
+public:
+  /**
+   * Build the PPO loss helper.
+   * @param clip_parameter PPO clipping width.
+   * @param entropy_coeff Weight applied to the entropy bonus.
+   */
+  LibtorchRLPPOLoss(Real clip_parameter, Real entropy_coeff);
+
+  /**
+   * Compute actor, critic, and entropy terms for one mini-batch.
+   * @param policy_network Actor network used for the policy term.
+   * @param value_network Critic network used for the value term.
+   * @param batch Mini-batch pulled from the on-policy trajectory buffer.
+   */
+  LibtorchRLPPOLossOutput compute(Moose::LibtorchActorNeuralNet & policy_network,
+                                  Moose::LibtorchArtificialNeuralNet & value_network,
+                                  const LibtorchRLMiniBatch & batch) const;
+
+private:
+  /**
+   * Collapse multi-action log-probabilities or entropies into one column tensor.
+   * @param tensor Action-wise tensor to reduce.
+   */
+  static torch::Tensor reduceActionDimension(const torch::Tensor & tensor);
+
+  /// PPO clipping width used in the actor surrogate objective.
+  const Real _clip_parameter;
+  /// Weight applied to the entropy bonus inside the actor loss.
+  const Real _entropy_coeff;
+};
+
+#endif
diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h
new file mode 100644
index 000000000000..ffd8e88fa7cf
--- /dev/null
+++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLTrajectoryBuffer.h
@@ -0,0 +1,104 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#pragma once
+
+#include "MooseTypes.h"
+
+#include <torch/torch.h>
+
+#include <cstdint>
+#include <vector>
+
+/**
+ * On-policy trajectory storage for fixed-horizon RL training.
+ */
+class LibtorchRLTrajectoryBuffer
+{
+public:
+  struct Trajectory
+  {
+    /// Observations for each transition.
+    std::vector<std::vector<Real>> observations;
+    /// Next observations for each transition.
+    std::vector<std::vector<Real>> next_observations;
+    /// Actions applied at each transition.
+    std::vector<std::vector<Real>> actions;
+    /// Action log-probabilities recorded during rollout.
+    std::vector<std::vector<Real>> log_probabilities;
+    /// Scalar rewards for each transition.
+    std::vector<Real> rewards;
+    /// Critic targets aligned with each transition.
+    std::vector<Real> value_targets;
+    /// Advantage estimates aligned with each transition.
+    std::vector<Real> advantages;
+  };
+
+  struct TensorBatch
+  {
+    /// Flattened observation matrix.
+    torch::Tensor observations;
+    /// Flattened next-observation matrix.
+    torch::Tensor next_observations;
+    /// Flattened action matrix.
+    torch::Tensor actions;
+    /// Flattened action log-probabilities.
+    torch::Tensor log_probabilities;
+    /// Flattened rewards.
+    torch::Tensor rewards;
+    /// Flattened critic targets.
+    torch::Tensor value_targets;
+    /// Flattened advantages.
+    torch::Tensor advantages;
+
+    /// Return the number of transitions represented by the batch.
+    std::int64_t size() const { return observations.defined() ? observations.size(0) : 0; }
+  };
+
+  /**
+   * Append one trajectory to the on-policy buffer.
+   * @param trajectory Trajectory to store.
+   */
+  void addTrajectory(Trajectory trajectory);
+
+  /// Clear every stored trajectory.
+  void clear();
+
+  bool empty() const { return _trajectories.empty(); }
+
+  std::size_t numTrajectories() const { return _trajectories.size(); }
+
+  /**
+   * Count the total number of transitions stored across every trajectory.
+   * @return Total transition count.
+   */
+  std::size_t numTransitions() const;
+
+  std::vector<Trajectory> & trajectories() { return _trajectories; }
+  const std::vector<Trajectory> & trajectories() const { return _trajectories; }
+
+  /**
+   * Flatten every stored trajectory into one tensor batch.
+   * @return Tensor batch ready for mini-batch sampling.
+   */
+  TensorBatch flatten() const;
+
+private:
+  /**
+   * Validate a trajectory before it is stored.
+   * @param trajectory Trajectory to validate.
+   */
+  static void validateTrajectory(const Trajectory & trajectory);
+
+  std::vector<Trajectory> _trajectories;
+};
+
+#endif
diff --git a/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h
new file mode 100644
index 000000000000..020683b46ba0
--- /dev/null
+++ b/modules/stochastic_tools/include/libtorch/utils/LibtorchRLValueEstimator.h
@@ -0,0 +1,73 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#pragma once
+
+#include "LibtorchArtificialNeuralNet.h"
+#include "LibtorchRLTrajectoryBuffer.h"
+
+#include <vector>
+
+/**
+ * Computes generalized-advantage estimates and value targets for an on-policy trajectory buffer,
+ * following Schulman et al., "High-Dimensional Continuous Control Using Generalized Advantage
+ * Estimation."
+ */
+class LibtorchRLValueEstimator
+{
+public:
+  struct Targets
+  {
+    /// Generalized-advantage estimates.
+    std::vector<Real> advantages;
+    /// Critic regression targets.
+    std::vector<Real> value_targets;
+  };
+
+  /**
+   * Build the GAE helper.
+   * @param discount_factor Reward discount factor.
+   * @param lambda_factor GAE lambda factor.
+   */
+  LibtorchRLValueEstimator(Real discount_factor, Real lambda_factor);
+
+  /**
+   * Fill every trajectory in the buffer with value targets and advantages.
+   * @param buffer On-policy trajectory buffer to update.
+   * @param value_network Critic used for target estimation.
+   */
+  void computeValueTargets(LibtorchRLTrajectoryBuffer & buffer,
+                           Moose::LibtorchArtificialNeuralNet & value_network) const;
+
+  /**
+   * Compute value targets and advantages for one trajectory.
+   * @param trajectory Trajectory to evaluate.
+   * @param value_network Critic used for target estimation.
+   */
+  Targets estimate(const LibtorchRLTrajectoryBuffer::Trajectory & trajectory,
+                   Moose::LibtorchArtificialNeuralNet & value_network) const;
+
+private:
+  /**
+   * Evaluate the critic on a batch of observations.
+   * @param observations Observation matrix to feed through the critic.
+   * @param value_network Critic used for the evaluation.
+   */
+  std::vector<Real> evaluate(const std::vector<std::vector<Real>> & observations,
+                             Moose::LibtorchArtificialNeuralNet & value_network) const;
+
+  /// Reward discount factor used in the temporal-difference recursion.
+  const Real _discount_factor;
+  /// GAE lambda factor used in the reverse-time advantage recursion.
+  const Real _lambda_factor;
+};
+
+#endif
diff --git a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C
index 6d9c313b6a12..4bd5f2ef6441 100644
--- a/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C
+++ b/modules/stochastic_tools/src/libtorch/controls/LibtorchDRLControl.C
@@ -10,9 +10,11 @@
 #ifdef MOOSE_LIBTORCH_ENABLED
 
 #include "LibtorchDRLControl.h"
-#include "TorchScriptModule.h"
-#include "LibtorchArtificialNeuralNet.h"
+#include "LibtorchRandomUtils.h"
 #include "Transient.h"
+#include "LibtorchUtils.h"
+
+#include <mutex>
 
 registerMooseObject("StochasticToolsApp", LibtorchDRLControl);
 
@@ -23,93 +25,237 @@ LibtorchDRLControl::validParams()
   params.addClassDescription(
       "Sets the value of multiple 'Real' input parameters and postprocessors based on a Deep "
       "Reinforcement Learning (DRL) neural network trained using a PPO algorithm.");
-  params.addRequiredParam<std::vector<Real>>(
-      "action_standard_deviations", "Standard deviation value used while sampling the actions.");
+  params.set<ExecFlagEnum>("execute_on") = EXEC_TIMESTEP_BEGIN;
+  params.suppressParameter<bool>("torch_script_format");
+
   params.addParam<unsigned int>("seed", "Seed for the random number generator.");
 
+  params.addRangeCheckedParam<unsigned int>(
+      "num_steps_in_period",
+      1,
+      "1<=num_steps_in_period",
+      "Number of controller executions between policy evaluations.");
+  params.addParam<Real>(
+      "smoother", 1.0, "Relaxation factor applied when smoothing control updates.");
+
+  params.addParam<bool>(
+      "stochastic",
+      true,
+      "If true, sample from the policy distribution; otherwise use the deterministic action.");
+
+  params.addParam<std::vector<Real>>(
+      "min_control_value", {}, "Optional lower bounds for each control signal.");
+  params.addParam<std::vector<Real>>(
+      "max_control_value", {}, "Optional upper bounds for each control signal.");
+  params.addParam<bool>(
+      "state_independent_std",
+      true,
+      "If true, interpret the unbounded Gaussian actor as learning one log-std per action "
+      "dimension. If false, use a state-dependent std head.");
+
   return params;
 }
 
 LibtorchDRLControl::LibtorchDRLControl(const InputParameters & parameters)
   : LibtorchNeuralNetControl(parameters),
-    _current_control_signal_log_probabilities(std::vector<Real>(_control_names.size(), 0.0)),
-    _action_std(getParam<std::vector<Real>>("action_standard_deviations"))
+    _current_control_signal_log_probabilities(declareRestartableData<std::vector<Real>>(
+        "current_control_signal_log_probabilities", std::vector<Real>(_control_names.size(), 0.0))),
+    _previous_control_signal(declareRestartableData<std::vector<Real>>(
+        "previous_control_signal", std::vector<Real>(_control_names.size(), 0.0))),
+    _current_smoothed_signal(declareRestartableData<std::vector<Real>>(
+        "current_smoothed_signal", std::vector<Real>(_control_names.size(), 0.0))),
+    _policy_generator(Moose::makeLibtorchCPUGenerator()),
+    _policy_generator_state(declareRestartableData<std::vector<std::uint8_t>>(
+        "policy_generator_state", std::vector<std::uint8_t>())),
+    _executions_until_next_policy_evaluation(
+        declareRestartableData<unsigned int>("executions_until_next_policy_evaluation", 0)),
+    _num_steps_in_period(getParam<unsigned int>("num_steps_in_period")),
+    _smoother(getParam<Real>("smoother")),
+    _stochastic(getParam<bool>("stochastic"))
 {
-  if (_control_names.size() != _action_std.size())
-    paramError("action_standard_deviations",
-               "Number of action_standard_deviations does not match the number of controlled "
-               "parameters.");
+  const auto & execute_on = getParam<ExecFlagEnum>("execute_on");
+  if (execute_on.size() != 1 || !execute_on.contains(EXEC_TIMESTEP_BEGIN))
+    paramError("execute_on", "LibtorchDRLControl only supports 'TIMESTEP_BEGIN' for 'execute_on'.");
 
-  // Fixing the RNG seed to make sure every experiment is the same.
   if (isParamValid("seed"))
-    torch::manual_seed(getParam<unsigned int>("seed"));
+    setPolicySampleSeed(getParam<unsigned int>("seed"));
+
+  savePolicyGeneratorState();
+}
 
-  // We convert and store the user-supplied standard deviations into a tensor which can be easily
-  // used by routines in libtorch
-  _std = torch::eye(_control_names.size());
-  for (unsigned int i = 0; i < _control_names.size(); ++i)
-    _std[i][i] = _action_std[i];
+void
+LibtorchDRLControl::initialSetup()
+{
+  LibtorchNeuralNetControl::initialSetup();
+  restorePolicyGeneratorState();
+  savePolicyGeneratorState();
+}
+
+void
+LibtorchDRLControl::loadControlNeuralNetFromFile()
+{
+  const auto & filename = getParam<std::string>("filename");
+  unsigned int num_inputs = _observation_names.size() * _input_timesteps;
+  unsigned int num_outputs = _control_names.size();
+  std::vector<unsigned int> num_neurons_per_layer =
+      getParam<std::vector<unsigned int>>("num_neurons_per_layer");
+  std::vector<std::string> activation_functions =
+      isParamSetByUser("activation_function")
+          ? getParam<std::vector<std::string>>("activation_function")
+          : std::vector<std::string>({"relu"});
+
+  const std::vector<Real> & minimum_values = getParam<std::vector<Real>>("min_control_value");
+  const std::vector<Real> & maximum_values = getParam<std::vector<Real>>("max_control_value");
+  const auto input_shift_factors =
+      _observation_history.expandObservationFactors(_observation_shift_factors);
+  const auto input_scaling_factors =
+      _observation_history.expandObservationFactors(_observation_scaling_factors);
+
+  _actor_nn =
+      std::make_shared<Moose::LibtorchActorNeuralNet>(filename,
+                                                      num_inputs,
+                                                      num_outputs,
+                                                      num_neurons_per_layer,
+                                                      activation_functions,
+                                                      minimum_values,
+                                                      maximum_values,
+                                                      torch::kCPU,
+                                                      torch::kDouble,
+                                                      true,
+                                                      input_shift_factors,
+                                                      input_scaling_factors,
+                                                      _action_scaling_factors,
+                                                      getParam<bool>("state_independent_std"));
+
+  Moose::loadLibtorchActorNeuralNetState(*_actor_nn, filename);
+  _nn = _actor_nn;
 }
 
 void
 LibtorchDRLControl::execute()
 {
-  if (_nn)
+  if (!_actor_nn)
   {
-    unsigned int n_controls = _control_names.size();
-    unsigned int num_old_timesteps = _input_timesteps - 1;
+    mooseAssert(!_nn, "LibtorchDRLControl should not store a non-actor controller network.");
+    return;
+  }
 
-    // Fill a vector with the current values of the responses
-    updateCurrentResponse();
+  mooseAssert(_current_execute_flag == EXEC_TIMESTEP_BEGIN,
+              "LibtorchDRLControl should only execute on TIMESTEP_BEGIN.");
 
-    // If this is the first time this control is called and we need to use older values, fill up the
-    // needed old values using the initial values
-    if (_old_responses.empty())
-      _old_responses.assign(num_old_timesteps, _current_response);
+  const unsigned int n_controls = _control_names.size();
+  const bool first_control_execution = _old_observations.empty();
 
-    // Organize the old an current solution into a tensor so we can evaluate the neural net
-    torch::Tensor input_tensor = prepareInputTensor();
+  // Fill a vector with the current observation values.
+  updateCurrentObservation();
 
-    // Evaluate the neural network to get the expected control value
-    torch::Tensor output_tensor = _nn->forward(input_tensor);
+  // Seed the observation history with the initial observation when the control first runs.
+  if (first_control_execution)
+    _observation_history.initializeHistory(_current_observation, _old_observations);
 
-    // Sample control value (action) from Gaussian distribution
-    torch::Tensor action = at::normal(output_tensor, _std);
+  if (shouldEvaluatePolicy())
+  {
+    torch::Tensor input_tensor = prepareInputTensor();
+    torch::Tensor action = _actor_nn->evaluate(input_tensor, _stochastic, _policy_generator);
+    savePolicyGeneratorState();
 
-    // Compute log probability
-    torch::Tensor log_probability = computeLogProbability(action, output_tensor);
+    if (_stochastic)
+    {
+      torch::Tensor log_probability = _actor_nn->logProbability(action);
+      _current_control_signal_log_probabilities = {log_probability.data_ptr<Real>(),
+                                                   log_probability.data_ptr<Real>() +
+                                                       log_probability.size(1)};
+    }
+    else
+      _current_control_signal_log_probabilities.assign(n_controls, 0.0);
 
-    // Convert data
     _current_control_signals = {action.data_ptr<Real>(), action.data_ptr<Real>() + action.size(1)};
 
-    _current_control_signal_log_probabilities = {log_probability.data_ptr<Real>(),
-                                                 log_probability.data_ptr<Real>() +
-                                                     log_probability.size(1)};
+    if (first_control_execution)
+      _current_smoothed_signal = _current_control_signals;
+  }
 
-    for (unsigned int control_i = 0; control_i < n_controls; ++control_i)
-    {
-      // We scale the controllable value for physically meaningful control action
-      setControllableValueByName<Real>(_control_names[control_i],
-                                       _current_control_signals[control_i] *
-                                           _action_scaling_factors[control_i]);
-    }
+  _previous_control_signal = _current_smoothed_signal;
+
+  for (const auto i : index_range(_current_smoothed_signal))
+    _current_smoothed_signal[i] =
+        _previous_control_signal[i] +
+        _smoother * (_current_control_signals[i] - _previous_control_signal[i]);
+
+  for (unsigned int control_i = 0; control_i < n_controls; ++control_i)
+    setControllableValueByName<Real>(_control_names[control_i],
+                                     _current_smoothed_signal[control_i]);
 
-    // We add the curent solution to the old solutions and move everything in there one step
-    // backward
-    std::rotate(_old_responses.rbegin(), _old_responses.rbegin() + 1, _old_responses.rend());
-    _old_responses[0] = _current_response;
+  _observation_history.advanceHistory(_current_observation, _old_observations);
+}
+
+void
+LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchActorNeuralNet & input_nn)
+{
+  _actor_nn = std::make_shared<Moose::LibtorchActorNeuralNet>(input_nn);
+  _nn = _actor_nn;
+}
+
+void
+LibtorchDRLControl::loadControlNeuralNet(const Moose::LibtorchArtificialNeuralNet & input_nn)
+{
+  const auto * check = dynamic_cast<const Moose::LibtorchActorNeuralNet *>(&input_nn);
+  if (!check)
+    mooseError("This needs to be a LibtorchActorNeuralNet!");
+  loadControlNeuralNet(*check);
+}
+
+void
+LibtorchDRLControl::setPolicySampleSeed(const uint64_t seed)
+{
+  {
+    std::lock_guard<std::mutex> lock(_policy_generator.mutex());
+    _policy_generator.set_current_seed(seed);
   }
+  savePolicyGeneratorState();
 }
 
-torch::Tensor
-LibtorchDRLControl::computeLogProbability(const torch::Tensor & action,
-                                          const torch::Tensor & output_tensor)
+bool
+LibtorchDRLControl::shouldEvaluatePolicy()
 {
-  // Logarithmic probability of taken action, given the current distribution.
-  torch::Tensor var = torch::matmul(_std, _std);
+  if (_executions_until_next_policy_evaluation == 0)
+  {
+    _executions_until_next_policy_evaluation = _num_steps_in_period - 1;
+    return true;
+  }
+
+  --_executions_until_next_policy_evaluation;
+  return false;
+}
+
+void
+LibtorchDRLControl::restorePolicyGeneratorState()
+{
+  if (!_stochastic || _policy_generator_state.empty())
+    return;
+
+  auto state_tensor =
+      torch::from_blob(_policy_generator_state.data(),
+                       {static_cast<long>(_policy_generator_state.size())},
+                       torch::TensorOptions().dtype(torch::kUInt8).device(torch::kCPU))
+          .clone();
+  std::lock_guard<std::mutex> lock(_policy_generator.mutex());
+  _policy_generator.set_state(state_tensor);
+}
+
+void
+LibtorchDRLControl::savePolicyGeneratorState()
+{
+  if (!_stochastic)
+  {
+    _policy_generator_state.clear();
+    return;
+  }
 
-  return -((action - output_tensor) * (action - output_tensor)) / (2.0 * var) - torch::log(_std) -
-         std::log(std::sqrt(2.0 * M_PI));
+  std::lock_guard<std::mutex> lock(_policy_generator.mutex());
+  const auto state_tensor = _policy_generator.get_state().contiguous();
+  const auto * data = state_tensor.data_ptr<std::uint8_t>();
+  _policy_generator_state.assign(data, data + static_cast<std::size_t>(state_tensor.numel()));
 }
 
 Real
diff --git a/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C b/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C
index acfef49acdc2..c91caf5736c9 100644
--- a/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C
+++ b/modules/stochastic_tools/src/libtorch/reporters/DRLRewardReporter.C
@@ -21,7 +21,7 @@ DRLRewardReporter::validParams()
 
   params.addClassDescription("Reporter containing the reward values of a DRL controller trainer.");
   params.addRequiredParam<UserObjectName>(
-      "drl_trainer_name", "The name of the RDL controller trainer which computes the rewards.");
+      "drl_trainer_name", "The name of the DRL controller trainer which computes the rewards.");
 
   return params;
 }
@@ -29,7 +29,12 @@ DRLRewardReporter::validParams()
 DRLRewardReporter::DRLRewardReporter(const InputParameters & parameters)
   : GeneralReporter(parameters),
     SurrogateModelInterface(this),
-    _reward(declareValueByName<Real>("average_reward", REPORTER_MODE_ROOT)),
+    _average_reward(declareValueByName<Real>("average_reward", REPORTER_MODE_ROOT)),
+    _std_reward(declareValueByName<Real>("std_reward", REPORTER_MODE_ROOT)),
+    _sample_average_reward(
+        declareValueByName<std::vector<Real>>("sample_average_reward", REPORTER_MODE_ROOT)),
+    _sample_std_reward(
+        declareValueByName<std::vector<Real>>("sample_std_reward", REPORTER_MODE_ROOT)),
     _trainer(getSurrogateTrainer<LibtorchDRLControlTrainer>("drl_trainer_name"))
 {
 }
@@ -37,7 +42,10 @@ DRLRewardReporter::DRLRewardReporter(const InputParameters & parameters)
 void
 DRLRewardReporter::execute()
 {
-  _reward = _trainer.averageEpisodeReward();
+  _average_reward = _trainer.averageEpisodeReward();
+  _std_reward = _trainer.stdEpisodeReward();
+  _sample_average_reward = _trainer.sampleAverageEpsiodeRewards();
+  _sample_std_reward = _trainer.sampleStdEpsiodeRewards();
 }
 
 #endif
diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C
index e9951ed7277a..c2818b3570eb 100644
--- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C
+++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchANNTrainer.C
@@ -128,7 +128,7 @@ LibtorchANNTrainer::postTrain()
   if (_read_from_file)
     try
     {
-      torch::load(_nn, _nn_filename);
+      Moose::loadLibtorchArtificialNeuralNetState(*_nn, _nn_filename);
       _console << "Loaded requested .pt file." << std::endl;
     }
     catch (const c10::Error & e)
diff --git a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C
index c7461db9baa9..d24799c3a08a 100644
--- a/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C
+++ b/modules/stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C
@@ -9,12 +9,11 @@
 
 #ifdef MOOSE_LIBTORCH_ENABLED
 
-#include "LibtorchDataset.h"
-#include "LibtorchArtificialNeuralNetTrainer.h"
-#include "LibtorchUtils.h"
 #include "LibtorchDRLControlTrainer.h"
-#include "Sampler.h"
-#include "Function.h"
+#include "LibtorchRandomUtils.h"
+
+#include <cmath>
+#include <numeric>
 
 registerMooseObject("StochasticToolsApp", LibtorchDRLControlTrainer);
 
@@ -24,22 +23,27 @@ LibtorchDRLControlTrainer::validParams()
   InputParameters params = SurrogateTrainerBase::validParams();
 
   params.addClassDescription(
-      "Trains a neural network controller using the Proximal Policy Optimization (PPO) algorithm.");
+      "Trains a neural network controller using fixed-horizon PPO on top of the libtorch RL core.");
 
   params.addRequiredParam<std::vector<ReporterName>>(
-      "response", "Reporter values containing the response values from the model.");
+      "observation", "Reporter values containing the observation values from the model.");
   params.addParam<std::vector<Real>>(
-      "response_shift_factors",
-      "A shift constant which will be used to shift the response values. This is used for the "
-      "manipulation of the neural net inputs for better training efficiency.");
+      "observation_shift_factors",
+      {},
+      "Optional offsets applied to the observed state values before scaling.");
   params.addParam<std::vector<Real>>(
-      "response_scaling_factors",
-      "A normalization constant which will be used to divide the response values. This is used for "
-      "the manipulation of the neural net inputs for better training efficiency.");
+      "observation_scaling_factors",
+      {},
+      "Optional multipliers applied after shifting the observed state values.");
   params.addRequiredParam<std::vector<ReporterName>>(
       "control",
       "Reporters containing the values of the controlled quantities (control signals) from the "
       "model simulations.");
+  params.addParam<std::vector<Real>>(
+      "action_scaling_factors",
+      {},
+      "Scale factors embedded into the trained policy outputs so transferred and checkpointed "
+      "controllers operate in physical units.");
   params.addRequiredParam<std::vector<ReporterName>>(
       "log_probability",
       "Reporters containing the log probabilities of the actions taken during the simulations.");
@@ -51,48 +55,36 @@ LibtorchDRLControlTrainer::validParams()
       "1<=input_timesteps",
       "Number of time steps to use in the input data, if larger than 1, "
       "data from the previous timesteps will be used as inputs in the training.");
-  params.addParam<unsigned int>("skip_num_rows",
-                                1,
-                                "Number of rows to ignore from training. We usually skip the 1st "
-                                "row from the reporter since it contains only initial values.");
 
   params.addRequiredParam<unsigned int>("num_epochs", "Number of epochs for the training.");
 
-  params.addRequiredRangeCheckedParam<Real>(
-      "critic_learning_rate",
-      "0<critic_learning_rate",
-      "Learning rate (relaxation) for the emulator training.");
-  params.addRequiredParam<std::vector<unsigned int>>(
-      "num_critic_neurons_per_layer", "Number of neurons per layer in the emulator neural net.");
+  params.addRequiredRangeCheckedParam<Real>("critic_learning_rate",
+                                            "0<critic_learning_rate",
+                                            "Learning rate used by the critic optimizer.");
+  params.addRequiredParam<std::vector<unsigned int>>("num_critic_neurons_per_layer",
+                                                     "Hidden-layer widths for the critic network.");
   params.addParam<std::vector<std::string>>(
       "critic_activation_functions",
       std::vector<std::string>({"relu"}),
-      "The type of activation functions to use in the emulator neural net. It is either one value "
-      "or one value per hidden layer.");
+      "Activation name for each critic hidden layer, or one shared value for all layers.");
 
-  params.addRequiredRangeCheckedParam<Real>(
-      "control_learning_rate",
-      "0<control_learning_rate",
-      "Learning rate (relaxation) for the control neural net training.");
+  params.addRequiredRangeCheckedParam<Real>("control_learning_rate",
+                                            "0<control_learning_rate",
+                                            "Learning rate used by the actor optimizer.");
   params.addRequiredParam<std::vector<unsigned int>>(
       "num_control_neurons_per_layer",
       "Number of neurons per layer for the control neural network.");
   params.addParam<std::vector<std::string>>(
       "control_activation_functions",
       std::vector<std::string>({"relu"}),
-      "The type of activation functions to use in the control neural net. It "
-      "is either one value "
-      "or one value per hidden layer.");
+      "Activation name for each actor hidden layer, or one shared value for all layers.");
 
   params.addParam<std::string>("filename_base",
-                               "Filename used to output the neural net parameters.");
+                               "Base filename used when writing actor and critic checkpoints.");
 
   params.addParam<unsigned int>(
       "seed", 11, "Random number generator seed for stochastic optimizers.");
 
-  params.addRequiredParam<std::vector<Real>>(
-      "action_standard_deviations", "Standard deviation value used while sampling the actions.");
-
   params.addParam<Real>(
       "clip_parameter", 0.2, "Clip parameter used while clamping the advantage value.");
   params.addRangeCheckedParam<unsigned int>(
@@ -105,45 +97,61 @@ LibtorchDRLControlTrainer::validParams()
       "decay_factor",
       1.0,
       "0.0<=decay_factor<=1.0",
-      "Decay factor for calculating the return. This accounts for decreased "
-      "reward values from the later steps.");
+      "Discount factor used when building PPO return and GAE targets.");
+
+  params.addRangeCheckedParam<Real>("lambda_factor", 1.0, "0.0<=lambda_factor<=1.0", "GAE lambda.");
 
   params.addParam<bool>(
       "read_from_file", false, "Switch to read the neural network parameters from a file.");
   params.addParam<bool>(
       "shift_outputs",
       true,
-      "If we would like to shift the outputs the realign the input-output pairs.");
+      "Whether to shift rollout outputs so observations and actions line up in time.");
   params.addParam<bool>(
       "standardize_advantage",
       true,
       "Switch to enable the shifting and normalization of the advantages in the PPO algorithm.");
-  params.addParam<unsigned int>("loss_print_frequency",
-                                0,
-                                "The frequency which is used to print the loss values. If 0, the "
-                                "loss values are not printed.");
+  params.addParam<unsigned int>(
+      "loss_print_frequency", 0, "Print PPO loss values every N updates. Use 0 to stay quiet.");
+  params.addParam<unsigned int>("batch_size", 100, "Number of flattened samples per mini-batch.");
+  params.addParam<std::vector<Real>>(
+      "min_control_value", {}, "Optional lower bounds for each control signal.");
+  params.addParam<std::vector<Real>>(
+      "max_control_value", {}, "Optional upper bounds for each control signal.");
+  params.addParam<bool>(
+      "state_independent_std",
+      true,
+      "If true, learn one Gaussian log-std parameter per action dimension. If false, learn a "
+      "state-dependent std head as in the older MOOSE actor implementation.");
+
+  params.addParam<Real>(
+      "entropy_coeff", 0.01, "Entropy bonus coefficient used in the PPO actor loss.");
+
+  params.addParam<unsigned int>(
+      "timestep_window", 1, "Use every nth reporter timestep when assembling trajectories.");
+
   return params;
 }
 
 LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & parameters)
   : SurrogateTrainerBase(parameters),
-    _response_names(getParam<std::vector<ReporterName>>("response")),
-    _response_shift_factors(isParamValid("response_shift_factors")
-                                ? getParam<std::vector<Real>>("response_shift_factors")
-                                : std::vector<Real>(_response_names.size(), 0.0)),
-    _response_scaling_factors(isParamValid("response_scaling_factors")
-                                  ? getParam<std::vector<Real>>("response_scaling_factors")
-                                  : std::vector<Real>(_response_names.size(), 1.0)),
-    _control_names(getParam<std::vector<ReporterName>>("control")),
+    _state_names(getParam<std::vector<ReporterName>>("observation")),
+    _state_shift_factors(isParamSetByUser("observation_shift_factors")
+                             ? getParam<std::vector<Real>>("observation_shift_factors")
+                             : std::vector<Real>(_state_names.size(), 0.0)),
+    _state_scaling_factors(isParamSetByUser("observation_scaling_factors")
+                               ? getParam<std::vector<Real>>("observation_scaling_factors")
+                               : std::vector<Real>(_state_names.size(), 1.0)),
+    _action_names(getParam<std::vector<ReporterName>>("control")),
+    _action_scaling_factors(isParamSetByUser("action_scaling_factors")
+                                ? getParam<std::vector<Real>>("action_scaling_factors")
+                                : std::vector<Real>(_action_names.size(), 1.0)),
     _log_probability_names(getParam<std::vector<ReporterName>>("log_probability")),
     _reward_name(getParam<ReporterName>("reward")),
-    _reward_value_pointer(&getReporterValueByName<std::vector<Real>>(_reward_name)),
+    _reward_value_pointer(&getReporterValueByName<std::vector<std::vector<Real>>>(_reward_name)),
     _input_timesteps(getParam<unsigned int>("input_timesteps")),
-    _num_inputs(_input_timesteps * _response_names.size()),
-    _num_outputs(_control_names.size()),
-    _input_data(std::vector<std::vector<Real>>(_num_inputs)),
-    _output_data(std::vector<std::vector<Real>>(_num_outputs)),
-    _log_probability_data(std::vector<std::vector<Real>>(_num_outputs)),
+    _num_inputs(_input_timesteps * _state_names.size()),
+    _num_outputs(_action_names.size()),
     _num_epochs(getParam<unsigned int>("num_epochs")),
     _num_critic_neurons_per_layer(
         getParam<std::vector<unsigned int>>("num_critic_neurons_per_layer")),
@@ -154,60 +162,76 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par
     _update_frequency(getParam<unsigned int>("update_frequency")),
     _clip_param(getParam<Real>("clip_parameter")),
     _decay_factor(getParam<Real>("decay_factor")),
-    _action_std(getParam<std::vector<Real>>("action_standard_deviations")),
+    _lambda_factor(getParam<Real>("lambda_factor")),
     _filename_base(isParamValid("filename_base") ? getParam<std::string>("filename_base") : ""),
     _read_from_file(getParam<bool>("read_from_file")),
     _shift_outputs(getParam<bool>("shift_outputs")),
+    _average_episode_reward(0.0),
     _standardize_advantage(getParam<bool>("standardize_advantage")),
     _loss_print_frequency(getParam<unsigned int>("loss_print_frequency")),
-    _update_counter(_update_frequency)
+    _seed(getParam<unsigned int>("seed")),
+    _min_values(getParam<std::vector<Real>>("min_control_value")),
+    _max_values(getParam<std::vector<Real>>("max_control_value")),
+    _highest_reward(-1e8),
+    _entropy_coeff(getParam<Real>("entropy_coeff")),
+    _update_counter(_update_frequency),
+    _timestep_window(getParam<unsigned int>("timestep_window")),
+    _observation_history(_input_timesteps),
+    _value_estimator(_decay_factor, _lambda_factor),
+    _ppo_loss(_clip_param, _entropy_coeff)
 {
-  if (_response_names.size() != _response_shift_factors.size())
-    paramError("response_shift_factors",
-               "The number of shift factors is not the same as the number of responses!");
+  if (_state_names.size() != _state_shift_factors.size())
+    paramError("observation_shift_factors",
+               "The number of shift factors is not the same as the number of observations!");
 
-  if (_response_names.size() != _response_scaling_factors.size())
-    paramError(
-        "response_scaling_factors",
-        "The number of normalization coefficients is not the same as the number of responses!");
+  if (_state_names.size() != _state_scaling_factors.size())
+    paramError("observation_scaling_factors",
+               "The number of normalization coefficients is not the same as the number of "
+               "observations!");
 
-  // We establish the links with the chosen reporters
-  getReporterPointers(_response_names, _response_value_pointers);
-  getReporterPointers(_control_names, _control_value_pointers);
-  getReporterPointers(_log_probability_names, _log_probability_value_pointers);
+  if (_action_names.size() != _log_probability_names.size())
+    paramError("log_probability",
+               "The number of log-probability reporters must match the number of control "
+               "reporters.");
 
-  // Fixing the RNG seed to make sure every experiment is the same.
-  // Otherwise sampling / stochastic gradient descent would be different.
-  torch::manual_seed(getParam<unsigned int>("seed"));
+  if (_action_names.size() != _action_scaling_factors.size())
+    paramError("action_scaling_factors",
+               "The number of action scaling factors must match the number of control "
+               "reporters.");
 
-  // Convert the user input standard deviations to a diagonal tensor
-  _std = torch::eye(_control_names.size());
-  for (unsigned int i = 0; i < _control_names.size(); ++i)
-    _std[i][i] = _action_std[i];
+  // We establish the links with the chosen reporters
+  getReporterPointers(_state_names, _state_value_pointers);
+  getReporterPointers(_action_names, _action_value_pointers);
+  getReporterPointers(_log_probability_names, _log_probability_value_pointers);
 
   bool filename_valid = isParamValid("filename_base");
+  const auto input_shift_factors =
+      _observation_history.expandObservationFactors(_state_shift_factors);
+  const auto input_scaling_factors =
+      _observation_history.expandObservationFactors(_state_scaling_factors);
 
   // Initializing the control neural net so that the control can grab it right away
-  _control_nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(
+  _control_nn = std::make_shared<Moose::LibtorchActorNeuralNet>(
       filename_valid ? _filename_base + "_control.net" : "control.net",
       _num_inputs,
       _num_outputs,
       _num_control_neurons_per_layer,
-      getParam<std::vector<std::string>>("control_activation_functions"));
+      getParam<std::vector<std::string>>("control_activation_functions"),
+      _min_values,
+      _max_values,
+      torch::kCPU,
+      torch::kDouble,
+      true,
+      input_shift_factors,
+      input_scaling_factors,
+      _action_scaling_factors,
+      getParam<bool>("state_independent_std"));
 
   // We read parameters for the control neural net if it is requested
   if (_read_from_file)
   {
-    try
-    {
-      torch::load(_control_nn, _control_nn->name());
-      _console << "Loaded requested .pt file." << std::endl;
-    }
-    catch (const c10::Error & e)
-    {
-      mooseError("The requested pytorch file could not be loaded for the control neural net.\n",
-                 e.msg());
-    }
+    Moose::loadLibtorchActorNeuralNetState(*_control_nn, _control_nn->name());
+    _console << "Loaded requested .pt file." << std::endl;
   }
   else if (filename_valid)
     torch::save(_control_nn, _control_nn->name());
@@ -218,14 +242,24 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par
       _num_inputs,
       1,
       _num_critic_neurons_per_layer,
-      getParam<std::vector<std::string>>("critic_activation_functions"));
+      getParam<std::vector<std::string>>("critic_activation_functions"),
+      torch::kCPU,
+      torch::kDouble,
+      true,
+      input_shift_factors,
+      input_scaling_factors);
+
+  _actor_optimizer = std::make_unique<torch::optim::Adam>(
+      _control_nn->parameters(), torch::optim::AdamOptions(_control_learning_rate));
+  _critic_optimizer = std::make_unique<torch::optim::Adam>(
+      _critic_nn->parameters(), torch::optim::AdamOptions(_critic_learning_rate));
 
   // We read parameters for the critic neural net if it is requested
   if (_read_from_file)
   {
     try
     {
-      torch::load(_critic_nn, _critic_nn->name());
+      Moose::loadLibtorchArtificialNeuralNetState(*_critic_nn, _critic_nn->name());
       _console << "Loaded requested .pt file." << std::endl;
     }
     catch (const c10::Error & e)
@@ -236,131 +270,146 @@ LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & par
   }
   else if (filename_valid)
     torch::save(_critic_nn, _critic_nn->name());
+
+  _control_nn->initializeNeuralNetwork(
+      Moose::makeLibtorchCPUGenerator(static_cast<uint64_t>(_seed)));
+  _critic_nn->initializeNeuralNetwork(
+      Moose::makeLibtorchCPUGenerator(static_cast<uint64_t>(_seed) + 1));
 }
 
 void
 LibtorchDRLControlTrainer::execute()
 {
-  // Extract data from the reporters
-  getInputDataFromReporter(_input_data, _response_value_pointers, _input_timesteps);
-  getOutputDataFromReporter(_output_data, _control_value_pointers);
-  getOutputDataFromReporter(_log_probability_data, _log_probability_value_pointers);
-  getRewardDataFromReporter(_reward_data, _reward_value_pointer);
-
-  // Calculate return from the reward (discounting the reward)
-  computeRewardToGo();
+  collectTrajectoriesFromReporters();
 
   _update_counter--;
 
-  // Only update the NNs when
-  if (_update_counter == 0)
-  {
-    // We compute the average reward first
-    computeAverageEpisodeReward();
+  if (_update_counter != 0 || _trajectory_buffer.empty())
+    return;
 
-    // Transform input/output/return data to torch::Tensor
-    convertDataToTensor(_input_data, _input_tensor);
-    convertDataToTensor(_output_data, _output_tensor);
-    convertDataToTensor(_log_probability_data, _log_probability_tensor);
+  computeEpisodeRewardStatistics();
 
-    // Discard (detach) the gradient info for return data
-    LibtorchUtils::vectorToTensor<Real>(_return_data, _return_tensor, true);
+  if (_average_episode_reward > _highest_reward)
+  {
+    torch::save(_control_nn, _control_nn->name() + "_best");
+    _highest_reward = _average_episode_reward;
+  }
 
-    // We train the controller using the emulator to get a good control strategy
-    trainController();
+  _value_estimator.computeValueTargets(_trajectory_buffer, *_critic_nn);
+  const auto batch = _trajectory_buffer.flatten();
 
-    // We clean the training data after contoller update and reset the counter
-    resetData();
-  }
+  trainController(batch);
+  resetData();
 }
 
 void
-LibtorchDRLControlTrainer::computeAverageEpisodeReward()
+LibtorchDRLControlTrainer::computeEpisodeRewardStatistics()
 {
-  if (_reward_data.size())
-    _average_episode_reward =
-        std::accumulate(_reward_data.begin(), _reward_data.end(), 0.0) / _reward_data.size();
-  else
+  if (_trajectory_buffer.empty())
+  {
     _average_episode_reward = 0.0;
-}
+    _std_episode_reward = 0.0;
+    _sample_average_episode_reward.clear();
+    _sample_std_episode_reward.clear();
+    return;
+  }
 
-void
-LibtorchDRLControlTrainer::computeRewardToGo()
-{
-  // Get reward data from one simulation
-  std::vector<Real> reward_data_per_sim;
-  std::vector<Real> return_data_per_sim;
-  getRewardDataFromReporter(reward_data_per_sim, _reward_value_pointer);
-
-  // Discount the reward to get the return value, we need this to be able to anticipate
-  // rewards based on the current behavior.
-  Real discounted_reward(0.0);
-  for (int i = reward_data_per_sim.size() - 1; i >= 0; --i)
+  _average_episode_reward = 0.0;
+  _std_episode_reward = 0.0;
+  unsigned int combined_sizes = 0;
+
+  _sample_average_episode_reward.clear();
+  _sample_std_episode_reward.clear();
+
+  for (const auto & trajectory : _trajectory_buffer.trajectories())
   {
-    discounted_reward = reward_data_per_sim[i] + discounted_reward * _decay_factor;
+    const auto & sample = trajectory.rewards;
+    const unsigned int sample_size = sample.size();
+    if (!sample_size)
+      continue;
+
+    const Real sum = std::accumulate(sample.begin(), sample.end(), 0.0);
+    const Real mean = sum / sample_size;
+    _sample_average_episode_reward.push_back(mean);
+
+    const Real variance =
+        std::transform_reduce(sample.begin(),
+                              sample.end(),
+                              0.0,
+                              std::plus<>(),
+                              [mean](const Real value) { return (value - mean) * (value - mean); });
+    _sample_std_episode_reward.push_back(std::sqrt(variance / sample_size));
+
+    _average_episode_reward += sum;
+    _std_episode_reward += variance;
+    combined_sizes += sample_size;
+  }
 
-    // We are inserting to the front of the vector and push the rest back, this will
-    // ensure that the first element of the vector is the discounter reward for the whole transient
-    return_data_per_sim.insert(return_data_per_sim.begin(), discounted_reward);
+  if (!combined_sizes)
+  {
+    _average_episode_reward = 0.0;
+    _std_episode_reward = 0.0;
+    return;
   }
 
-  // Save and accumulate the return values
-  _return_data.insert(_return_data.end(), return_data_per_sim.begin(), return_data_per_sim.end());
+  _average_episode_reward /= combined_sizes;
+  _std_episode_reward = std::sqrt(_std_episode_reward / combined_sizes);
 }
 
 void
-LibtorchDRLControlTrainer::trainController()
+LibtorchDRLControlTrainer::trainController(const LibtorchRLTrajectoryBuffer::TensorBatch & batch)
 {
-  // Define the optimizers for the training
-  torch::optim::Adam actor_optimizer(_control_nn->parameters(),
-                                     torch::optim::AdamOptions(_control_learning_rate));
+  if (!batch.size())
+    return;
 
-  torch::optim::Adam critic_optimizer(_critic_nn->parameters(),
-                                      torch::optim::AdamOptions(_critic_learning_rate));
+  // We only train on the rank 0 partition. Libtorch should still be able to
+  // fetch the local threads which are available.
+  if (processor_id() == 0)
+  {
+    auto shuffle_generator = Moose::makeLibtorchCPUGenerator(
+        static_cast<uint64_t>(_seed) + static_cast<uint64_t>(_fe_problem.timeStep()));
 
-  // Compute the approximate value (return) from the critic neural net and use it to compute an
-  // advantage
-  auto value = evaluateValue(_input_tensor).detach();
-  auto advantage = _return_tensor - value;
+    for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch)
+    {
+      const auto mini_batches = _sampler.sample(
+          batch, getParam<unsigned int>("batch_size"), _standardize_advantage, shuffle_generator);
+      bool printed_losses = false;
+      for (const auto & mini_batch : mini_batches)
+      {
+        const auto losses = _ppo_loss.compute(*_control_nn, *_critic_nn, mini_batch);
+
+        _actor_optimizer->zero_grad();
+        losses.actor_loss.backward();
+        _actor_optimizer->step();
+
+        _critic_optimizer->zero_grad();
+        losses.critic_loss.backward();
+        _critic_optimizer->step();
+
+        if (_loss_print_frequency && epoch % _loss_print_frequency == 0 && !printed_losses)
+        {
+          _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN
+                   << losses.actor_loss.item<double>() << COLOR_DEFAULT
+                   << " | Critic Loss: " << COLOR_GREEN << losses.critic_loss.item<double>()
+                   << COLOR_DEFAULT << std::endl;
+          printed_losses = true;
+        }
+      }
+    }
 
-  // If requested, standardize the advantage
-  if (_standardize_advantage)
-    advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10);
+    _console << "Best model so far: " << _highest_reward << std::endl;
+  }
 
-  for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch)
+  // It is time to send the trained data to every other processor so that the neural networks
+  // are the same on all ranks. TODO: Make sure this can be done on a GPU as well.
+  for (auto & param : _control_nn->named_parameters())
   {
-    // Get the approximate return from the neural net again (this one does have an associated
-    // gradient)
-    value = evaluateValue(_input_tensor);
-    // Get the approximate logarithmic action probability using the control neural net
-    auto curr_log_probability = evaluateAction(_input_tensor, _output_tensor);
-
-    // Prepare the ratio by using the e^(logx-logy)=x/y expression
-    auto ratio = (curr_log_probability - _log_probability_tensor).exp();
-
-    // Use clamping for limiting
-    auto surr1 = ratio * advantage;
-    auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage;
-
-    // Compute loss values for the critic and the control neural net
-    auto actor_loss = -torch::min(surr1, surr2).mean();
-    auto critic_loss = torch::mse_loss(value, _return_tensor);
-
-    // Update the weights in the neural nets
-    actor_optimizer.zero_grad();
-    actor_loss.backward();
-    actor_optimizer.step();
-
-    critic_optimizer.zero_grad();
-    critic_loss.backward();
-    critic_optimizer.step();
-
-    // print loss per epoch
-    if (_loss_print_frequency)
-      if (epoch % _loss_print_frequency == 0)
-        _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN
-                 << actor_loss.item<double>() << COLOR_DEFAULT << " | Critic Loss: " << COLOR_GREEN
-                 << critic_loss.item<double>() << COLOR_DEFAULT << std::endl;
+    MPI_Bcast(param.value().data_ptr(), param.value().numel(), MPI_DOUBLE, 0, _communicator.get());
+  }
+
+  for (auto & param : _critic_nn->named_parameters())
+  {
+    MPI_Bcast(param.value().data_ptr(), param.value().numel(), MPI_DOUBLE, 0, _communicator.get());
   }
 
   // Save the controller neural net so our controller can read it, we also save the critic if we
@@ -369,120 +418,107 @@ LibtorchDRLControlTrainer::trainController()
   torch::save(_critic_nn, _critic_nn->name());
 }
 
-void
-LibtorchDRLControlTrainer::convertDataToTensor(std::vector<std::vector<Real>> & vector_data,
-                                               torch::Tensor & tensor_data,
-                                               const bool detach)
-{
-  for (unsigned int i = 0; i < vector_data.size(); ++i)
-  {
-    torch::Tensor input_row;
-    LibtorchUtils::vectorToTensor(vector_data[i], input_row, detach);
-
-    if (i == 0)
-      tensor_data = input_row;
-    else
-      tensor_data = torch::cat({tensor_data, input_row}, 1);
-  }
-
-  if (detach)
-    tensor_data.detach();
-}
-
-torch::Tensor
-LibtorchDRLControlTrainer::evaluateValue(torch::Tensor & input)
-{
-  return _critic_nn->forward(input);
-}
-
-torch::Tensor
-LibtorchDRLControlTrainer::evaluateAction(torch::Tensor & input, torch::Tensor & output)
-{
-  torch::Tensor var = torch::matmul(_std, _std);
-
-  // Compute an action and get it's logarithmic proability based on an assumed Gaussian distribution
-  torch::Tensor action = _control_nn->forward(input);
-  return -((action - output) * (action - output)) / (2 * var) - torch::log(_std) -
-         std::log(std::sqrt(2 * M_PI));
-}
-
 void
 LibtorchDRLControlTrainer::resetData()
 {
-  for (auto & data : _input_data)
-    data.clear();
-  for (auto & data : _output_data)
-    data.clear();
-  for (auto & data : _log_probability_data)
-    data.clear();
-
-  _reward_data.clear();
-  _return_data.clear();
-
+  _trajectory_buffer.clear();
   _update_counter = _update_frequency;
 }
 
 void
-LibtorchDRLControlTrainer::getInputDataFromReporter(
-    std::vector<std::vector<Real>> & data,
-    const std::vector<const std::vector<Real> *> & reporter_links,
-    const unsigned int num_timesteps)
+LibtorchDRLControlTrainer::collectTrajectoriesFromReporters()
 {
-  for (const auto & rep_i : index_range(reporter_links))
+  for (const auto sample_i : index_range(*_reward_value_pointer))
   {
-    std::vector<Real> reporter_data = *reporter_links[rep_i];
-
-    // We shift and scale the inputs to get better training efficiency
-    std::transform(
-        reporter_data.begin(),
-        reporter_data.end(),
-        reporter_data.begin(),
-        [this, &rep_i](Real value) -> Real
-        { return (value - _response_shift_factors[rep_i]) * _response_scaling_factors[rep_i]; });
-
-    // Fill the corresponding containers
-    for (const auto & start_step : make_range(num_timesteps))
+    const auto & reward_sample = (*_reward_value_pointer)[sample_i];
+    const auto num_transitions = computeNumTransitions(reward_sample.size());
+    if (!num_transitions)
+      continue;
+
+    std::vector<std::vector<Real>> component_trajectories(_state_names.size());
+    for (const auto state_i : index_range(_state_value_pointers))
+      component_trajectories[state_i] = extractDownsampledSequence(
+          (*_state_value_pointers[state_i])[sample_i], 0, num_transitions + 1);
+
+    LibtorchRLTrajectoryBuffer::Trajectory trajectory;
+    trajectory.observations.reserve(num_transitions);
+    trajectory.next_observations.reserve(num_transitions);
+    trajectory.actions.assign(num_transitions, std::vector<Real>());
+    trajectory.log_probabilities.assign(num_transitions, std::vector<Real>());
+
+    for (auto & action_row : trajectory.actions)
+      action_row.reserve(_action_names.size());
+    for (auto & log_probability_row : trajectory.log_probabilities)
+      log_probability_row.reserve(_log_probability_names.size());
+
+    for (const auto step_i : make_range(num_transitions))
+    {
+      trajectory.observations.push_back(
+          _observation_history.stackTrajectoryObservation(component_trajectories, step_i));
+      trajectory.next_observations.push_back(
+          _observation_history.stackTrajectoryObservation(component_trajectories, step_i + 1));
+    }
+
+    for (const auto action_i : index_range(_action_value_pointers))
     {
-      unsigned int row = reporter_links.size() * start_step + rep_i;
-      for (unsigned int fill_i = 1; fill_i < num_timesteps - start_step; ++fill_i)
-        data[row].push_back(reporter_data[0]);
-
-      data[row].insert(data[row].end(),
-                       reporter_data.begin(),
-                       reporter_data.begin() + start_step + reporter_data.size() -
-                           (num_timesteps - 1) - _shift_outputs);
+      const auto action_sequence = extractDownsampledSequence(
+          (*_action_value_pointers[action_i])[sample_i], _shift_outputs, num_transitions);
+      const auto log_probability_sequence = extractDownsampledSequence(
+          (*_log_probability_value_pointers[action_i])[sample_i], _shift_outputs, num_transitions);
+
+      for (const auto step_i : make_range(num_transitions))
+      {
+        trajectory.actions[step_i].push_back(action_sequence[step_i]);
+        trajectory.log_probabilities[step_i].push_back(log_probability_sequence[step_i]);
+      }
     }
+
+    trajectory.rewards =
+        extractDownsampledSequence(reward_sample, _timestep_window, num_transitions);
+
+    _trajectory_buffer.addTrajectory(std::move(trajectory));
   }
 }
 
-void
-LibtorchDRLControlTrainer::getOutputDataFromReporter(
-    std::vector<std::vector<Real>> & data,
-    const std::vector<const std::vector<Real> *> & reporter_links)
+unsigned int
+LibtorchDRLControlTrainer::computeNumTransitions(const std::size_t raw_sequence_size) const
 {
-  for (const auto & rep_i : index_range(reporter_links))
-    // Fill the corresponding containers
-    data[rep_i].insert(data[rep_i].end(),
-                       reporter_links[rep_i]->begin() + _shift_outputs,
-                       reporter_links[rep_i]->end());
+  unsigned int num_transitions = 0;
+  for (std::size_t raw_index = 0; raw_index + _timestep_window < raw_sequence_size;
+       raw_index += _timestep_window)
+    ++num_transitions;
+
+  return num_transitions;
 }
 
-void
-LibtorchDRLControlTrainer::getRewardDataFromReporter(std::vector<Real> & data,
-                                                     const std::vector<Real> * const reporter_link)
+std::vector<Real>
+LibtorchDRLControlTrainer::extractDownsampledSequence(const std::vector<Real> & sample,
+                                                      const unsigned int offset,
+                                                      const unsigned int num_entries) const
 {
-  // Fill the corresponding container
-  data.insert(data.end(), reporter_link->begin() + _shift_outputs, reporter_link->end());
+  std::vector<Real> values;
+  values.reserve(num_entries);
+
+  for (const auto entry_i : make_range(num_entries))
+  {
+    const auto raw_index = offset + entry_i * _timestep_window;
+    if (raw_index >= sample.size())
+      mooseError("Reporter data is shorter than required by the configured timestep window and "
+                 "history stacking.");
+    values.push_back(sample[raw_index]);
+  }
+
+  return values;
 }
 
 void
 LibtorchDRLControlTrainer::getReporterPointers(
     const std::vector<ReporterName> & reporter_names,
-    std::vector<const std::vector<Real> *> & pointer_storage)
+    std::vector<const std::vector<std::vector<Real>> *> & pointer_storage)
 {
   pointer_storage.clear();
   for (const auto & name : reporter_names)
-    pointer_storage.push_back(&getReporterValueByName<std::vector<Real>>(name));
+    pointer_storage.push_back(&getReporterValueByName<std::vector<std::vector<Real>>>(name));
 }
 
 #endif
diff --git a/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C
new file mode 100644
index 000000000000..713a7c10da2b
--- /dev/null
+++ b/modules/stochastic_tools/src/libtorch/transfers/SamplerDRLControlTransfer.C
@@ -0,0 +1,132 @@
+//* This file is part of the MOOSE framework
+//* https://www.mooseframework.org
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "SamplerDRLControlTransfer.h"
+#include "LibtorchDRLControl.h"
+
+registerMooseObject("StochasticToolsApp", SamplerDRLControlTransfer);
+
+InputParameters
+SamplerDRLControlTransfer::validParams()
+{
+  InputParameters params = StochasticToolsTransfer::validParams();
+  params += SurrogateModelInterface::validParams();
+
+  params.addClassDescription(
+      "Copies a DRL actor from a trainer object on the main app to a LibtorchDRLControl object "
+      "on the subapp.");
+
+  params.suppressParameter<MultiAppName>("from_multi_app");
+
+  params.addRequiredParam<UserObjectName>(
+      "trainer_name", "Trainer object that owns the latest controller network.");
+  params.addRequiredParam<std::string>("control_name", "Controller object name.");
+  return params;
+}
+
+SamplerDRLControlTransfer::SamplerDRLControlTransfer(const InputParameters & parameters)
+  : StochasticToolsTransfer(parameters),
+    SurrogateModelInterface(this),
+    _control_name(getParam<std::string>("control_name")),
+    _trainer(getSurrogateTrainerByName<LibtorchDRLControlTrainer>(
+        getParam<UserObjectName>("trainer_name")))
+{
+}
+
+void
+SamplerDRLControlTransfer::initialSetup()
+{
+}
+
+void
+SamplerDRLControlTransfer::execute()
+{
+  const auto n = getToMultiApp()->numGlobalApps();
+  for (MooseIndex(n) i = 0; i < n; i++)
+  {
+    if (getToMultiApp()->hasLocalApp(i))
+    {
+      // Get the control neural net from the trainer
+      const Moose::LibtorchActorNeuralNet & trainer_nn = _trainer.controlNeuralNet();
+
+      // Get the control object from the other app
+      FEProblemBase & app_problem = _multi_app->appProblemBase(i);
+      auto & control_warehouse = app_problem.getControlWarehouse();
+      std::shared_ptr<Control> control_ptr = control_warehouse.getActiveObject(_control_name);
+      LibtorchDRLControl * control_object = dynamic_cast<LibtorchDRLControl *>(control_ptr.get());
+
+      if (!control_object)
+        paramError("control_name", "The given control is not a LibtorchDRLControl!");
+
+      // Copy and the neural net and execute it to get the initial values
+      control_object->loadControlNeuralNet(trainer_nn);
+      control_object->execute();
+    }
+  }
+}
+
+void
+SamplerDRLControlTransfer::initializeFromMultiapp()
+{
+}
+
+void
+SamplerDRLControlTransfer::executeFromMultiapp()
+{
+}
+
+void
+SamplerDRLControlTransfer::finalizeFromMultiapp()
+{
+}
+
+void
+SamplerDRLControlTransfer::initializeToMultiapp()
+{
+}
+
+void
+SamplerDRLControlTransfer::executeToMultiapp()
+{
+  if (getToMultiApp()->hasLocalApp(_app_index))
+  {
+    // Use a rank-invariant seed based on the configured trainer seed, the current main-app
+    // training step, and the sampler row being executed. This keeps the stochastic rollout path
+    // tied to the actual sample instead of the transient local app slot chosen by batch-reset.
+    const uint64_t sample_seed = static_cast<uint64_t>(_trainer.seed()) +
+                                 static_cast<uint64_t>(_global_index) +
+                                 static_cast<uint64_t>(_sampler_ptr->getNumberOfRows()) *
+                                     static_cast<uint64_t>(_fe_problem.timeStep());
+    // Get the control neural net from the trainer
+    const Moose::LibtorchActorNeuralNet & trainer_nn = _trainer.controlNeuralNet();
+
+    // Get the control object from the other app
+    FEProblemBase & app_problem = _multi_app->appProblemBase(_app_index);
+    auto & control_warehouse = app_problem.getControlWarehouse();
+    std::shared_ptr<Control> control_ptr = control_warehouse.getActiveObject(_control_name);
+    LibtorchDRLControl * control_object = dynamic_cast<LibtorchDRLControl *>(control_ptr.get());
+
+    if (!control_object)
+      paramError("control_name", "The given control is not a LibtorchDRLControl!");
+
+    // Copy and the neural net and execute it to get the initial values
+    control_object->loadControlNeuralNet(trainer_nn);
+    control_object->setPolicySampleSeed(sample_seed);
+    control_object->execute();
+  }
+}
+
+void
+SamplerDRLControlTransfer::finalizeToMultiapp()
+{
+}
+
+#endif
diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C
new file mode 100644
index 000000000000..311d9676f4a5
--- /dev/null
+++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActionDistribution.C
@@ -0,0 +1,321 @@
+//* This file is part of the MOOSE framework
+//* https://www.mooseframework.org
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "LibtorchActionDistribution.h"
+
+#include "LibtorchRandomUtils.h"
+#include "LibtorchUtils.h"
+#include "MooseError.h"
+
+#include <cmath>
+#include <limits>
+#include "libmesh/utility.h"
+
+namespace
+{
+
+/**
+ * Fill in default action scaling and catch shape mistakes early.
+ * @param factors User-provided scaling factors.
+ * @param expected_size Number of action outputs expected by the distribution.
+ * @return A fully populated and validated scaling-factor vector.
+ */
+std::vector<Real>
+setActionScalingFactors(const std::vector<Real> & factors, const unsigned int expected_size)
+{
+  const auto normalized = factors.empty() ? std::vector<Real>(expected_size, 1.0) : factors;
+
+  if (normalized.size() != expected_size)
+    mooseError("The number of output_scaling_factors entries must match the number of action "
+               "outputs.");
+
+  for (const auto factor : normalized)
+    if (std::abs(factor) == 0.0)
+      mooseError("The output_scaling_factors entries must be non-zero.");
+
+  return normalized;
+}
+
+} // namespace
+
+namespace Moose
+{
+
+LibtorchActionDistribution::LibtorchActionDistribution(
+    const std::string & name,
+    const unsigned int num_inputs,
+    const unsigned int num_outputs,
+    const torch::DeviceType device_type,
+    const torch::ScalarType data_type,
+    const std::vector<Real> & output_scaling_factors)
+  : _name(name),
+    _num_inputs(num_inputs),
+    _num_outputs(num_outputs),
+    _device_type(device_type),
+    _data_type(data_type),
+    _output_scaling_factors(setActionScalingFactors(output_scaling_factors, num_outputs))
+{
+  auto action_scale = _output_scaling_factors;
+  LibtorchUtils::vectorToTensor(action_scale, _action_scale_tensor);
+  _action_scale_tensor = register_buffer(
+      "action_scale", _action_scale_tensor.transpose(0, 1).to(_data_type).to(_device_type));
+}
+
+void
+LibtorchActionDistribution::synchronizeScalingFactorsFromBuffer()
+{
+  auto action_scale =
+      _action_scale_tensor.detach().reshape({-1}).to(torch::kCPU).to(torch::kDouble);
+  LibtorchUtils::tensorToVector(action_scale, _output_scaling_factors);
+}
+
+torch::Tensor
+LibtorchActionDistribution::prepareFeatures(const torch::Tensor & input) const
+{
+  auto features = input;
+  if (_data_type != features.scalar_type())
+    features = features.to(_data_type);
+  if (_device_type != features.device().type())
+    features = features.to(_device_type);
+  return features;
+}
+
+torch::Tensor
+LibtorchActionDistribution::prepareAction(const torch::Tensor & action) const
+{
+  auto scaled_action = action;
+  if (_data_type != scaled_action.scalar_type())
+    scaled_action = scaled_action.to(_data_type);
+  if (_device_type != scaled_action.device().type())
+    scaled_action = scaled_action.to(_device_type);
+  return scaled_action;
+}
+
+LibtorchGaussianActionDistribution::LibtorchGaussianActionDistribution(
+    const std::string & name,
+    const unsigned int num_inputs,
+    const unsigned int num_outputs,
+    const torch::DeviceType device_type,
+    const torch::ScalarType data_type,
+    const bool build_on_construct,
+    const std::vector<Real> & output_scaling_factors,
+    const bool state_independent_std)
+  : LibtorchActionDistribution(
+        name, num_inputs, num_outputs, device_type, data_type, output_scaling_factors),
+    _state_independent_std(state_independent_std)
+{
+  if (build_on_construct)
+    constructDistribution();
+}
+
+void
+LibtorchGaussianActionDistribution::constructDistribution()
+{
+  _mean_module = register_module(
+      "mean", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true)));
+  _std_module = register_module(
+      "std", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true)));
+
+  _mean_module->to(_device_type, _data_type);
+  _std_module->to(_device_type, _data_type);
+}
+
+void
+LibtorchGaussianActionDistribution::initialize(const c10::optional<at::Generator> generator)
+{
+  Moose::orthogonalInitializeTensor(_mean_module->weight, 1.0, generator);
+  torch::nn::init::zeros_(_mean_module->bias);
+
+  if (_state_independent_std)
+  {
+    _std_module->weight.data().zero_();
+    torch::nn::init::zeros_(_std_module->bias);
+    return;
+  }
+
+  Moose::orthogonalInitializeTensor(_std_module->weight, 1.0, generator);
+  torch::nn::init::zeros_(_std_module->bias);
+}
+
+void
+LibtorchGaussianActionDistribution::reset(const torch::Tensor & input)
+{
+  const auto features = prepareFeatures(input);
+  _mean = _mean_module->forward(features);
+
+  if (_state_independent_std)
+  {
+    if (_mean.dim() <= 1)
+      _log_std_tensor = _std_module->bias;
+    else
+      _log_std_tensor = _std_module->bias.view({1, -1}).expand(_mean.sizes());
+  }
+  else
+    _log_std_tensor = _std_module->forward(features);
+
+  _log_std_tensor = torch::clamp(_log_std_tensor, std::log(1e-12), -std::log(1e-12));
+  _std_tensor = torch::exp(_log_std_tensor);
+}
+
+torch::Tensor
+LibtorchGaussianActionDistribution::sample(const c10::optional<at::Generator> generator) const
+{
+  return at::normal(_mean, _std_tensor, generator) * actionScaleTensor();
+}
+
+torch::Tensor
+LibtorchGaussianActionDistribution::deterministicAction() const
+{
+  return _mean * actionScaleTensor();
+}
+
+torch::Tensor
+LibtorchGaussianActionDistribution::logProbability(const torch::Tensor & action) const
+{
+  const auto scaled_action = prepareAction(action);
+  const auto log_action_scale = torch::log(torch::abs(actionScaleTensor()));
+  const auto unscaled_action = scaled_action / actionScaleTensor();
+
+  constexpr Real pi = 3.14159265358979323846;
+  const torch::Tensor var = _std_tensor * _std_tensor;
+  return -((unscaled_action - _mean) * (unscaled_action - _mean)) / (2.0 * var) - _log_std_tensor -
+         0.5 * std::log(2.0 * pi) - log_action_scale;
+}
+
+torch::Tensor
+LibtorchGaussianActionDistribution::entropy() const
+{
+  const auto log_action_scale = torch::log(torch::abs(actionScaleTensor()));
+  constexpr Real pi = 3.14159265358979323846;
+  return 0.5 * std::log(2.0 * pi) + _log_std_tensor + 0.5 + log_action_scale;
+}
+
+LibtorchBetaActionDistribution::LibtorchBetaActionDistribution(
+    const std::string & name,
+    const unsigned int num_inputs,
+    const unsigned int num_outputs,
+    const std::vector<Real> & minimum_values,
+    const std::vector<Real> & maximum_values,
+    const torch::DeviceType device_type,
+    const torch::ScalarType data_type,
+    const bool build_on_construct,
+    const std::vector<Real> & output_scaling_factors)
+  : LibtorchActionDistribution(
+        name, num_inputs, num_outputs, device_type, data_type, output_scaling_factors),
+    _minimum_values(minimum_values),
+    _maximum_values(maximum_values)
+{
+  if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs)
+    mooseError("The number of minimum_values and maximum_values entries must match the number "
+               "of action outputs.");
+
+  for (const auto i : make_range(_minimum_values.size()))
+    if (!(_maximum_values[i] > _minimum_values[i]))
+      mooseError("maximum_values entries must be strictly greater than minimum_values entries.");
+
+  auto min_value = _minimum_values;
+  LibtorchUtils::vectorToTensor(min_value, _min_tensor);
+  _min_tensor = _min_tensor.transpose(0, 1).to(_data_type).to(_device_type);
+  auto max_value = _maximum_values;
+  LibtorchUtils::vectorToTensor(max_value, _max_tensor);
+  _max_tensor = _max_tensor.transpose(0, 1).to(_data_type).to(_device_type);
+
+  if (build_on_construct)
+    constructDistribution();
+}
+
+void
+LibtorchBetaActionDistribution::constructDistribution()
+{
+  _alpha_module = register_module(
+      "alpha", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true)));
+  _beta_module = register_module(
+      "beta", torch::nn::Linear(torch::nn::LinearOptions(_num_inputs, _num_outputs).bias(true)));
+
+  _alpha_module->to(_device_type, _data_type);
+  _beta_module->to(_device_type, _data_type);
+}
+
+void
+LibtorchBetaActionDistribution::initialize(const c10::optional<at::Generator> generator)
+{
+  Moose::orthogonalInitializeTensor(_alpha_module->weight, 1.0, generator);
+  torch::nn::init::zeros_(_alpha_module->bias);
+
+  Moose::orthogonalInitializeTensor(_beta_module->weight, 1.0, generator);
+  torch::nn::init::zeros_(_beta_module->bias);
+}
+
+void
+LibtorchBetaActionDistribution::reset(const torch::Tensor & input)
+{
+  const auto features = prepareFeatures(input);
+  const auto alpha = _alpha_module->forward(features);
+  _alpha_tensor = torch::log(torch::exp(alpha) + 1.0) + 1.0;
+  const auto beta = _beta_module->forward(features);
+  _beta_tensor = torch::log(torch::exp(beta) + 1.0) + 1.0;
+
+  _alpha_beta_tensor = torch::clamp_min(_alpha_tensor + _beta_tensor, 1e-8);
+  _mean = _alpha_tensor / _alpha_beta_tensor;
+  _log_norm = at::lgamma(_alpha_tensor) + at::lgamma(_beta_tensor) - at::lgamma(_alpha_beta_tensor);
+}
+
+torch::Tensor
+LibtorchBetaActionDistribution::sample(const c10::optional<at::Generator> generator) const
+{
+  const auto alpha_sample = at::_standard_gamma(_alpha_tensor, generator);
+  const auto beta_sample = at::_standard_gamma(_beta_tensor, generator);
+  const auto sampled = alpha_sample / (alpha_sample + beta_sample);
+  return (_min_tensor + (_max_tensor - _min_tensor) * sampled) * actionScaleTensor();
+}
+
+torch::Tensor
+LibtorchBetaActionDistribution::deterministicAction() const
+{
+  return (_min_tensor + (_max_tensor - _min_tensor) * _mean) * actionScaleTensor();
+}
+
+torch::Tensor
+LibtorchBetaActionDistribution::logProbability(const torch::Tensor & action) const
+{
+  const auto scaled_action = prepareAction(action);
+  const auto log_action_scale = torch::log(torch::abs(actionScaleTensor()));
+  const auto unscaled_action = scaled_action / actionScaleTensor();
+  const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8);
+  const auto normalized = (unscaled_action - _min_tensor) / scale;
+  const auto clipped = torch::clamp(normalized, 1e-8, 1.0 - 1e-8);
+  auto log_prob = (_alpha_tensor - 1.0) * torch::log(clipped) +
+                  (_beta_tensor - 1.0) * torch::log1p(-clipped) - _log_norm - torch::log(scale) -
+                  log_action_scale;
+
+  const auto out_of_bounds = (normalized < 0.0) | (normalized > 1.0);
+  if (out_of_bounds.any().item<bool>())
+    log_prob = torch::where(out_of_bounds,
+                            torch::full_like(log_prob, -std::numeric_limits<Real>::infinity()),
+                            log_prob);
+
+  return log_prob;
+}
+
+torch::Tensor
+LibtorchBetaActionDistribution::entropy() const
+{
+  const auto log_action_scale = torch::log(torch::abs(actionScaleTensor()));
+  const auto scale = torch::clamp_min(_max_tensor - _min_tensor, 1e-8);
+  return _log_norm - (_beta_tensor - 1.0) * torch::digamma(_beta_tensor) -
+         (_alpha_tensor - 1.0) * torch::digamma(_alpha_tensor) +
+         (_alpha_beta_tensor - 2.0) * torch::digamma(_alpha_beta_tensor) + torch::log(scale) +
+         log_action_scale;
+}
+
+} // namespace Moose
+
+#endif
diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C
new file mode 100644
index 000000000000..036517259627
--- /dev/null
+++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchActorNeuralNet.C
@@ -0,0 +1,348 @@
+//* This file is part of the MOOSE framework
+//* https://www.mooseframework.org
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "LibtorchActorNeuralNet.h"
+#include "LibtorchRandomUtils.h"
+#include "MooseError.h"
+#include "libmesh/utility.h"
+
+namespace
+{
+
+template <typename NamedTensorList>
+auto
+captureTensorShapes(const NamedTensorList & tensors)
+{
+  std::vector<std::pair<std::string, std::vector<int64_t>>> shapes;
+  shapes.reserve(tensors.size());
+
+  for (const auto & tensor : tensors)
+    shapes.emplace_back(
+        tensor.key(),
+        std::vector<int64_t>(tensor.value().sizes().begin(), tensor.value().sizes().end()));
+
+  return shapes;
+}
+
+template <typename NamedTensorList>
+void
+verifyTensorShapes(const NamedTensorList & tensors,
+                   const std::vector<std::pair<std::string, std::vector<int64_t>>> & expected,
+                   const char * tensor_kind)
+{
+  if (tensors.size() != expected.size())
+    mooseError("The loaded DRL actor ", tensor_kind, " count does not match the generated schema.");
+
+  for (const auto tensor_i : make_range(tensors.size()))
+  {
+    const auto actual_shape = std::vector<int64_t>(tensors[tensor_i].value().sizes().begin(),
+                                                   tensors[tensor_i].value().sizes().end());
+
+    if (tensors[tensor_i].key() != expected[tensor_i].first ||
+        actual_shape != expected[tensor_i].second)
+      mooseError("The loaded DRL actor ",
+                 tensor_kind,
+                 " '",
+                 tensors[tensor_i].key(),
+                 "' does not match the generated schema.");
+  }
+}
+
+} // namespace
+
+namespace Moose
+{
+
+LibtorchActorNeuralNet::LibtorchActorNeuralNet(
+    const std::string name,
+    const unsigned int num_inputs,
+    const unsigned int num_outputs,
+    const std::vector<unsigned int> & num_neurons_per_layer,
+    const std::vector<std::string> & activation_function,
+    const std::vector<Real> & minimum_values,
+    const std::vector<Real> & maximum_values,
+    const torch::DeviceType device_type,
+    const torch::ScalarType data_type,
+    const bool build_on_construct,
+    const std::vector<Real> & input_shift_factors,
+    const std::vector<Real> & input_scaling_factors,
+    const std::vector<Real> & output_scaling_factors,
+    const bool state_independent_std)
+  : LibtorchArtificialNeuralNet(name,
+                                num_inputs,
+                                num_outputs,
+                                num_neurons_per_layer,
+                                activation_function,
+                                device_type,
+                                data_type,
+                                false,
+                                input_shift_factors,
+                                input_scaling_factors,
+                                output_scaling_factors),
+    _minimum_values(minimum_values),
+    _maximum_values(maximum_values),
+    _state_independent_std(state_independent_std)
+{
+  const bool has_minimum_values = !_minimum_values.empty();
+  const bool has_maximum_values = !_maximum_values.empty();
+  if (has_minimum_values != has_maximum_values)
+    mooseError("Bounded action distributions require both minimum_values and maximum_values.");
+
+  if (has_minimum_values)
+  {
+    if (_minimum_values.size() != _num_outputs || _maximum_values.size() != _num_outputs)
+      mooseError("The number of minimum_values and maximum_values entries must match the number "
+                 "of action outputs.");
+
+    for (const auto i : make_range(_minimum_values.size()))
+      if (!(_maximum_values[i] > _minimum_values[i]))
+        mooseError("maximum_values entries must be strictly greater than minimum_values entries.");
+  }
+
+  if (build_on_construct)
+    constructNeuralNetwork();
+}
+
+LibtorchActorNeuralNet::LibtorchActorNeuralNet(const Moose::LibtorchActorNeuralNet & nn,
+                                               const bool build_on_construct)
+  : LibtorchArtificialNeuralNet(dynamic_cast<const LibtorchArtificialNeuralNet &>(nn), false),
+    _minimum_values(nn.minValues()),
+    _maximum_values(nn.maxValues()),
+    _state_independent_std(nn.stateIndependentStd())
+{
+  // We construct the NN architecture
+  if (build_on_construct)
+  {
+    constructNeuralNetwork();
+    // We fill it up with the current parameter values
+    const auto & from_params = nn.named_parameters();
+    auto to_params = this->named_parameters();
+    for (unsigned int param_i : make_range(from_params.size()))
+      to_params[param_i].value().data() = from_params[param_i].value().data().clone();
+
+    const auto & from_buffers = nn.named_buffers();
+    auto to_buffers = this->named_buffers();
+    for (unsigned int buffer_i : make_range(from_buffers.size()))
+      to_buffers[buffer_i].value().data() = from_buffers[buffer_i].value().data().clone();
+  }
+}
+
+void
+LibtorchActorNeuralNet::initializeNeuralNetwork(const c10::optional<at::Generator> generator)
+{
+  for (unsigned int i = 0; i < numHiddenLayers(); ++i)
+  {
+    const auto & activation =
+        _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0];
+    const Real gain = determineGain(activation);
+    Moose::orthogonalInitializeTensor(_weights[i]->weight, gain, generator);
+    torch::nn::init::zeros_(_weights[i]->bias);
+  }
+
+  _action_distribution->initialize(generator);
+}
+
+void
+LibtorchActorNeuralNet::constructNeuralNetwork()
+{
+  // Adding hidden layers
+  unsigned int inp_neurons = _num_inputs;
+  for (unsigned int i = 0; i < numHiddenLayers(); ++i)
+  {
+    std::unordered_map<std::string, unsigned int> parameters = {
+        {"inp_neurons", inp_neurons}, {"out_neurons", _num_neurons_per_layer[i]}};
+    addLayer("hidden_layer_" + std::to_string(i + 1), parameters);
+
+    // Necessary to retain double precision (and error-free runs)
+    _weights[i]->to(_device_type, _data_type);
+    inp_neurons = _num_neurons_per_layer[i];
+  }
+
+  if (_minimum_values.empty() && _maximum_values.empty())
+    _action_distribution =
+        std::make_shared<LibtorchGaussianActionDistribution>("action_distribution",
+                                                             inp_neurons,
+                                                             _num_outputs,
+                                                             _device_type,
+                                                             _data_type,
+                                                             true,
+                                                             _output_scaling_factors,
+                                                             _state_independent_std);
+  else
+    _action_distribution =
+        std::make_shared<LibtorchBetaActionDistribution>("action_distribution",
+                                                         inp_neurons,
+                                                         _num_outputs,
+                                                         _minimum_values,
+                                                         _maximum_values,
+                                                         _device_type,
+                                                         _data_type,
+                                                         true,
+                                                         _output_scaling_factors);
+
+  // Keep the serialized module name stable so existing checkpoints continue to load.
+  register_module("action_head", _action_distribution);
+}
+
+torch::Tensor
+LibtorchActorNeuralNet::entropy()
+{
+  return _action_distribution->entropy();
+}
+
+const LibtorchGaussianActionDistribution *
+LibtorchActorNeuralNet::gaussianActionDistributionPtr() const
+{
+  return dynamic_cast<const LibtorchGaussianActionDistribution *>(_action_distribution.get());
+}
+
+LibtorchGaussianActionDistribution *
+LibtorchActorNeuralNet::gaussianActionDistributionPtr()
+{
+  return dynamic_cast<LibtorchGaussianActionDistribution *>(_action_distribution.get());
+}
+
+const LibtorchGaussianActionDistribution &
+LibtorchActorNeuralNet::gaussianActionDistribution() const
+{
+  const auto * distribution = gaussianActionDistributionPtr();
+  if (!distribution)
+    mooseError("Requested a Gaussian action distribution from a bounded actor.");
+  return *distribution;
+}
+
+LibtorchGaussianActionDistribution &
+LibtorchActorNeuralNet::gaussianActionDistribution()
+{
+  auto * distribution = gaussianActionDistributionPtr();
+  if (!distribution)
+    mooseError("Requested a Gaussian action distribution from a bounded actor.");
+  return *distribution;
+}
+
+const LibtorchBetaActionDistribution *
+LibtorchActorNeuralNet::betaActionDistributionPtr() const
+{
+  return dynamic_cast<const LibtorchBetaActionDistribution *>(_action_distribution.get());
+}
+
+LibtorchBetaActionDistribution *
+LibtorchActorNeuralNet::betaActionDistributionPtr()
+{
+  return dynamic_cast<LibtorchBetaActionDistribution *>(_action_distribution.get());
+}
+
+const LibtorchBetaActionDistribution &
+LibtorchActorNeuralNet::betaActionDistribution() const
+{
+  const auto * distribution = betaActionDistributionPtr();
+  if (!distribution)
+    mooseError("Requested a Beta action distribution from an unbounded actor.");
+  return *distribution;
+}
+
+LibtorchBetaActionDistribution &
+LibtorchActorNeuralNet::betaActionDistribution()
+{
+  auto * distribution = betaActionDistributionPtr();
+  if (!distribution)
+    mooseError("Requested a Beta action distribution from an unbounded actor.");
+  return *distribution;
+}
+
+void
+LibtorchActorNeuralNet::resetDistributionParams(torch::Tensor input)
+{
+  _action_distribution->reset(input);
+}
+
+torch::Tensor
+LibtorchActorNeuralNet::forward(const torch::Tensor & x)
+{
+  torch::Tensor output = preprocessInput(x);
+
+  for (unsigned int i = 0; i < _weights.size(); ++i)
+  {
+    std::string activation =
+        _activation_function.size() > 1 ? _activation_function[i] : _activation_function[0];
+    if (activation == "relu")
+      output = torch::relu(_weights[i]->forward(output));
+    else if (activation == "sigmoid")
+      output = torch::sigmoid(_weights[i]->forward(output));
+    else if (activation == "tanh")
+      output = torch::tanh(_weights[i]->forward(output));
+    else if (activation == "elu")
+      output = torch::elu(_weights[i]->forward(output));
+    else if (activation == "gelu")
+      output = torch::gelu(_weights[i]->forward(output));
+    else if (activation == "linear")
+      output = _weights[i]->forward(output);
+  }
+
+  return output;
+}
+
+torch::Tensor
+LibtorchActorNeuralNet::evaluate(torch::Tensor & x,
+                                 const bool sampled,
+                                 const c10::optional<at::Generator> generator)
+{
+  torch::Tensor output = forward(x);
+
+  resetDistributionParams(output);
+
+  if (sampled)
+    return sample(generator);
+
+  return _action_distribution->deterministicAction();
+}
+
+torch::Tensor
+LibtorchActorNeuralNet::sample(const c10::optional<at::Generator> generator)
+{
+  return _action_distribution->sample(generator);
+}
+
+torch::Tensor
+LibtorchActorNeuralNet::logProbability(const torch::Tensor & action)
+{
+  return _action_distribution->logProbability(action);
+}
+
+void
+loadLibtorchActorNeuralNetState(Moose::LibtorchActorNeuralNet & nn, const std::string & filename)
+{
+  try
+  {
+    const auto expected_parameters = captureTensorShapes(nn.named_parameters());
+    const auto expected_buffers = captureTensorShapes(nn.named_buffers());
+
+    torch::serialize::InputArchive archive;
+    archive.load_from(filename);
+    nn.load(archive);
+
+    verifyTensorShapes(nn.named_parameters(), expected_parameters, "parameter");
+    verifyTensorShapes(nn.named_buffers(), expected_buffers, "buffer");
+
+    nn.synchronizeAffineFactorsFromBuffers();
+    nn.actionDistribution().synchronizeScalingFactorsFromBuffer();
+  }
+  catch (const c10::Error & e)
+  {
+    mooseError("The requested DRL actor checkpoint could not be loaded as a native libtorch "
+               "archive. Make sure the file exists and matches the generated actor schema.\n",
+               e.msg());
+  }
+}
+
+}
+
+#endif
diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C
new file mode 100644
index 000000000000..f25b6fefe9d5
--- /dev/null
+++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLMiniBatchSampler.C
@@ -0,0 +1,92 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "LibtorchRLMiniBatchSampler.h"
+
+#include "MooseError.h"
+
+#include <algorithm>
+#include <string_view>
+
+std::vector<LibtorchRLMiniBatch>
+LibtorchRLMiniBatchSampler::sample(const LibtorchRLTrajectoryBuffer::TensorBatch & batch,
+                                   const unsigned int batch_size,
+                                   const bool standardize_advantage,
+                                   const c10::optional<at::Generator> generator) const
+{
+  std::vector<LibtorchRLMiniBatch> mini_batches;
+
+  if (!batch.size())
+    return mini_batches;
+
+  validateBatch(batch);
+
+  const auto effective_batch_size = std::max<unsigned int>(1, batch_size);
+  auto permutation =
+      at::randperm(batch.size(), generator, torch::TensorOptions().dtype(torch::kLong));
+
+  for (std::int64_t batch_begin = 0; batch_begin < batch.size();
+       batch_begin += effective_batch_size)
+  {
+    const auto batch_end = std::min<std::int64_t>(
+        batch.size(), batch_begin + static_cast<std::int64_t>(effective_batch_size));
+    const auto indices = permutation.narrow(0, batch_begin, batch_end - batch_begin);
+    mini_batches.push_back(makeMiniBatch(batch, indices, standardize_advantage));
+  }
+
+  return mini_batches;
+}
+
+void
+LibtorchRLMiniBatchSampler::validateBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch)
+{
+  if (!batch.actions.defined() || !batch.log_probabilities.defined() ||
+      !batch.value_targets.defined() || !batch.advantages.defined())
+    mooseError("RL tensor batches must define observations, actions, log probabilities, value "
+               "targets, and advantages before mini-batch sampling.");
+
+  const auto batch_size = batch.size();
+  const auto validate_rows = [batch_size](const torch::Tensor & tensor, std::string_view name)
+  {
+    if (!tensor.defined() || tensor.size(0) != batch_size)
+      mooseError(
+          "RL tensor batch field ", name, " must have the same number of rows as observations.");
+  };
+
+  validate_rows(batch.actions, "actions");
+  validate_rows(batch.log_probabilities, "log_probabilities");
+  validate_rows(batch.value_targets, "value_targets");
+  validate_rows(batch.advantages, "advantages");
+}
+
+LibtorchRLMiniBatch
+LibtorchRLMiniBatchSampler::makeMiniBatch(const LibtorchRLTrajectoryBuffer::TensorBatch & batch,
+                                          const torch::Tensor & indices,
+                                          const bool standardize_advantage)
+{
+  LibtorchRLMiniBatch mini_batch;
+
+  mini_batch.observations = batch.observations.index({indices});
+  mini_batch.actions = batch.actions.index({indices});
+  mini_batch.old_log_probabilities = batch.log_probabilities.index({indices});
+  mini_batch.value_targets = batch.value_targets.index({indices});
+  mini_batch.advantages = batch.advantages.index({indices});
+
+  if (standardize_advantage)
+  {
+    const auto std = mini_batch.advantages.std(false);
+    mini_batch.advantages = (mini_batch.advantages - mini_batch.advantages.mean()) / (std + 1e-10);
+  }
+
+  return mini_batch;
+}
+
+#endif
diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C
new file mode 100644
index 000000000000..51d36135aaaa
--- /dev/null
+++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLPPOLoss.C
@@ -0,0 +1,60 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "LibtorchRLPPOLoss.h"
+
+LibtorchRLPPOLoss::LibtorchRLPPOLoss(const Real clip_parameter, const Real entropy_coeff)
+  : _clip_parameter(clip_parameter), _entropy_coeff(entropy_coeff)
+{
+}
+
+LibtorchRLPPOLossOutput
+LibtorchRLPPOLoss::compute(Moose::LibtorchActorNeuralNet & policy_network,
+                           Moose::LibtorchArtificialNeuralNet & value_network,
+                           const LibtorchRLMiniBatch & batch) const
+{
+  auto observations = batch.observations;
+  policy_network.evaluate(observations, false);
+
+  const auto current_log_probability =
+      reduceActionDimension(policy_network.logProbability(batch.actions));
+  const auto previous_log_probability = reduceActionDimension(batch.old_log_probabilities);
+  const auto entropy = reduceActionDimension(policy_network.entropy());
+
+  const auto ratio = (current_log_probability - previous_log_probability).exp();
+  const auto surr1 = ratio * batch.advantages;
+  const auto surr2 =
+      torch::clamp(ratio, 1.0 - _clip_parameter, 1.0 + _clip_parameter) * batch.advantages;
+
+  LibtorchRLPPOLossOutput output;
+  output.actor_loss = -(torch::min(surr1, surr2) + _entropy_coeff * entropy).mean();
+  output.critic_loss =
+      torch::mse_loss(value_network.forward(batch.observations), batch.value_targets);
+  output.entropy = entropy.mean();
+  return output;
+}
+
+torch::Tensor
+LibtorchRLPPOLoss::reduceActionDimension(const torch::Tensor & tensor)
+{
+  if (!tensor.defined())
+    return tensor;
+
+  if (tensor.dim() == 1)
+    return tensor.unsqueeze(1);
+
+  if (tensor.size(-1) == 1)
+    return tensor;
+
+  return tensor.sum(-1, true);
+}
+
+#endif
diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C
new file mode 100644
index 000000000000..d6ee0a60da81
--- /dev/null
+++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLTrajectoryBuffer.C
@@ -0,0 +1,166 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "LibtorchRLTrajectoryBuffer.h"
+
+#include "MooseError.h"
+
+#include "libmesh/utility.h"
+
+namespace
+{
+
+torch::Tensor
+bufferMatrixToTensor(const std::vector<std::vector<Real>> & rows)
+{
+  if (rows.empty())
+    return {};
+
+  const auto num_columns = rows.front().size();
+  auto tensor = torch::zeros({static_cast<long>(rows.size()), static_cast<long>(num_columns)},
+                             torch::TensorOptions().dtype(torch::kDouble));
+  auto accessor = tensor.accessor<Real, 2>();
+
+  for (const auto row_i : make_range(rows.size()))
+  {
+    if (rows[row_i].size() != num_columns)
+      mooseError("All rows must have the same number of entries when flattening an RL batch.");
+
+    for (const auto column_i : make_range(num_columns))
+      accessor[row_i][column_i] = rows[row_i][column_i];
+  }
+
+  return tensor;
+}
+
+torch::Tensor
+bufferVectorToColumnTensor(const std::vector<Real> & values)
+{
+  if (values.empty())
+    return {};
+
+  auto tensor = torch::zeros({static_cast<long>(values.size()), 1},
+                             torch::TensorOptions().dtype(torch::kDouble));
+  auto accessor = tensor.accessor<Real, 2>();
+
+  for (const auto value_i : make_range(values.size()))
+    accessor[value_i][0] = values[value_i];
+
+  return tensor;
+}
+
+} // namespace
+
+void
+LibtorchRLTrajectoryBuffer::addTrajectory(Trajectory trajectory)
+{
+  validateTrajectory(trajectory);
+  _trajectories.push_back(std::move(trajectory));
+}
+
+void
+LibtorchRLTrajectoryBuffer::clear()
+{
+  _trajectories.clear();
+}
+
+std::size_t
+LibtorchRLTrajectoryBuffer::numTransitions() const
+{
+  std::size_t transitions = 0;
+  for (const auto & trajectory : _trajectories)
+    transitions += trajectory.rewards.size();
+  return transitions;
+}
+
+LibtorchRLTrajectoryBuffer::TensorBatch
+LibtorchRLTrajectoryBuffer::flatten() const
+{
+  TensorBatch batch;
+
+  if (_trajectories.empty())
+    return batch;
+
+  std::vector<std::vector<Real>> observations;
+  std::vector<std::vector<Real>> next_observations;
+  std::vector<std::vector<Real>> actions;
+  std::vector<std::vector<Real>> log_probabilities;
+  std::vector<Real> rewards;
+  std::vector<Real> value_targets;
+  std::vector<Real> advantages;
+
+  observations.reserve(numTransitions());
+  next_observations.reserve(numTransitions());
+  actions.reserve(numTransitions());
+  log_probabilities.reserve(numTransitions());
+  rewards.reserve(numTransitions());
+  value_targets.reserve(numTransitions());
+  advantages.reserve(numTransitions());
+
+  for (const auto & trajectory : _trajectories)
+  {
+    if (!trajectory.value_targets.empty() &&
+        trajectory.value_targets.size() != trajectory.rewards.size())
+      mooseError("Value targets must match the reward length of the trajectory.");
+    if (!trajectory.advantages.empty() && trajectory.advantages.size() != trajectory.rewards.size())
+      mooseError("Advantages must match the reward length of the trajectory.");
+
+    observations.insert(
+        observations.end(), trajectory.observations.begin(), trajectory.observations.end());
+    next_observations.insert(next_observations.end(),
+                             trajectory.next_observations.begin(),
+                             trajectory.next_observations.end());
+    actions.insert(actions.end(), trajectory.actions.begin(), trajectory.actions.end());
+    log_probabilities.insert(log_probabilities.end(),
+                             trajectory.log_probabilities.begin(),
+                             trajectory.log_probabilities.end());
+    rewards.insert(rewards.end(), trajectory.rewards.begin(), trajectory.rewards.end());
+    value_targets.insert(
+        value_targets.end(), trajectory.value_targets.begin(), trajectory.value_targets.end());
+    advantages.insert(advantages.end(), trajectory.advantages.begin(), trajectory.advantages.end());
+  }
+
+  batch.observations = bufferMatrixToTensor(observations);
+  batch.next_observations = bufferMatrixToTensor(next_observations);
+  batch.actions = bufferMatrixToTensor(actions);
+  batch.log_probabilities = bufferMatrixToTensor(log_probabilities);
+  batch.rewards = bufferVectorToColumnTensor(rewards);
+  batch.value_targets = bufferVectorToColumnTensor(value_targets);
+  batch.advantages = bufferVectorToColumnTensor(advantages);
+
+  return batch;
+}
+
+void
+LibtorchRLTrajectoryBuffer::validateTrajectory(const Trajectory & trajectory)
+{
+  const auto num_steps = trajectory.rewards.size();
+
+  if (trajectory.observations.size() != num_steps)
+    mooseError("RL trajectory observations must match the reward sequence length.");
+
+  if (trajectory.next_observations.size() != num_steps)
+    mooseError("RL trajectory next observations must match the reward sequence length.");
+
+  if (trajectory.actions.size() != num_steps)
+    mooseError("RL trajectory actions must match the reward sequence length.");
+
+  if (trajectory.log_probabilities.size() != num_steps)
+    mooseError("RL trajectory log probabilities must match the reward sequence length.");
+
+  if (!trajectory.value_targets.empty() && trajectory.value_targets.size() != num_steps)
+    mooseError("RL trajectory value targets must match the reward sequence length.");
+
+  if (!trajectory.advantages.empty() && trajectory.advantages.size() != num_steps)
+    mooseError("RL trajectory advantages must match the reward sequence length.");
+}
+
+#endif
diff --git a/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C
new file mode 100644
index 000000000000..dd87f584cf29
--- /dev/null
+++ b/modules/stochastic_tools/src/libtorch/utils/LibtorchRLValueEstimator.C
@@ -0,0 +1,104 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "LibtorchRLValueEstimator.h"
+
+#include "LibtorchUtils.h"
+
+#include "libmesh/utility.h"
+
+namespace
+{
+
+torch::Tensor
+valueEstimatorMatrixToTensor(const std::vector<std::vector<Real>> & rows)
+{
+  if (rows.empty())
+    return {};
+
+  const auto num_columns = rows.front().size();
+  auto tensor = torch::zeros({static_cast<long>(rows.size()), static_cast<long>(num_columns)},
+                             torch::TensorOptions().dtype(torch::kDouble));
+  auto accessor = tensor.accessor<Real, 2>();
+
+  for (const auto row_i : make_range(rows.size()))
+    for (const auto column_i : make_range(num_columns))
+      accessor[row_i][column_i] = rows[row_i][column_i];
+
+  return tensor;
+}
+
+} // namespace
+
+LibtorchRLValueEstimator::LibtorchRLValueEstimator(const Real discount_factor,
+                                                   const Real lambda_factor)
+  : _discount_factor(discount_factor), _lambda_factor(lambda_factor)
+{
+}
+
+void
+LibtorchRLValueEstimator::computeValueTargets(
+    LibtorchRLTrajectoryBuffer & buffer, Moose::LibtorchArtificialNeuralNet & value_network) const
+{
+  for (auto & trajectory : buffer.trajectories())
+  {
+    const auto targets = estimate(trajectory, value_network);
+    trajectory.advantages = targets.advantages;
+    trajectory.value_targets = targets.value_targets;
+  }
+}
+
+LibtorchRLValueEstimator::Targets
+LibtorchRLValueEstimator::estimate(const LibtorchRLTrajectoryBuffer::Trajectory & trajectory,
+                                   Moose::LibtorchArtificialNeuralNet & value_network) const
+{
+  Targets targets;
+
+  const auto values = evaluate(trajectory.observations, value_network);
+  const auto next_values = evaluate(trajectory.next_observations, value_network);
+
+  const auto num_steps = trajectory.rewards.size();
+  targets.advantages.resize(num_steps, 0.0);
+  targets.value_targets.resize(num_steps, 0.0);
+
+  Real gae = 0.0;
+  for (const auto reverse_step : make_range(num_steps))
+  {
+    const auto step = num_steps - reverse_step - 1;
+    const auto delta =
+        trajectory.rewards[step] + _discount_factor * next_values[step] - values[step];
+    gae = delta + _discount_factor * _lambda_factor * gae;
+    targets.advantages[step] = gae;
+    targets.value_targets[step] = gae + values[step];
+  }
+
+  return targets;
+}
+
+std::vector<Real>
+LibtorchRLValueEstimator::evaluate(const std::vector<std::vector<Real>> & observations,
+                                   Moose::LibtorchArtificialNeuralNet & value_network) const
+{
+  if (observations.empty())
+    return {};
+
+  torch::NoGradGuard no_grad;
+
+  auto tensor = valueEstimatorMatrixToTensor(observations);
+  auto value_tensor = value_network.forward(tensor);
+  auto flattened_value_tensor = value_tensor.reshape({-1});
+
+  std::vector<Real> values;
+  LibtorchUtils::tensorToVector(flattened_value_tensor, values);
+  return values;
+}
+
+#endif
diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv
index 454a22e116dc..cd7202f5163b 100644
--- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv
+++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/gold/parameter_read.csv
@@ -1,12 +1,12 @@
 time,center_temp,center_temp_tend,env_temp,left_flux,log_prob_left_flux
 0,300,300,273,0,0
-1800,300,270.35724380953,273.98104693845,-0.13793264034647,1.1115979988596
-3600,270.35724380953,274.51295145974,274.9578928833,-0.016781115951807,-0.50967346391778
-5400,274.51295145974,270.06590276071,275.92635483024,-0.22228241099481,1.1389673700336
-7200,270.06590276071,269.76297060624,276.88228567654,-0.26991716569164,0.53565739363602
-9000,269.76297060624,272.93107942871,277.82159197955,-0.18534271617753,1.2519275533428
-10800,272.93107942871,274.5965017921,278.74025148548,-0.15706446553999,1.3472992379419
-12600,274.5965017921,276.7588668075,279.63433035329,-0.10897031971508,1.353343147668
-14400,276.7588668075,278.0802103911,280.5,-0.091713226427664,1.2665547503991
-16200,278.0802103911,278.66431523589,281.33355349529,-0.10118636668795,1.2822084987121
-18000,278.66431523589,275.59443714345,282.13142143513,-0.24790007933615,0.92588151005597
+1800,300,283.69849346609,273.98104693845,73.630502862132,-5.8011541981082
+3600,283.69849346609,300.67435328161,274.9578928833,195.06575617849,-7.4224257273223
+5400,300.67435328161,267.4643690803,275.92635483024,-64.308701931811,-5.7737848258126
+7200,267.4643690803,259.55941422837,276.88228567654,-131.38405718619,-6.3770948269327
+9000,259.55941422837,272.14524323322,277.82159197955,-42.985660495371,-5.6608246378745
+10800,272.14524323322,275.68007219983,278.74025148548,-23.188639562158,-5.5654529493672
+12600,275.68007219983,282.82714752336,279.63433035329,24.239841099254,-5.5594090393934
+14400,282.82714752336,286.84577029599,280.5,48.132997254754,-5.6461974402188
+16200,286.84577029599,287.23729606816,281.33355349529,44.765462438375,-5.6305436912643
+18000,287.23729606816,269.39326109815,282.13142143513,-96.664605550218,-5.9868706945221
diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i
index e4621ac75239..e85f3d7fc132 100644
--- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i
+++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/libtorch_drl_control.i
@@ -93,17 +93,15 @@
     seed = 11
     type = LibtorchDRLControl
     parameters = "BCs/left_flux/value"
-    responses = 'center_temp env_temp'
+    observations = 'center_temp env_temp'
 
     input_timesteps = 2
-    response_scaling_factors = '0.03 0.03'
-    response_shift_factors = '270 270'
-    action_standard_deviations = '0.1'
-    action_scaling_factors = 200
+    observation_scaling_factors = '0.03 0.03'
+    observation_shift_factors = '270 270'
+    action_scaling_factors = 100
 
     filename = 'mynet_control.net'
-    torch_script_format = false
-    num_neurons_per_layer = '16 6'
+    num_neurons_per_layer = '4 2'
     activation_function = 'relu'
 
     execute_on = 'TIMESTEP_BEGIN'
@@ -111,12 +109,11 @@
   [src_control_empty]
     type = LibtorchDRLControl
     parameters = "BCs/left_flux/value"
-    responses = 'center_temp env_temp'
+    observations = 'center_temp env_temp'
 
     input_timesteps = 2
-    response_scaling_factors = '0.03 0.03'
-    response_shift_factors = '270 270'
-    action_standard_deviations = '0.1'
+    observation_scaling_factors = '0.03 0.03'
+    observation_shift_factors = '270 270'
     action_scaling_factors = 100
 
     execute_on = 'TIMESTEP_BEGIN'
diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net
index 66f9c6f3bd3e..ea50e03c46ef 100644
Binary files a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net and b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/mynet_control.net differ
diff --git a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests
index 551b53122de5..add6e3b26270 100644
--- a/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests
+++ b/modules/stochastic_tools/test/tests/controls/libtorch_drl_control/tests
@@ -10,6 +10,31 @@
                   "it to control a transient process."
     capabilities = 'libtorch'
   []
+  [read-parameters-half-transient]
+    type = RunApp
+    input = libtorch_drl_control.i
+    cli_args = "Outputs/file_base='parameter_read' Outputs/checkpoint=true "
+               "--test-checkpoint-half-transient"
+    recover = false
+    restep = false
+    prereq = read-parameters
+    requirement = "The system shall be able to recover a seeded DRL controller from a checkpointed "
+                  "transient."
+    capabilities = 'libtorch'
+  []
+  [read-parameters-recover]
+    type = CSVDiff
+    input = libtorch_drl_control.i
+    csvdiff = parameter_read.csv
+    cli_args = "Outputs/file_base='parameter_read' --recover parameter_read_cp/LATEST"
+    recover = false
+    restep = false
+    delete_output_before_running = false
+    prereq = read-parameters-half-transient
+    requirement = "The system shall reproduce the same transient control history after recovering "
+                  "a seeded DRL controller from a checkpoint."
+    capabilities = 'libtorch'
+  []
   [without-nn]
     type = CSVDiff
     input = libtorch_drl_control.i
@@ -21,4 +46,12 @@
                   "initialized in it."
     capabilities = 'libtorch'
   []
+  [invalid-execute-on]
+    type = RunException
+    input = libtorch_drl_control.i
+    cli_args = "Controls/src_control/execute_on='TIMESTEP_END'"
+    expect_err = "LibtorchDRLControl only supports 'TIMESTEP_BEGIN' for 'execute_on'."
+    requirement = "The system shall reject DRL controller execute flags other than timestep begin."
+    capabilities = 'libtorch'
+  []
 []
diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/control.net_best b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/control.net_best
new file mode 100644
index 000000000000..45792df0dce1
Binary files /dev/null and b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/control.net_best differ
diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i
index 0352b4cea3a1..0efff026d328 100644
--- a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i
+++ b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_sub.i
@@ -121,13 +121,12 @@
   [src_control]
     type = LibtorchDRLControl
     parameters = "BCs/left_flux/value"
-    responses = 'center_temp env_temp'
+    observations = 'center_temp env_temp'
 
     # keep consistent with LibtorchDRLControlTrainer
     input_timesteps = 2
-    response_scaling_factors = '0.03 0.03'
-    response_shift_factors = '270 270'
-    action_standard_deviations = '0.1'
+    observation_scaling_factors = '0.03 0.03'
+    observation_shift_factors = '270 270'
     action_scaling_factors = 100
 
     execute_on = 'TIMESTEP_BEGIN'
diff --git a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i
index 1660e9ed3aa1..f13f07fa8565 100644
--- a/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i
+++ b/modules/stochastic_tools/test/tests/transfers/libtorch_nn_transfer/libtorch_drl_control_trainer.i
@@ -24,20 +24,21 @@
     control_name = src_control
   []
   [r_transfer]
-    type = MultiAppReporterTransfer
+    type = SamplerReporterTransfer
     from_multi_app = runner
-    to_reporters = 'results/center_temp results/env_temp results/reward results/left_flux results/log_prob_left_flux'
-    from_reporters = 'T_reporter/center_temp_tend:value T_reporter/env_temp:value T_reporter/reward:value T_reporter/left_flux:value T_reporter/log_prob_left_flux:value'
+    sampler = dummy
+    stochastic_reporter = storage
+    from_reporter = 'T_reporter/center_temp_tend:value T_reporter/env_temp:value T_reporter/reward:value T_reporter/left_flux:value T_reporter/log_prob_left_flux:value'
   []
 []
 
 [Trainers]
   [nn_trainer]
     type = LibtorchDRLControlTrainer
-    response = 'results/center_temp results/env_temp'
-    control = 'results/left_flux'
-    log_probability = 'results/log_prob_left_flux'
-    reward = 'results/reward'
+    observation = 'storage/r_transfer:T_reporter:center_temp_tend:value storage/r_transfer:T_reporter:env_temp:value'
+    control = 'storage/r_transfer:T_reporter:left_flux:value'
+    log_probability = 'storage/r_transfer:T_reporter:log_prob_left_flux:value'
+    reward = 'storage/r_transfer:T_reporter:reward:value'
 
     num_epochs = 10
     update_frequency = 2
@@ -53,21 +54,20 @@
 
     # keep consistent with LibtorchNeuralNetControl
     input_timesteps = 2
-    response_scaling_factors = '0.03 0.03'
-    response_shift_factors = '270 270'
-    action_standard_deviations = '0.1'
+    observation_scaling_factors = '0.03 0.03'
+    observation_shift_factors = '270 270'
+    action_scaling_factors = 100
 
     read_from_file = false
+    shift_outputs = false
   []
 []
 
 [Reporters]
-  [results]
-    type = ConstantReporter
-    real_vector_names = 'center_temp env_temp reward left_flux log_prob_left_flux'
-    real_vector_values = '0; 0; 0; 0; 0'
-    outputs = 'csv_out'
-    execute_on = timestep_begin
+  [storage]
+    type = StochasticReporter
+    parallel_type = ROOT
+    outputs = none
   []
   [nn_parameters]
     type = DRLControlNeuralNetParameters
diff --git a/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C
new file mode 100644
index 000000000000..a5a2b77a5f30
--- /dev/null
+++ b/modules/stochastic_tools/unit/src/TestLibtorchActorNeuralNet.C
@@ -0,0 +1,421 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "gtest/gtest.h"
+#include "LibtorchActorNeuralNet.h"
+#include "LibtorchRandomUtils.h"
+#include "MooseUnitUtils.h"
+
+#include <cmath>
+
+namespace
+{
+
+class TestableLibtorchArtificialNeuralNet : public Moose::LibtorchArtificialNeuralNet
+{
+public:
+  using Moose::LibtorchArtificialNeuralNet::_weights;
+  using Moose::LibtorchArtificialNeuralNet::LibtorchArtificialNeuralNet;
+};
+
+class TestableLibtorchActorNeuralNet : public Moose::LibtorchActorNeuralNet
+{
+public:
+  using Moose::LibtorchActorNeuralNet::_weights;
+  using Moose::LibtorchActorNeuralNet::LibtorchActorNeuralNet;
+};
+
+Real
+inverseSoftplusPlusOne(const Real target)
+{
+  return std::log(std::exp(target - 1.0) - 1.0);
+}
+
+} // namespace
+
+TEST(LibtorchActorNeuralNetTest, artificialNetAppliesAffineInputAndOutputScaling)
+{
+  TestableLibtorchArtificialNeuralNet network("test_ann",
+                                              2,
+                                              1,
+                                              {},
+                                              {"linear"},
+                                              torch::kCPU,
+                                              torch::kDouble,
+                                              true,
+                                              {1.0, 2.0},
+                                              {2.0, 3.0},
+                                              {10.0});
+
+  ASSERT_EQ(network._weights.size(), 1u);
+
+  network._weights[0]->weight.data().fill_(0.0);
+  network._weights[0]->weight.data()[0][0] = 1.0;
+  network._weights[0]->weight.data()[0][1] = -1.0;
+  network._weights[0]->bias.data().fill_(0.0);
+
+  auto input = torch::tensor({{2.0, 6.0}}, at::kDouble);
+  const Real actual = network.forward(input).item<Real>();
+
+  EXPECT_NEAR(actual, -100.0, 1e-12);
+}
+
+TEST(LibtorchActorNeuralNetTest, boundedBetaLogProbability)
+{
+  constexpr Real min_value = -2.0;
+  constexpr Real max_value = 4.0;
+  constexpr Real alpha_target = 2.3;
+  constexpr Real beta_target = 3.7;
+  constexpr Real action_value = 1.2;
+
+  TestableLibtorchActorNeuralNet network(
+      "test_beta", 1, 1, {1}, {"linear"}, {min_value}, {max_value});
+
+  ASSERT_EQ(network._weights.size(), 1u);
+
+  network._weights[0]->weight.data().fill_(0.0);
+  network._weights[0]->bias.data().fill_(1.0);
+  network.betaActionDistribution().alphaModule()->weight.data().fill_(
+      inverseSoftplusPlusOne(alpha_target));
+  network.betaActionDistribution().betaModule()->weight.data().fill_(
+      inverseSoftplusPlusOne(beta_target));
+
+  auto input = torch::zeros({1, 1}, at::kDouble);
+  network.evaluate(input, false);
+
+  const Real alpha = network.betaActionDistribution().alphaTensor().item<Real>();
+  const Real beta = network.betaActionDistribution().betaTensor().item<Real>();
+  const Real normalized = (action_value - min_value) / (max_value - min_value);
+  const Real log_norm = std::lgamma(alpha) + std::lgamma(beta) - std::lgamma(alpha + beta);
+  const Real expected = (alpha - 1.0) * std::log(normalized) +
+                        (beta - 1.0) * std::log1p(-normalized) - log_norm -
+                        std::log(max_value - min_value);
+
+  auto action = torch::tensor({{action_value}}, at::kDouble);
+  const Real actual = network.logProbability(action).item<Real>();
+
+  EXPECT_NEAR(actual, expected, 1e-12);
+}
+
+TEST(LibtorchActorNeuralNetTest, gaussianActorUsesPhysicalActionScalingAndStateIndependentStd)
+{
+  constexpr Real input_shift = 1.0;
+  constexpr Real input_scale = 2.0;
+  constexpr Real action_scale = 5.0;
+  const Real log_std = std::log(2.0);
+  constexpr Real physical_action = 20.0;
+  constexpr Real expected_deterministic_action = 15.0;
+
+  TestableLibtorchActorNeuralNet network("test_gaussian",
+                                         1,
+                                         1,
+                                         {},
+                                         {"linear"},
+                                         {},
+                                         {},
+                                         torch::kCPU,
+                                         torch::kDouble,
+                                         true,
+                                         {input_shift},
+                                         {input_scale},
+                                         {action_scale});
+
+  network.gaussianActionDistribution().meanModule()->weight.data().fill_(1.5);
+  network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0);
+  network.gaussianActionDistribution().stdModule()->weight.data().fill_(123.0);
+  network.gaussianActionDistribution().stdModule()->bias.data().fill_(log_std);
+
+  auto input = torch::tensor({{2.0}}, at::kDouble);
+  const Real deterministic_action = network.evaluate(input, false).item<Real>();
+  EXPECT_NEAR(deterministic_action, expected_deterministic_action, 1e-12);
+  EXPECT_NEAR(
+      network.gaussianActionDistribution().stdTensor().item<Real>(), std::exp(log_std), 1e-12);
+
+  const Real unscaled_mean = expected_deterministic_action / action_scale;
+  const Real unscaled_action = physical_action / action_scale;
+  const Real expected_log_probability =
+      -std::pow(unscaled_action - unscaled_mean, 2) / (2.0 * 4.0) - log_std -
+      0.5 * std::log(2.0 * libMesh::pi) - std::log(action_scale);
+
+  auto action = torch::tensor({{physical_action}}, at::kDouble);
+  const Real actual_log_probability = network.logProbability(action).item<Real>();
+
+  EXPECT_NEAR(actual_log_probability, expected_log_probability, 1e-12);
+
+  auto second_input = torch::tensor({{4.0}}, at::kDouble);
+  network.evaluate(second_input, false);
+  EXPECT_NEAR(
+      network.gaussianActionDistribution().stdTensor().item<Real>(), std::exp(log_std), 1e-12);
+}
+
+TEST(LibtorchActorNeuralNetTest, gaussianActorCanUseStateDependentStdWhenRequested)
+{
+  TestableLibtorchActorNeuralNet network("test_state_dependent_gaussian",
+                                         1,
+                                         1,
+                                         {},
+                                         {"linear"},
+                                         {},
+                                         {},
+                                         torch::kCPU,
+                                         torch::kDouble,
+                                         true,
+                                         {1.0},
+                                         {2.0},
+                                         {1.0},
+                                         false);
+
+  network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0);
+  network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0);
+  network.gaussianActionDistribution().stdModule()->weight.data().fill_(0.5);
+  network.gaussianActionDistribution().stdModule()->bias.data().fill_(0.0);
+
+  auto first_input = torch::tensor({{2.0}}, at::kDouble);
+  network.evaluate(first_input, false);
+  const Real first_std = network.gaussianActionDistribution().stdTensor().item<Real>();
+
+  auto second_input = torch::tensor({{4.0}}, at::kDouble);
+  network.evaluate(second_input, false);
+  const Real second_std = network.gaussianActionDistribution().stdTensor().item<Real>();
+
+  EXPECT_NEAR(first_std, std::exp(1.0), 1e-12);
+  EXPECT_NEAR(second_std, std::exp(3.0), 1e-12);
+  EXPECT_GT(second_std, first_std);
+}
+
+TEST(LibtorchActorNeuralNetTest, explicitGeneratorKeepsGaussianSamplingStableAcrossCopies)
+{
+  TestableLibtorchActorNeuralNet original("test_gaussian",
+                                          1,
+                                          1,
+                                          {},
+                                          {"linear"},
+                                          {},
+                                          {},
+                                          torch::kCPU,
+                                          torch::kDouble,
+                                          true,
+                                          {0.0},
+                                          {1.0},
+                                          {1.0});
+
+  original.gaussianActionDistribution().meanModule()->weight.data().fill_(0.75);
+  original.gaussianActionDistribution().meanModule()->bias.data().fill_(1.25);
+  original.gaussianActionDistribution().stdModule()->weight.data().fill_(0.0);
+  original.gaussianActionDistribution().stdModule()->bias.data().fill_(std::log(0.5));
+
+  TestableLibtorchActorNeuralNet copied(original);
+
+  auto original_input = torch::tensor({{2.0}}, at::kDouble);
+  auto copied_input = torch::tensor({{2.0}}, at::kDouble);
+  auto original_generator = Moose::makeLibtorchCPUGenerator(12345);
+  auto copied_generator = Moose::makeLibtorchCPUGenerator(12345);
+
+  const auto original_action = original.evaluate(original_input, true, original_generator);
+  const auto copied_action = copied.evaluate(copied_input, true, copied_generator);
+
+  EXPECT_TRUE(torch::allclose(original_action, copied_action, /* rtol = */ 0.0, /* atol = */ 0.0));
+}
+
+TEST(LibtorchActorNeuralNetTest, explicitGeneratorMakesInitializationIndependentOfConstructionOrder)
+{
+  TestableLibtorchActorNeuralNet first("first_actor",
+                                       2,
+                                       1,
+                                       {3},
+                                       {"relu"},
+                                       {},
+                                       {},
+                                       torch::kCPU,
+                                       torch::kDouble,
+                                       true,
+                                       {0.0, 0.0},
+                                       {1.0, 1.0},
+                                       {1.0});
+  TestableLibtorchActorNeuralNet second("second_actor",
+                                        2,
+                                        1,
+                                        {3},
+                                        {"relu"},
+                                        {},
+                                        {},
+                                        torch::kCPU,
+                                        torch::kDouble,
+                                        true,
+                                        {0.0, 0.0},
+                                        {1.0, 1.0},
+                                        {1.0});
+
+  first.initializeNeuralNetwork(Moose::makeLibtorchCPUGenerator(2468));
+  second.initializeNeuralNetwork(Moose::makeLibtorchCPUGenerator(2468));
+
+  const auto first_parameters = first.named_parameters();
+  const auto second_parameters = second.named_parameters();
+  ASSERT_EQ(first_parameters.size(), second_parameters.size());
+  for (const auto i : index_range(first_parameters))
+  {
+    EXPECT_EQ(first_parameters[i].key(), second_parameters[i].key());
+    EXPECT_TRUE(torch::allclose(first_parameters[i].value(),
+                                second_parameters[i].value(),
+                                /* rtol = */ 0.0,
+                                /* atol = */ 0.0));
+  }
+}
+
+TEST(LibtorchActorNeuralNetTest, loadActorStateAcceptsTorchSaveArchive)
+{
+  TestableLibtorchActorNeuralNet saved("saved_actor",
+                                       2,
+                                       1,
+                                       {2},
+                                       {"linear"},
+                                       {},
+                                       {},
+                                       torch::kCPU,
+                                       torch::kDouble,
+                                       true,
+                                       {1.0, -2.0},
+                                       {0.5, 3.0},
+                                       {4.0});
+
+  saved._weights[0]->weight.data() = torch::tensor({{1.0, 2.0}, {3.0, 4.0}}, at::kDouble);
+  saved._weights[0]->bias.data() = torch::tensor({5.0, 6.0}, at::kDouble);
+  saved.gaussianActionDistribution().meanModule()->weight.data() =
+      torch::tensor({{7.0, 8.0}}, at::kDouble);
+  saved.gaussianActionDistribution().meanModule()->bias.data() = torch::tensor({9.0}, at::kDouble);
+  saved.gaussianActionDistribution().stdModule()->weight.data() =
+      torch::tensor({{-1.5, 2.5}}, at::kDouble);
+  saved.gaussianActionDistribution().stdModule()->bias.data() = torch::tensor({-3.5}, at::kDouble);
+
+  Moose::UnitUtils::TempFile archive;
+  torch::save(std::make_shared<Moose::LibtorchActorNeuralNet>(saved), archive.path().string());
+
+  TestableLibtorchActorNeuralNet restored("restored_actor",
+                                          2,
+                                          1,
+                                          {2},
+                                          {"linear"},
+                                          {},
+                                          {},
+                                          torch::kCPU,
+                                          torch::kDouble,
+                                          true,
+                                          {1.0, -2.0},
+                                          {0.5, 3.0},
+                                          {4.0});
+
+  Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string());
+
+  const auto saved_parameters = saved.named_parameters();
+  const auto restored_parameters = restored.named_parameters();
+  ASSERT_EQ(saved_parameters.size(), restored_parameters.size());
+  for (std::size_t i = 0; i < saved_parameters.size(); ++i)
+  {
+    EXPECT_EQ(saved_parameters[i].key(), restored_parameters[i].key());
+    EXPECT_TRUE(torch::allclose(saved_parameters[i].value(),
+                                restored_parameters[i].value(),
+                                /*rtol=*/0.0,
+                                /*atol=*/0.0));
+  }
+
+  const auto saved_buffers = saved.named_buffers();
+  const auto restored_buffers = restored.named_buffers();
+  ASSERT_EQ(saved_buffers.size(), restored_buffers.size());
+  for (std::size_t i = 0; i < saved_buffers.size(); ++i)
+  {
+    EXPECT_EQ(saved_buffers[i].key(), restored_buffers[i].key());
+    EXPECT_TRUE(torch::allclose(saved_buffers[i].value(),
+                                restored_buffers[i].value(),
+                                /*rtol=*/0.0,
+                                /*atol=*/0.0));
+  }
+
+  auto saved_input = torch::tensor({{3.0, -1.0}}, at::kDouble);
+  auto restored_input = saved_input.clone();
+  EXPECT_TRUE(torch::allclose(saved.evaluate(saved_input, false),
+                              restored.evaluate(restored_input, false),
+                              /*rtol=*/0.0,
+                              /*atol=*/0.0));
+}
+
+TEST(LibtorchActorNeuralNetTest, loadActorStateRejectsHiddenLayerMismatch)
+{
+  TestableLibtorchActorNeuralNet saved("saved_actor",
+                                       2,
+                                       1,
+                                       {2},
+                                       {"linear"},
+                                       {},
+                                       {},
+                                       torch::kCPU,
+                                       torch::kDouble,
+                                       true,
+                                       {1.0, -2.0},
+                                       {0.5, 3.0},
+                                       {4.0});
+
+  Moose::UnitUtils::TempFile archive;
+  torch::save(std::make_shared<Moose::LibtorchActorNeuralNet>(saved), archive.path().string());
+
+  TestableLibtorchActorNeuralNet restored("restored_actor",
+                                          2,
+                                          1,
+                                          {3},
+                                          {"linear"},
+                                          {},
+                                          {},
+                                          torch::kCPU,
+                                          torch::kDouble,
+                                          true,
+                                          {1.0, -2.0},
+                                          {0.5, 3.0},
+                                          {4.0});
+
+  EXPECT_ANY_THROW(Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string()));
+}
+
+TEST(LibtorchActorNeuralNetTest, loadActorStateRejectsBoundednessMismatch)
+{
+  TestableLibtorchActorNeuralNet saved("saved_actor",
+                                       1,
+                                       1,
+                                       {2},
+                                       {"linear"},
+                                       {},
+                                       {},
+                                       torch::kCPU,
+                                       torch::kDouble,
+                                       true,
+                                       {1.0},
+                                       {2.0},
+                                       {1.0});
+
+  Moose::UnitUtils::TempFile archive;
+  torch::save(std::make_shared<Moose::LibtorchActorNeuralNet>(saved), archive.path().string());
+
+  TestableLibtorchActorNeuralNet restored("restored_actor",
+                                          1,
+                                          1,
+                                          {2},
+                                          {"linear"},
+                                          {-2.0},
+                                          {2.0},
+                                          torch::kCPU,
+                                          torch::kDouble,
+                                          true,
+                                          {1.0},
+                                          {2.0},
+                                          {1.0});
+
+  EXPECT_ANY_THROW(Moose::loadLibtorchActorNeuralNetState(restored, archive.path().string()));
+}
+
+#endif
diff --git a/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C
new file mode 100644
index 000000000000..a45a16455d33
--- /dev/null
+++ b/modules/stochastic_tools/unit/src/TestLibtorchRLCore.C
@@ -0,0 +1,202 @@
+//* This file is part of the MOOSE framework
+//* https://mooseframework.inl.gov
+//*
+//* All rights reserved, see COPYRIGHT for full restrictions
+//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
+//*
+//* Licensed under LGPL 2.1, please see LICENSE for details
+//* https://www.gnu.org/licenses/lgpl-2.1.html
+
+#ifdef MOOSE_LIBTORCH_ENABLED
+
+#include "gtest/gtest.h"
+
+#include "LibtorchActorNeuralNet.h"
+#include "LibtorchArtificialNeuralNet.h"
+#include "LibtorchObservationHistoryHelper.h"
+#include "LibtorchRandomUtils.h"
+#include "LibtorchRLMiniBatchSampler.h"
+#include "LibtorchRLPPOLoss.h"
+#include "LibtorchRLTrajectoryBuffer.h"
+#include "LibtorchRLValueEstimator.h"
+
+#include <cmath>
+
+namespace
+{
+
+TEST(LibtorchRLCoreTest, ObservationHistoryStacksCurrentAndTrajectoryData)
+{
+  LibtorchObservationHistoryHelper history(3);
+
+  const auto expanded_observation_factors = history.expandObservationFactors({0.5, 2.0});
+  EXPECT_EQ(expanded_observation_factors, std::vector<Real>({0.5, 2.0, 0.5, 2.0, 0.5, 2.0}));
+
+  std::vector<std::vector<Real>> old_observations;
+  history.initializeHistory({1.0, 6.0}, old_observations);
+
+  const auto stacked_current = history.stackCurrentObservation({3.0, 2.0}, old_observations);
+  EXPECT_EQ(stacked_current, std::vector<Real>({3.0, 2.0, 1.0, 6.0, 1.0, 6.0}));
+
+  std::vector<std::vector<Real>> component_trajectories = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}};
+  const auto stacked_trajectory = history.stackTrajectoryObservation(component_trajectories, 2);
+  EXPECT_EQ(stacked_trajectory, std::vector<Real>({3.0, 6.0, 2.0, 5.0, 1.0, 4.0}));
+}
+
+TEST(LibtorchRLCoreTest, ObservationHistoryRejectsMalformedStoredHistory)
+{
+  LibtorchObservationHistoryHelper history(3);
+
+  EXPECT_THROW(
+      {
+        try
+        {
+          history.stackCurrentObservation({3.0, 2.0}, {{1.0, 6.0}});
+        }
+        catch (const std::exception & e)
+        {
+          EXPECT_EQ(std::string(e.what()),
+                    "Observation history must contain 2 stored entries, but got 1.");
+          throw;
+        }
+      },
+      std::exception);
+}
+
+TEST(LibtorchRLCoreTest, ObservationHistoryRejectsZeroTimesteps)
+{
+  EXPECT_THROW(
+      {
+        try
+        {
+          LibtorchObservationHistoryHelper history(0);
+          static_cast<void>(history);
+        }
+        catch (const std::exception & e)
+        {
+          EXPECT_EQ(std::string(e.what()),
+                    "Observation history requires at least one input timestep.");
+          throw;
+        }
+      },
+      std::exception);
+}
+
+TEST(LibtorchRLCoreTest, ValueEstimatorComputesGAETargets)
+{
+  Moose::LibtorchArtificialNeuralNet value_network("value", 1, 1, {}, {"linear"});
+  auto value_params = value_network.named_parameters();
+  value_params[0].value().data().fill_(1.0);
+  value_params[1].value().data().fill_(0.0);
+
+  LibtorchRLTrajectoryBuffer::Trajectory trajectory;
+  trajectory.observations = {{1.0}, {2.0}};
+  trajectory.next_observations = {{2.0}, {3.0}};
+  trajectory.rewards = {0.5, 1.0};
+
+  LibtorchRLValueEstimator estimator(0.9, 0.95);
+  const auto targets = estimator.estimate(trajectory, value_network);
+
+  ASSERT_EQ(targets.advantages.size(), 2u);
+  ASSERT_EQ(targets.value_targets.size(), 2u);
+  EXPECT_NEAR(targets.advantages[0], 2.7535, 1e-12);
+  EXPECT_NEAR(targets.advantages[1], 1.7, 1e-12);
+  EXPECT_NEAR(targets.value_targets[0], 3.7535, 1e-12);
+  EXPECT_NEAR(targets.value_targets[1], 3.7, 1e-12);
+}
+
+TEST(LibtorchRLCoreTest, PPOLossUsesStoredLogProbabilityAndValueTarget)
+{
+  Moose::LibtorchActorNeuralNet policy_network("policy", 1, 1, {}, {"linear"});
+  policy_network.gaussianActionDistribution().meanModule()->weight.data().fill_(0.0);
+  policy_network.gaussianActionDistribution().meanModule()->bias.data().fill_(0.0);
+  policy_network.gaussianActionDistribution().stdModule()->weight.data().fill_(0.0);
+  policy_network.gaussianActionDistribution().stdModule()->bias.data().fill_(0.0);
+
+  Moose::LibtorchArtificialNeuralNet value_network("value", 1, 1, {}, {"linear"});
+  auto value_params = value_network.named_parameters();
+  value_params[0].value().data().fill_(0.0);
+  value_params[1].value().data().fill_(1.0);
+
+  LibtorchRLMiniBatch batch;
+  batch.observations = torch::zeros({1, 1}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.actions = torch::zeros({1, 1}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.old_log_probabilities =
+      torch::tensor({{-0.5 * std::log(2.0 * libMesh::pi)}},
+                    torch::TensorOptions().dtype(torch::kDouble));
+  batch.value_targets = torch::tensor({{1.5}}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.advantages = torch::tensor({{2.0}}, torch::TensorOptions().dtype(torch::kDouble));
+
+  LibtorchRLPPOLoss loss(0.2, 0.01);
+  const auto loss_values = loss.compute(policy_network, value_network, batch);
+
+  const Real expected_entropy = 0.5 * std::log(2.0 * libMesh::pi) + 0.5;
+  const Real expected_actor_loss = -(2.0 + 0.01 * expected_entropy);
+  const Real expected_critic_loss = 0.25;
+
+  EXPECT_NEAR(loss_values.entropy.item<Real>(), expected_entropy, 1e-12);
+  EXPECT_NEAR(loss_values.actor_loss.item<Real>(), expected_actor_loss, 1e-12);
+  EXPECT_NEAR(loss_values.critic_loss.item<Real>(), expected_critic_loss, 1e-12);
+}
+
+TEST(LibtorchRLCoreTest, MiniBatchSamplerStandardizesAdvantagesPerBatch)
+{
+  LibtorchRLTrajectoryBuffer::TensorBatch batch;
+  batch.observations =
+      torch::tensor({{0.0}, {1.0}, {2.0}, {3.0}}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.actions =
+      torch::tensor({{0.1}, {0.2}, {0.3}, {0.4}}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.log_probabilities =
+      torch::tensor({{-1.0}, {-1.1}, {-1.2}, {-1.3}}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.value_targets =
+      torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.advantages =
+      torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble));
+
+  LibtorchRLMiniBatchSampler sampler;
+  const auto mini_batches = sampler.sample(batch, 2, true);
+
+  ASSERT_EQ(mini_batches.size(), 2u);
+  for (const auto & mini_batch : mini_batches)
+  {
+    ASSERT_EQ(mini_batch.size(), 2);
+    EXPECT_NEAR(mini_batch.advantages.mean().item<Real>(), 0.0, 1e-12);
+  }
+}
+
+TEST(LibtorchRLCoreTest, MiniBatchSamplerUsesExplicitGeneratorForDeterministicShuffling)
+{
+  LibtorchRLTrajectoryBuffer::TensorBatch batch;
+  batch.observations =
+      torch::tensor({{0.0}, {1.0}, {2.0}, {3.0}}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.actions =
+      torch::tensor({{0.1}, {0.2}, {0.3}, {0.4}}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.log_probabilities =
+      torch::tensor({{-1.0}, {-1.1}, {-1.2}, {-1.3}}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.value_targets =
+      torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble));
+  batch.advantages =
+      torch::tensor({{1.0}, {2.0}, {3.0}, {4.0}}, torch::TensorOptions().dtype(torch::kDouble));
+
+  LibtorchRLMiniBatchSampler sampler;
+  const auto first_batches = sampler.sample(batch, 2, false, Moose::makeLibtorchCPUGenerator(9876));
+  const auto second_batches =
+      sampler.sample(batch, 2, false, Moose::makeLibtorchCPUGenerator(9876));
+
+  ASSERT_EQ(first_batches.size(), second_batches.size());
+  for (const auto i : index_range(first_batches))
+  {
+    EXPECT_TRUE(torch::allclose(first_batches[i].observations,
+                                second_batches[i].observations,
+                                /* rtol = */ 0.0,
+                                /* atol = */ 0.0));
+    EXPECT_TRUE(torch::allclose(first_batches[i].actions,
+                                second_batches[i].actions,
+                                /* rtol = */ 0.0,
+                                /* atol = */ 0.0));
+  }
+}
+
+} // namespace
+
+#endif
diff --git a/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json b/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json
index f04acdb645fe..5f2e6e84d21e 100644
--- a/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json
+++ b/test/tests/controls/libtorch_nn_control/gold/torch_parameter_read.json
@@ -112,12 +112,23 @@
                         0.3051220898124913,
                         0.2652967572212219
                     ],
+                    "input_scaling_factors": [
+                        1.0,
+                        1.0
+                    ],
+                    "input_shift_factors": [
+                        0.0,
+                        0.0
+                    ],
                     "output_layer_.bias": [
                         0.4794089532666399
                     ],
                     "output_layer_.weight": [
                         0.47736589074351915,
                         0.11757504436858923
+                    ],
+                    "output_scaling_factors": [
+                        1.0
                     ]
                 }
             },
diff --git a/test/tests/controls/libtorch_nn_control/read_control.i b/test/tests/controls/libtorch_nn_control/read_control.i
index 5c8aff04a390..ac9a711a9ac6 100644
--- a/test/tests/controls/libtorch_nn_control/read_control.i
+++ b/test/tests/controls/libtorch_nn_control/read_control.i
@@ -97,7 +97,7 @@ cp = 1.0
   [src_control]
     type = LibtorchNeuralNetControl
     parameters = "Kernels/anti_source/value"
-    responses = 'T_max'
+    observations = 'T_max'
     execute_on = 'TIMESTEP_BEGIN'
   []
 []