diff --git a/docs/sphinx/user_guide/run.rst b/docs/sphinx/user_guide/run.rst
index 3c83650cd..e1ce415d2 100644
--- a/docs/sphinx/user_guide/run.rst
+++ b/docs/sphinx/user_guide/run.rst
@@ -51,29 +51,12 @@ kernel.
 Running the Suite
 ==================
 
-After compilation, the main executable will reside in the ``bin`` subdirectory 
-of the build space. The executable will be able to run all kernels and 
-variants that have been built depending on which CMake options were specified
-to configure the build.
+After compilation, the RAJA Performance Suite executable will reside in the
+``bin`` subdirectory of the build space. The executable will be able to run
+all kernels and variants that have been built depending on which CMake options
+were specified to configure the build.
 
-To run the Suite in its default mode, type the executable name with no 
-command-line arguments::
-
-  $ ./bin/raja-perf.exe
-
-This will run all kernels and variants that have been built in their default
-configurations. Information describing how the Suite will run along with
-some information about each kernel will appear on the screen. More information
-about kernel and execution details will also appear in a run report files 
-generated in the run directory after Suite execution completes. 
-
-.. note:: You can pass the ``--dryrun`` command-line option to the executable
-          to see a summary of how the Suite will execute, by showing default
-          run parameters, without actually running it. You can also pass 
-          other command-line options when doing a "dry run" and you will see
-          that the given options are represented in the screen output.
-
-The Suite can be run in a variety of ways determined by the command-line 
+The Suite can be run in many different ways chosen by the command-line 
 options passed to the executable. For example, you can run or exclude subsets 
 of kernels, variants, or groups. You can also pass options to set problem 
 sizes, number of times each kernel is run (sampled), and many other run 
@@ -95,6 +78,27 @@ or::
 .. note:: To see all available Suite execution options, pass the `--help` or 
           `-h` option to the executable.
 
+.. important:: We do not describe most of the Suite execution options in this
+               guide since the runtime help output is the main reference for
+               available options, defaults, and arguments they accept.
+
+To run the Suite in its default mode, type the executable name with no 
+command-line arguments::
+
+  $ ./bin/raja-perf.exe
+
+This will run all kernels and variants that have been built in their default
+configurations. Information describing how the Suite will run along with
+some information about each kernel will appear on the screen. More information
+about kernel and execution details will also appear in run report files
+generated in the run directory after Suite execution completes. 
+
+.. note:: You can pass the ``--dryrun`` command-line option to the executable
+          to see a summary of how the Suite will execute by showing run
+          parameters without actually running it. You can pass any other
+          command-line options when doing a "dry run" and you will see
+          that the given options are represented in the screen output.
+
 Lastly, the program will report specific errors if given incorrect input, such
 as an option that requires a value and no value is provided. It will also emit 
 a summary of command-line arguments it was given if the input contains 
@@ -116,7 +120,8 @@ will report the following in the screen output::
     See run parameters or option messages above.
 
 The output indicates that the kernel input is invalid because the string Foo
-is not the name of a kernel in the Suite, while DAXPY is the name of a kernel. 
+is not the name of a kernel in the Suite, while DAXPY is the name of a kernel
+in the Suite.
 
 .. note:: The Suite executable will attempt to provide helpful information
           if it is given incorrect input, such as command-line arguments that 
@@ -130,8 +135,18 @@ is not the name of a kernel in the Suite, while DAXPY is the name of a kernel.
 Running with MPI
 ==================
 
-Running the Suite with MPI is just like running any other MPI application.
-For example, issuing the following command on a machine with slurm scheduling::
+The Suite can be configured and compiled to run in a distributed memory
+parallel mode using MPI. Running the Suite on multiple MPI ranks will execute
+the same code for each kernel on each rank with minimal synchronization points
+to gather execution timing data from all ranks. This capability is provided so
+that individual kernel performance more closely aligns with how such kernels 
+would perform in a real application. For example, compute node memory bandwidth
+may be different when running on a many-core system using OpenMP multithreading
+to exercise all cores than when each core is mapped to an MPI rank.
+
+Running the Suite on multiple MPI ranks is just like running any other MPI
+application. For example, issuing the following command on a machine with
+slurm scheduling::
 
   $ srun -n 2 ./bin/raja-perf.exe
 
diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp
index 7805376bb..e90ebd149 100644
--- a/src/common/Executor.cpp
+++ b/src/common/Executor.cpp
@@ -19,7 +19,7 @@
 #include "CudaDataUtils.hpp"
 #include "HipDataUtils.hpp"
 
-// Warmup kernels to run first to help reduce startup overheads in timings
+// Warmup kernels for default warmup mode
 #include "basic/DAXPY.hpp"
 #include "basic/REDUCE3_INT.hpp"
 #include "basic/INDEXLIST_3LOOP.hpp"
@@ -754,7 +754,9 @@ void Executor::runKernel(KernelBase* kernel, bool print_kernel_name)
 
 void Executor::runWarmupKernels()
 {
-  if ( run_params.getDisableWarmup() ) {
+  RunParams::WarmupMode warmup_mode = run_params.getWarmupMode();
+
+  if ( warmup_mode == RunParams::WarmupMode::Disable ) {
     return;
   } 
 
@@ -763,16 +765,28 @@ void Executor::runWarmupKernels()
   //
   // Get warmup kernels to run from input
   //
-  std::set<KernelID> kernel_ids = run_params.getWarmupKernelIDsToRun();
+  std::set<KernelID> warmup_kernel_ids;
+
+  if ( warmup_mode == RunParams::WarmupMode::Explicit ) {
 
-  if ( kernel_ids.empty() ) {
+    warmup_kernel_ids = run_params.getSpecifiedWarmupKernelIDs();
+
+  } else if ( warmup_mode == RunParams::WarmupMode::PerfRunSame ) {
 
     //
-    // If no warmup kernels were given, choose a warmup kernel for each feature
+    // Warmup kernels will be same as kernels specified to run in the suite
     //
+    for (size_t ik = 0; ik < kernels.size(); ++ik) {
+      KernelBase* kernel = kernels[ik];
+      warmup_kernel_ids.insert( kernel->getKernelID() );
+    } // iterate over kernels to run
+
+  } else if ( warmup_mode == RunParams::WarmupMode::Default ) {
 
     //
-    // For kernels to be run, assemble a set of feature IDs
+    // No warmup kernel input given, choose a warmup kernel for each feature
+    //
+    // First, assemble a set of feature IDs
     //
     std::set<FeatureID> feature_ids;
     for (size_t ik = 0; ik < kernels.size(); ++ik) {
@@ -788,7 +802,7 @@ void Executor::runWarmupKernels()
     } // iterate over kernels
 
     //
-    // Map feature IDs to set of warmup kernel IDs
+    // Map feature IDs to rudimentary set of warmup kernel IDs
     //
     for ( auto fid = feature_ids.begin(); fid != feature_ids.end(); ++ fid ) {
 
@@ -797,29 +811,29 @@ void Executor::runWarmupKernels()
         case Forall:
         case Kernel:
         case Launch:
-          kernel_ids.insert(Basic_DAXPY); break;
+          warmup_kernel_ids.insert(Basic_DAXPY); break;
 
         case Sort:
-          kernel_ids.insert(Algorithm_SORT); break;
+          warmup_kernel_ids.insert(Algorithm_SORT); break;
 
         case Scan:
-          kernel_ids.insert(Basic_INDEXLIST_3LOOP); break;
+          warmup_kernel_ids.insert(Basic_INDEXLIST_3LOOP); break;
 
         case Workgroup:
-          kernel_ids.insert(Comm_HALO_PACKING_FUSED); break;
+          warmup_kernel_ids.insert(Comm_HALO_PACKING_FUSED); break;
 
         case Reduction:
-          kernel_ids.insert(Basic_REDUCE3_INT); break;
+          warmup_kernel_ids.insert(Basic_REDUCE3_INT); break;
 
         case Atomic:
-          kernel_ids.insert(Basic_PI_ATOMIC); break;
+          warmup_kernel_ids.insert(Basic_PI_ATOMIC); break;
 
         case View:
           break;
 
   #ifdef RAJA_PERFSUITE_ENABLE_MPI
         case MPI:
-          kernel_ids.insert(Comm_HALO_EXCHANGE_FUSED); break;
+          warmup_kernel_ids.insert(Comm_HALO_EXCHANGE_FUSED); break;
   #endif
 
         default:
@@ -835,7 +849,15 @@ void Executor::runWarmupKernels()
   //
   // Run warmup kernels
   //
-  for ( auto kid = kernel_ids.begin(); kid != kernel_ids.end(); ++ kid ) {
+  bool prev_state = KernelBase::setWarmupRun(true);
+
+  for ( auto kid = warmup_kernel_ids.begin();
+             kid != warmup_kernel_ids.end(); ++ kid ) {
+    //  
+    // Note that we create a new kernel object for each kernel to run
+    // in warmup so we don't pollute timing data, checksum data, etc.
+    // for kernels that will run for real later...
+    //
     KernelBase* kernel = getKernelObject(*kid, run_params);
 #if defined(RAJA_PERFSUITE_USE_CALIPER)
     kernel->caliperOff();
@@ -847,6 +869,8 @@ void Executor::runWarmupKernels()
     delete kernel;
   }
 
+  KernelBase::setWarmupRun(prev_state);
+
 }
 
 void Executor::outputRunData()
@@ -933,10 +957,12 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode,
     //
     // Set basic table formatting parameters.
     //
-    const string kernel_col_name("Kernel  ");
+    const string kernel_name_col_header_variant("Variant  ");
+    const string kernel_name_col_header_tuning("Tuning  ");
     const string sepchr(" , ");
 
-    size_t kercol_width = kernel_col_name.size();
+    size_t kercol_width = max(kernel_name_col_header_variant.size(),
+                              kernel_name_col_header_tuning.size());
     for (size_t ik = 0; ik < kernels.size(); ++ik) {
       kercol_width = max(kercol_width, kernels[ik]->getName().size());
     }
@@ -969,7 +995,7 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode,
     //
     // Print column variant name line.
     //
-    file <<left<< setw(kercol_width) << kernel_col_name;
+    file <<left<< setw(kercol_width) << kernel_name_col_header_variant;
     for (size_t iv = 0; iv < variant_ids.size(); ++iv) {
       for (size_t it = 0; it < tuning_names[variant_ids[iv]].size(); ++it) {
         file << sepchr <<left<< setw(vartuncol_width[iv][it])
@@ -981,7 +1007,7 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode,
     //
     // Print column tuning name line.
     //
-    file <<left<< setw(kercol_width) << kernel_col_name;
+    file <<left<< setw(kercol_width) << kernel_name_col_header_tuning;
     for (size_t iv = 0; iv < variant_ids.size(); ++iv) {
       for (size_t it = 0; it < tuning_names[variant_ids[iv]].size(); ++it) {
         file << sepchr <<left<< setw(vartuncol_width[iv][it])
diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp
index 2e47d4ccb..9cefeaa66 100644
--- a/src/common/KernelBase.cpp
+++ b/src/common/KernelBase.cpp
@@ -19,6 +19,16 @@
 
 namespace rajaperf {
 
+//
+// Static method to set whether kernels are used for warmup purposes or not
+//
+bool KernelBase::setWarmupRun(bool warmup_run)
+{
+  bool previous_state = s_warmup_run;
+  s_warmup_run = warmup_run;
+  return previous_state;
+}
+
 KernelBase::KernelBase(KernelID kid, const RunParams& params)
   : run_params(params)
 #if defined(RAJA_ENABLE_TARGET_OPENMP)
@@ -133,7 +143,9 @@ Index_type KernelBase::getTargetProblemSize() const
 Index_type KernelBase::getRunReps() const
 {
   Index_type run_reps = static_cast<Index_type>(0);
-  if (run_params.getInputState() == RunParams::CheckRun) {
+  if (s_warmup_run) {
+    run_reps = static_cast<Index_type>(1);
+  } else if (run_params.getInputState() == RunParams::CheckRun) {
     run_reps = static_cast<Index_type>(run_params.getCheckRunReps());
   } else {
     run_reps = static_cast<Index_type>(default_reps*run_params.getRepFactor());
diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp
index 1e8aad93a..1fa6f695b 100644
--- a/src/common/KernelBase.hpp
+++ b/src/common/KernelBase.hpp
@@ -90,6 +90,15 @@ class KernelBase
     { return std::numeric_limits<size_t>::max(); }
   static std::string getDefaultTuningName() { return "default"; }
 
+  //
+  // Method to set state of all Kernel objects to indicate kernel runs 
+  // are for warmup purposes if true is passed, else false.
+  //
+  // The warmup state before the method call is returned to facilitate 
+  // reset mechanics. 
+  //
+  static bool setWarmupRun(bool warmup_run);
+
   KernelBase(KernelID kid, const RunParams& params);
 
   virtual ~KernelBase();
@@ -631,7 +640,13 @@ class KernelBase
                         variant_tuning_method_pointer method);
 
   //
-  // Static properties of kernel, independent of run
+  // Boolean member shared by all kernel objects indicating whether they
+  // will be run for warmup purposes (true) or not (false).
+  //
+  static inline bool s_warmup_run = false;
+
+  //
+  // Persistent properties of kernel, independent of run
   //
   KernelID    kernel_id;
   std::string name;
diff --git a/src/common/RunParams.cpp b/src/common/RunParams.cpp
index 0f585d6cd..c62642949 100644
--- a/src/common/RunParams.cpp
+++ b/src/common/RunParams.cpp
@@ -58,6 +58,7 @@ RunParams::RunParams(int argc, char** argv)
    checkrun_reps(1),
    reference_variant(),
    reference_vid(NumVariants),
+   warmup_mode(WarmupMode::Default),
    warmup_kernel_input(),
    invalid_warmup_kernel_input(),
    kernel_input(),
@@ -83,7 +84,6 @@ RunParams::RunParams(int argc, char** argv)
 #if defined(RAJA_PERFSUITE_USE_CALIPER)
    add_to_spot_config(),
 #endif
-   disable_warmup(false),
    run_kernels(),
    run_variants()
 {
@@ -176,8 +176,6 @@ void RunParams::print(std::ostream& str) const
   }
 #endif
 
-  str << "\n disable_warmup = " << disable_warmup;
-
   str << "\n seq data space = " << getDataSpaceName(seqDataSpace);
   str << "\n omp data space = " << getDataSpaceName(ompDataSpace);
   str << "\n omp target data space = " << getDataSpaceName(ompTargetDataSpace);
@@ -200,6 +198,8 @@ void RunParams::print(std::ostream& str) const
   str << "\n hip MPI data space = " << getDataSpaceName(hipMPIDataSpace);
   str << "\n kokkos MPI data space = " << getDataSpaceName(kokkosMPIDataSpace);
 
+  str << "\n warmup_mode = " << WarmupModeToStr(warmup_mode);
+
   str << "\n warmup_kernel_input = ";
   for (size_t j = 0; j < warmup_kernel_input.size(); ++j) {
     str << "\n\t" << warmup_kernel_input[j];
@@ -863,6 +863,8 @@ void RunParams::parseCommandLineOptions(int argc, char** argv)
         }
       }
 
+      warmup_mode = WarmupMode::Explicit;
+
     } else if ( opt == std::string("--kernels") ||
                 opt == std::string("-k") ) {
 
@@ -1158,9 +1160,13 @@ void RunParams::parseCommandLineOptions(int argc, char** argv)
         input_state = DryRun;
       }
 
-    } else if ( std::string(argv[i]) == std::string("--disable-warmup") ) {
+    } else if ( std::string(argv[i]) == std::string("--warmup-disable") ) {
+
+      warmup_mode = WarmupMode::Disable;
 
-      disable_warmup = true;
+    } else if ( std::string(argv[i]) == std::string("--warmup-perfrun-same") ) {
+
+      warmup_mode = WarmupMode::PerfRunSame;
 
     } else if ( std::string(argv[i]) == std::string("--checkrun") ) {
 
@@ -1374,11 +1380,16 @@ void RunParams::printHelpMessage(std::ostream& str) const
       << "\t\t -of dat (output data will be in files 'dat*')\n\n";
 
   str << "\t Options for selecting kernels to run....\n"
-      << "\t ========================================\n\n";;
+      << "\t ========================================\n\n";
+
+  str << "\t For warmup kernels, the default (no option specified) will run a minimal set of warmup kernels based on\n"
+      << "\t RAJA features exercised in kernels specified for perf run. Other options are:\n\n";
+
+  str << "\t --warmup-disable (do not run any warmup kernels)\n\n";
 
-  str << "\t --disable-warmup (disable warmup kernels) [Default is run warmup kernels that are relevant to kernels selected to run]\n\n";
+  str << "\t --warmup-perfrun-same (run same set of kernels for warmup as specified for perf run)\n\n";
 
-  str << "\t --warmup-kernels, -wk <space-separated strings> [Default is run warmup kernels that are relevant to kernels selected to run]\n"
+  str << "\t --warmup-kernels, -wk <space-separated strings> [if no kernel names specified, none will be run for warmup]\n"
       << "\t      (names of individual kernels and/or groups of kernels to warmup)\n"
       << "\t      See '--print-kernels'/'-pk' option for list of valid kernel and group names.\n"
       << "\t      Kernel names are listed as <group name>_<kernel name>.\n";
@@ -2136,7 +2147,7 @@ void RunParams::processKernelInput()
   //
   // ================================================================
 
-  run_warmup_kernels.clear();
+  specified_warmup_kernel_ids.clear();
 
   if ( !warmup_kernel_input.empty() ) {
 
@@ -2174,7 +2185,7 @@ void RunParams::processKernelInput()
         KernelID tkid = static_cast<KernelID>(kid);
         if ( getFullKernelName(tkid).find(gname) != std::string::npos &&
              exclude_kernels.find(tkid) == exclude_kernels.end()) {
-          run_warmup_kernels.insert(tkid);
+          specified_warmup_kernel_ids.insert(tkid);
         }
       }
 
@@ -2192,7 +2203,7 @@ void RunParams::processKernelInput()
         KernelID tkid = static_cast<KernelID>(kid);
         if ( getKernelName(tkid) == *it || getFullKernelName(tkid) == *it ) {
           if (exclude_kernels.find(tkid) == exclude_kernels.end()) {
-            run_warmup_kernels.insert(tkid);
+            specified_warmup_kernel_ids.insert(tkid);
           }
           found_it = true;
         }
diff --git a/src/common/RunParams.hpp b/src/common/RunParams.hpp
index 8603c2da1..d8bcfe523 100644
--- a/src/common/RunParams.hpp
+++ b/src/common/RunParams.hpp
@@ -132,6 +132,35 @@ class RunParams {
     }
   }
 
+  /*!
+   * \brief Enumeration indicating how to run warmup kernels
+   */
+  enum WarmupMode {
+    Disable,       /*!< no warmup kernels will be run */
+    Default,       /*!< run minimal set of warmup kernels based on kernels to run */
+    PerfRunSame,   /*!< run warmup pass of each kernel to run */
+    Explicit,      /*!< run warmup pass of each kernel explicitly named for warmup in input */
+  };
+
+  /*!
+   * \brief Translate WarmupMode enum value to string
+   */
+  static std::string WarmupModeToStr(WarmupMode wm)
+  {
+    switch (wm) {
+      case WarmupMode::Disable:
+        return "Disable";
+      case WarmupMode::Default:
+        return "Default";
+      case WarmupMode::PerfRunSame:
+        return "PerfRunSame";
+      case WarmupMode::Explicit:
+        return "Explicit";
+      default:
+        return "Unknown";
+    }
+  }
+
   /*!
    * \brief Return state of input parsed to this point.
    */
@@ -252,9 +281,10 @@ class RunParams {
   const std::string& getAddToCaliperConfig() const { return add_to_cali_config; }
 #endif
 
-  bool getDisableWarmup() const { return disable_warmup; }
+  WarmupMode getWarmupMode() const { return warmup_mode; }
 
-  const std::set<KernelID>& getWarmupKernelIDsToRun() const { return run_warmup_kernels; }
+  const std::set<KernelID>& getSpecifiedWarmupKernelIDs() const
+    { return specified_warmup_kernel_ids; }
   const std::set<KernelID>& getKernelIDsToRun() const { return run_kernels; }
   const std::set<VariantID>& getVariantIDsToRun() const { return run_variants; }
   VariantID getReferenceVariantID() const { return reference_vid; }
@@ -367,6 +397,8 @@ class RunParams {
   DataSpace syclMPIDataSpace = DataSpace::SyclPinned;
   DataSpace kokkosMPIDataSpace = DataSpace::Copy;
 
+  WarmupMode warmup_mode;
+
   //
   // Arrays to hold input strings for valid/invalid input. Helpful for
   // debugging command line args.
@@ -401,9 +433,7 @@ class RunParams {
   std::string add_to_cali_config;
 #endif
 
-  bool disable_warmup;
-
-  std::set<KernelID>  run_warmup_kernels;
+  std::set<KernelID>  specified_warmup_kernel_ids;
   std::set<KernelID>  run_kernels;
   std::set<VariantID> run_variants;