diff --git a/CMakeLists.txt b/CMakeLists.txt
index f34947517..cda39d3ed 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,10 @@ endif()
 if (ENABLE_KOKKOS)
   set(CMAKE_CXX_STANDARD 17)
   set(BLT_CXX_STD c++17)
+elseif (ENABLE_STDPAR)
+  set(CMAKE_CXX_STANDARD 20)
+  set(BLT_CXX_STD c++14)
+  add_definitions(-DBUILD_STDPAR)
 else()
   set(CMAKE_CXX_STANDARD 14)
   set(BLT_CXX_STD c++14)
@@ -94,6 +98,9 @@ endif ()
 if (ENABLE_OPENMP)
   add_definitions(-DRUN_OPENMP)
 endif ()
+if (ENABLE_STDPAR)
+  add_definitions(-DRUN_STDPAR)
+endif ()
 
 set(RAJA_PERFSUITE_VERSION_MAJOR 2022)
 set(RAJA_PERFSUITE_VERSION_MINOR 10)
diff --git a/README.stdpar b/README.stdpar
new file mode 100644
index 000000000..9b1f6ef99
--- /dev/null
+++ b/README.stdpar
@@ -0,0 +1,106 @@
+# GCC
+
+```
+cmake .. -DCMAKE_C_COMPILER=gcc-11  -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j`nproc`
+```
+
+# NVC++
+
+## Patches
+
+```
+$ diff /opt/nvidia/hpc_sdk/Linux_$(uname -m)/${V}/compilers/include/nvhpc/algorithm_execution.hpp
+1066c1066
+<     _ASSERT_RANDOM_ACCESS(_FIt);
+---
+>     //_ASSERT_RANDOM_ACCESS(_FIt);
+```
+
+```
+$ diff /opt/nvidia/hpc_sdk/Linux_$(uname -m)/${V}/compilers/include/nvhpc/numeric_execution.hpp
+386c386
+<     _ASSERT_RANDOM_ACCESS(_FIt);
+---
+>     //_ASSERT_RANDOM_ACCESS(_FIt);
+```
+
+## OpenMP/OpenACC for atomics
+
+```
+cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=multicore -acc=multicore -mp=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8
+```
+
+```
+cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8
+```
+
+## CPU
+
+The Lambda variant of this kernel produces NaN, so just disable it for now:
+
+-------------------------------------------------------
+Basic_MAT_MAT_SHARED
+........................................................
+Base_StdPar-default        1136.6199452543779141       0.0000000000000000000
+Lambda_StdPar-default      -nan                        -nan
+
+The Base_StdPar result below is wrong, probably because the update is not actually atomic:
+
+-------------------------------------------------------
+Basic_PI_ATOMIC
+........................................................
+Base_StdPar-default        0.55899274342205662602      2.5825999101679185666
+Lambda_StdPar-default      3.1415926535899751926       0.0000000000000000000
+
+Check the kernels below to make sure no unintended float<->double conversions are happening.
+
+-------------------------------------------------------
+Polybench_GEMVER
+........................................................
+Base_Seq-default           16695345.016927006001       0.0000000000000000000
+Lambda_Seq-default         16695345.016927005882       1.1914380593225359917e-10
+RAJA_Seq-default           16695345.016927006608       -6.0663296608254313469e-10
+Base_StdPar-default        16695345.016927005745       2.5647750589996576309e-10
+Lambda_StdPar-default      16695345.016927006608       -6.0663296608254313469e-10
+
+-------------------------------------------------------
+Polybench_MVT
+........................................................
+Base_Seq-default           6821556.1519041797419       0.0000000000000000000
+Lambda_Seq-default         6821556.1519041797419       0.0000000000000000000
+RAJA_Seq-default           6821556.1519041792999       4.4201442506164312363e-10
+Base_StdPar-default        6821556.1519041792999       4.4201442506164312363e-10
+Lambda_StdPar-default      6821556.1519041792999       4.4201442506164312363e-10
+
+-------------------------------------------------------
+Stream_DOT
+........................................................
+Base_Seq-default           39999973.379841431975       0.0000000000000000000
+Lambda_Seq-default         39999973.379841439426       -7.4505805969238281250e-09
+RAJA_Seq-default           39999973.379841662943       -2.3096799850463867188e-07
+Base_StdPar-default        39999973.379841439426       -7.4505805969238281250e-09
+Lambda_StdPar-default      39999973.379841439426       -7.4505805969238281250e-09
+
+-------------------------------------------------------
+Algorithm_REDUCE_SUM
+........................................................
+RAJA_Seq-default           268294.10758353886195       1.5483237802982330322e-08
+
+## GPU
+
+Lambda_Seq shows the same bug, so just disable the Lambda variants:
+
+-------------------------------------------------------
+Basic_MAT_MAT_SHARED
+........................................................
+Base_Seq-default           1136.6199452543779141       0.0000000000000000000
+Lambda_Seq-default         -6.0464819976872759102e+32  6.0464819976872759102e+32
+RAJA_Seq-default           1136.6199452543779141       0.0000000000000000000
+Base_StdPar-default        1136.6199452543779141       0.0000000000000000000
+Lambda_StdPar-default      -6.0464819976872759102e+32  6.0464819976872759102e+32
+
+# Intel
+
+```
+cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20  -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8
+```
diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt
index 54334242e..232a1635d 100644
--- a/src/algorithm/CMakeLists.txt
+++ b/src/algorithm/CMakeLists.txt
@@ -10,34 +10,40 @@ blt_add_library(
   NAME algorithm
   SOURCES SCAN.cpp
           SCAN-Seq.cpp
+          SCAN-StdPar.cpp
           SCAN-Hip.cpp
           SCAN-Cuda.cpp
           SCAN-OMP.cpp
           SCAN-OMPTarget.cpp
           SORT.cpp
           SORT-Seq.cpp
+          SORT-StdPar.cpp
           SORT-Hip.cpp
           SORT-Cuda.cpp
           SORT-OMP.cpp
           SORTPAIRS.cpp
           SORTPAIRS-Seq.cpp
+          SORTPAIRS-StdPar.cpp
           SORTPAIRS-Hip.cpp
           SORTPAIRS-Cuda.cpp
           SORTPAIRS-OMP.cpp
           REDUCE_SUM.cpp
           REDUCE_SUM-Seq.cpp
+          REDUCE_SUM-StdPar.cpp
           REDUCE_SUM-Hip.cpp
           REDUCE_SUM-Cuda.cpp
           REDUCE_SUM-OMP.cpp
           REDUCE_SUM-OMPTarget.cpp
           MEMSET.cpp
           MEMSET-Seq.cpp
+          MEMSET-StdPar.cpp
           MEMSET-Hip.cpp
           MEMSET-Cuda.cpp
           MEMSET-OMP.cpp
           MEMSET-OMPTarget.cpp
           MEMCPY.cpp
           MEMCPY-Seq.cpp
+          MEMCPY-StdPar.cpp
           MEMCPY-Hip.cpp
           MEMCPY-Cuda.cpp
           MEMCPY-OMP.cpp
diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp
new file mode 100644
index 000000000..4d36f161b
--- /dev/null
+++ b/src/algorithm/MEMCPY-StdPar.cpp
@@ -0,0 +1,154 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MEMCPY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace algorithm
+{
+// Library tuning of the StdPar MEMCPY kernel: times run_reps calls to
+// std::copy_n with the par_unseq policy. The body is empty unless RUN_STDPAR is defined.
+void MEMCPY::runStdParVariantLibrary(VariantID vid)
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  // x and y are presumably declared by MEMCPY_DATA_SETUP (defined elsewhere).
+  MEMCPY_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Copy iend-ibegin elements from x into y.
+        std::copy_n(std::execution::par_unseq,
+                    x+ibegin, iend-ibegin, y+ibegin);
+
+      }
+      stopTimer();
+
+      break;
+    }
+    // Base_StdPar is the only variant handled here; report anything else.
+    default : {
+      getCout() << "\n  MEMCPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+// Default tuning of the StdPar MEMCPY kernel: runs MEMCPY_BODY per index via std::for_each_n.
+void MEMCPY::runStdParVariantDefault(VariantID vid)
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MEMCPY_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Presumably visits indices ibegin .. iend-1 (counting_iterator is defined elsewhere).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          MEMCPY_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+    // Same loop as Base_StdPar, but the body lives in a lambda built before the timer starts.
+    case Lambda_StdPar : {
+
+      auto memcpy_lambda = [=](Index_type i) {
+                             MEMCPY_BODY;
+                           };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          memcpy_lambda(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  MEMCPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+// Runs the MEMCPY StdPar tuning selected by tune_idx for the given variant.
+void MEMCPY::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  // Base_StdPar exposes two tunings, in this order: "library" (index 0)
+  // and "default" (index 1). Every other variant exposes only "default",
+  // which then sits at index 0.
+  const bool has_library_tuning = (vid == Base_StdPar);
+  const size_t library_idx = 0;
+  const size_t default_idx = has_library_tuning ? 1 : 0;
+
+  if (has_library_tuning && tune_idx == library_idx) {
+
+    runStdParVariantLibrary(vid);
+
+  }
+
+  if (tune_idx == default_idx) {
+
+    runStdParVariantDefault(vid);
+
+  }
+
+  // Any other tune_idx selects nothing, so no kernel is run.
+
+}
+// Registers the StdPar tuning names available for vid, in tuning-index order.
+void MEMCPY::setStdParTuningDefinitions(VariantID vid)
+{
+  const bool has_library_tuning = (Base_StdPar == vid);
+  if (has_library_tuning) {
+    addVariantTuningName(vid, "library");
+  }
+  addVariantTuningName(vid, "default");
+}
+
+} // end namespace algorithm
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/algorithm/MEMCPY.cpp b/src/algorithm/MEMCPY.cpp
index 583a19dea..9e4017054 100644
--- a/src/algorithm/MEMCPY.cpp
+++ b/src/algorithm/MEMCPY.cpp
@@ -51,6 +51,9 @@ MEMCPY::MEMCPY(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 MEMCPY::~MEMCPY()
diff --git a/src/algorithm/MEMCPY.hpp b/src/algorithm/MEMCPY.hpp
index 9fa46ae9e..b35f4faaa 100644
--- a/src/algorithm/MEMCPY.hpp
+++ b/src/algorithm/MEMCPY.hpp
@@ -54,12 +54,16 @@ class MEMCPY : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setSeqTuningDefinitions(VariantID vid);
+  void setStdParTuningDefinitions(VariantID vid);
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
   void runSeqVariantDefault(VariantID vid);
   void runSeqVariantLibrary(VariantID vid);
+  void runStdParVariantDefault(VariantID vid);
+  void runStdParVariantLibrary(VariantID vid);
 
   template < size_t block_size >
   void runCudaVariantBlock(VariantID vid);
diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp
new file mode 100644
index 000000000..e6903ec3b
--- /dev/null
+++ b/src/algorithm/MEMSET-StdPar.cpp
@@ -0,0 +1,154 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MEMSET.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace algorithm
+{
+// Library tuning of the StdPar MEMSET kernel: times run_reps calls to
+// std::fill_n with the par_unseq policy. The body is empty unless RUN_STDPAR is defined.
+void MEMSET::runStdParVariantLibrary(VariantID vid)
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  // x and val are presumably declared by MEMSET_DATA_SETUP (defined elsewhere).
+  MEMSET_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Assign val to iend-ibegin elements starting at x+ibegin.
+        std::fill_n(std::execution::par_unseq,
+                    x+ibegin, iend-ibegin, val);
+
+      }
+      stopTimer();
+
+      break;
+    }
+    // Base_StdPar is the only variant handled here; report anything else.
+    default : {
+      getCout() << "\n  MEMSET : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+// Default tuning of the StdPar MEMSET kernel: runs MEMSET_BODY per index via std::for_each_n.
+void MEMSET::runStdParVariantDefault(VariantID vid)
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MEMSET_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Presumably visits indices ibegin .. iend-1 (counting_iterator is defined elsewhere).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          MEMSET_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+    // Same loop as Base_StdPar, but the body lives in a lambda built before the timer starts.
+    case Lambda_StdPar : {
+
+      auto memset_lambda = [=](Index_type i) {
+                             MEMSET_BODY;
+                           };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          memset_lambda(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  MEMSET : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+// Runs the MEMSET StdPar tuning selected by tune_idx for the given variant.
+void MEMSET::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  // Base_StdPar exposes two tunings, in this order: "library" (index 0)
+  // and "default" (index 1). Every other variant exposes only "default",
+  // which then sits at index 0.
+  const bool has_library_tuning = (vid == Base_StdPar);
+  const size_t library_idx = 0;
+  const size_t default_idx = has_library_tuning ? 1 : 0;
+
+  if (has_library_tuning && tune_idx == library_idx) {
+
+    runStdParVariantLibrary(vid);
+
+  }
+
+  if (tune_idx == default_idx) {
+
+    runStdParVariantDefault(vid);
+
+  }
+
+  // Any other tune_idx selects nothing, so no kernel is run.
+
+}
+// Registers the StdPar tuning names available for vid, in tuning-index order.
+void MEMSET::setStdParTuningDefinitions(VariantID vid)
+{
+  const bool has_library_tuning = (Base_StdPar == vid);
+  if (has_library_tuning) {
+    addVariantTuningName(vid, "library");
+  }
+  addVariantTuningName(vid, "default");
+}
+
+} // end namespace algorithm
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/algorithm/MEMSET.cpp b/src/algorithm/MEMSET.cpp
index fdc98b3fe..332fa0100 100644
--- a/src/algorithm/MEMSET.cpp
+++ b/src/algorithm/MEMSET.cpp
@@ -52,6 +52,9 @@ MEMSET::MEMSET(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 MEMSET::~MEMSET()
diff --git a/src/algorithm/MEMSET.hpp b/src/algorithm/MEMSET.hpp
index ebf2f867b..01687b35c 100644
--- a/src/algorithm/MEMSET.hpp
+++ b/src/algorithm/MEMSET.hpp
@@ -54,12 +54,16 @@ class MEMSET : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setSeqTuningDefinitions(VariantID vid);
+  void setStdParTuningDefinitions(VariantID vid);
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
   void runSeqVariantDefault(VariantID vid);
   void runSeqVariantLibrary(VariantID vid);
+  void runStdParVariantDefault(VariantID vid);
+  void runStdParVariantLibrary(VariantID vid);
 
   template < size_t block_size >
   void runCudaVariantBlock(VariantID vid);
diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp
new file mode 100644
index 000000000..c35a6657a
--- /dev/null
+++ b/src/algorithm/REDUCE_SUM-StdPar.cpp
@@ -0,0 +1,94 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "REDUCE_SUM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace algorithm
+{
+
+// StdPar REDUCE_SUM: times run_reps sum reductions and stores each result in m_sum; tune_idx is unused.
+void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  // Index iterators consumed only by the Lambda_StdPar transform_reduce below.
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end   = counting_iterator<Index_type>(iend);
+
+  REDUCE_SUM_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Real_type sum = m_sum_init;
+        // Reduce x[ibegin, iend) starting from zero, then add that to m_sum_init.
+        sum += std::reduce( std::execution::par_unseq,
+                            x+ibegin, x+iend,
+                            Real_type(0), std::plus<Real_type>() );
+
+        m_sum = sum;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto reduce_sum_base_lam = [=](Index_type i) {
+                                 return x[i];
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Real_type sum = m_sum_init;
+        // Sum the values reduce_sum_base_lam returns for each index in [begin, end).
+        sum += std::transform_reduce( std::execution::par_unseq,
+                                      begin, end,
+                                      Real_type(0), std::plus<Real_type>(), reduce_sum_base_lam);
+
+        m_sum = sum;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  REDUCE_SUM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace algorithm
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/algorithm/REDUCE_SUM.cpp b/src/algorithm/REDUCE_SUM.cpp
index f2f2b25d2..f72fd5005 100644
--- a/src/algorithm/REDUCE_SUM.cpp
+++ b/src/algorithm/REDUCE_SUM.cpp
@@ -51,6 +51,9 @@ REDUCE_SUM::REDUCE_SUM(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  //setVariantDefined( Lambda_StdPar ); // exists but is not interesting
 }
 
 REDUCE_SUM::~REDUCE_SUM()
diff --git a/src/algorithm/REDUCE_SUM.hpp b/src/algorithm/REDUCE_SUM.hpp
index ba9e9308b..247c3efa6 100644
--- a/src/algorithm/REDUCE_SUM.hpp
+++ b/src/algorithm/REDUCE_SUM.hpp
@@ -58,6 +58,7 @@ class REDUCE_SUM : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp
new file mode 100644
index 000000000..510f6e181
--- /dev/null
+++ b/src/algorithm/SCAN-StdPar.cpp
@@ -0,0 +1,69 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "SCAN.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace algorithm
+{
+
+// StdPar SCAN: times run_reps calls to std::exclusive_scan from x into y; tune_idx is unused.
+void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  SCAN_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Exclusive prefix sum of x[ibegin, iend) written to y, with initial value 0.
+        std::exclusive_scan(
+#ifdef NVCXX_GPU_ENABLED
+// Use the sequential policy here because the GPU implementation is wrong.
+                             std::execution::seq,
+#else
+                             std::execution::par_unseq,
+#endif
+                             x+ibegin, x+iend, y, (Real_type)0 );
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  SCAN : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace algorithm
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/algorithm/SCAN.cpp b/src/algorithm/SCAN.cpp
index 7b2933084..e5fcc9a62 100644
--- a/src/algorithm/SCAN.cpp
+++ b/src/algorithm/SCAN.cpp
@@ -55,6 +55,8 @@ SCAN::SCAN(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
 }
 
 SCAN::~SCAN()
diff --git a/src/algorithm/SCAN.hpp b/src/algorithm/SCAN.hpp
index 519789a55..51cc13325 100644
--- a/src/algorithm/SCAN.hpp
+++ b/src/algorithm/SCAN.hpp
@@ -61,6 +61,7 @@ class SCAN : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
 private:
   static const size_t default_gpu_block_size = 0;
diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp
new file mode 100644
index 000000000..2f45b62ab
--- /dev/null
+++ b/src/algorithm/SORT-StdPar.cpp
@@ -0,0 +1,63 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "SORT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace algorithm
+{
+
+// StdPar SORT: times run_reps std::sort calls with the par_unseq policy; tune_idx is unused.
+void SORT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  SORT_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // STD_SORT_ARGS (defined elsewhere) presumably expands to the range to sort.
+        std::sort( std::execution::par_unseq,
+                   STD_SORT_ARGS);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  SORT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace algorithm
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/algorithm/SORT.cpp b/src/algorithm/SORT.cpp
index 049c03304..44828f3ad 100644
--- a/src/algorithm/SORT.cpp
+++ b/src/algorithm/SORT.cpp
@@ -41,6 +41,8 @@ SORT::SORT(const RunParams& params)
   setVariantDefined( RAJA_CUDA );
 
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
 }
 
 SORT::~SORT()
diff --git a/src/algorithm/SORT.hpp b/src/algorithm/SORT.hpp
index b51bf12f9..6ca3d877a 100644
--- a/src/algorithm/SORT.hpp
+++ b/src/algorithm/SORT.hpp
@@ -54,6 +54,7 @@ class SORT : public KernelBase
   {
     getCout() << "\n  SORT : Unknown OMP Target variant id = " << vid << std::endl;
   }
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
 private:
   static const size_t default_gpu_block_size = 0;
diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp
new file mode 100644
index 000000000..0a75f028a
--- /dev/null
+++ b/src/algorithm/SORTPAIRS-StdPar.cpp
@@ -0,0 +1,101 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "SORTPAIRS.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <vector>
+#include <utility>
+#include <iostream>
+
+namespace rajaperf
+{
+namespace algorithm
+{
+
+// StdPar SORTPAIRS: per repetition, gathers (x, i) pairs, sorts them by key, and scatters them back.
+void SORTPAIRS::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  SORTPAIRS_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Each repetition works on its own slice of x and i, offset by iend*irep.
+        using pair_type = std::pair<Real_type, Real_type>;
+
+        std::vector<pair_type> vector_of_pairs;
+        // Disabled alternative kept for reference: sequential emplace_back.
+#if 0
+        vector_of_pairs.reserve(iend-ibegin);
+
+        std::for_each_n( //std::execution::par, // parallelism leads to incorrectness
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=,&vector_of_pairs](Index_type iemp) noexcept {
+          vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]);
+        });
+#else
+        vector_of_pairs.resize(iend-ibegin);
+        // Raw pointer so the [=] lambdas below do not copy the whole vector.
+        auto p = vector_of_pairs.data();
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type iemp) noexcept {
+          p[iemp - ibegin] = std::make_pair(x[iend*irep + iemp], i[iend*irep + iemp]);
+        });
+#endif
+        // Order by key (pair.first) only; the value rides along.
+        std::sort( std::execution::par_unseq,
+                   vector_of_pairs.begin(), vector_of_pairs.end(),
+                   [](pair_type const& lhs, pair_type const& rhs) noexcept {
+                     return lhs.first < rhs.first;
+                   });
+        // Write the sorted keys and values back over this repetition's slice.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type iemp) noexcept {
+          // Scratch storage is zero-based, hence the ibegin offset.
+          const pair_type &pair = p[iemp - ibegin];
+          x[iend*irep + iemp] = pair.first;
+          i[iend*irep + iemp] = pair.second;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  SORTPAIRS : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace algorithm
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/algorithm/SORTPAIRS.cpp b/src/algorithm/SORTPAIRS.cpp
index 96d79a7df..26f8e83be 100644
--- a/src/algorithm/SORTPAIRS.cpp
+++ b/src/algorithm/SORTPAIRS.cpp
@@ -41,6 +41,8 @@ SORTPAIRS::SORTPAIRS(const RunParams& params)
   setVariantDefined( RAJA_CUDA );
 
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
 }
 
 SORTPAIRS::~SORTPAIRS()
diff --git a/src/algorithm/SORTPAIRS.hpp b/src/algorithm/SORTPAIRS.hpp
index 4cfc3eb36..f79b3f39b 100644
--- a/src/algorithm/SORTPAIRS.hpp
+++ b/src/algorithm/SORTPAIRS.hpp
@@ -53,6 +53,7 @@ class SORTPAIRS : public KernelBase
   {
     getCout() << "\n  SORTPAIRS : Unknown OMP Target variant id = " << vid << std::endl;
   }
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
 private:
   static const size_t default_gpu_block_size = 0;
diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt
index 6d521d1df..372e4dce5 100644
--- a/src/apps/CMakeLists.txt
+++ b/src/apps/CMakeLists.txt
@@ -13,6 +13,7 @@ blt_add_library(
           CONVECTION3DPA-Cuda.cpp
           CONVECTION3DPA-Hip.cpp
           CONVECTION3DPA-Seq.cpp
+          CONVECTION3DPA-StdPar.cpp
           CONVECTION3DPA-OMP.cpp
           CONVECTION3DPA-OMPTarget.cpp
           DEL_DOT_VEC_2D.cpp 
@@ -71,6 +72,7 @@ blt_add_library(
           MASS3DPA-OMPTarget.cpp
           NODAL_ACCUMULATION_3D.cpp
           NODAL_ACCUMULATION_3D-Seq.cpp
+          NODAL_ACCUMULATION_3D-StdPar.cpp
           NODAL_ACCUMULATION_3D-Hip.cpp
           NODAL_ACCUMULATION_3D-Cuda.cpp
           NODAL_ACCUMULATION_3D-OMP.cpp
@@ -88,5 +90,16 @@ blt_add_library(
           VOL3D-OMP.cpp 
           VOL3D-OMPTarget.cpp 
           WIP-COUPLE.cpp
+          DEL_DOT_VEC_2D-StdPar.cpp 
+          ENERGY-StdPar.cpp
+          FIR-StdPar.cpp
+          HALOEXCHANGE-StdPar.cpp
+          HALOEXCHANGE_FUSED-StdPar.cpp
+          LTIMES-StdPar.cpp
+          LTIMES_NOVIEW-StdPar.cpp
+          MASS3DPA-StdPar.cpp
+          PRESSURE-StdPar.cpp 
+          VOL3D-StdPar.cpp
+          DIFFUSION3DPA-StdPar.cpp
   DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
   )
diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp
new file mode 100644
index 000000000..2b36d2dc3
--- /dev/null
+++ b/src/apps/CONVECTION3DPA-StdPar.cpp
@@ -0,0 +1,127 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "CONVECTION3DPA.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace apps {
+// StdPar CONVECTION3DPA: times run_reps std::for_each_n sweeps over the elements; tune_idx is unused.
+void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+
+  CONVECTION3DPA_DATA_SETUP;
+
+  switch (vid) {
+
+  case Base_StdPar: {
+
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+      // One lambda invocation per element e; NE presumably comes from CONVECTION3DPA_DATA_SETUP.
+      std::for_each_n( std::execution::par_unseq,
+                       counting_iterator<int>(0), NE,
+                       [=](int e) {
+        // CONVECTION3DPA_0_CPU .. CONVECTION3DPA_8 are presumably macros defined elsewhere.
+        CONVECTION3DPA_0_CPU;
+
+        CPU_FOREACH(dz,z,CPA_D1D) {
+          CPU_FOREACH(dy,y,CPA_D1D) {
+            CPU_FOREACH(dx,x,CPA_D1D) {
+              CONVECTION3DPA_1;
+            }
+          }
+        }
+
+        CPU_FOREACH(dz,z,CPA_D1D) {
+          CPU_FOREACH(dy,y,CPA_D1D) {
+            CPU_FOREACH(qx,x,CPA_Q1D) {
+              CONVECTION3DPA_2;
+            }
+          }
+        }
+
+        CPU_FOREACH(dz,z,CPA_D1D) {
+          CPU_FOREACH(qx,x,CPA_Q1D) {
+            CPU_FOREACH(qy,y,CPA_Q1D) {
+              CONVECTION3DPA_3;
+            }
+          }
+        }
+
+        CPU_FOREACH(qx,x,CPA_Q1D) {
+          CPU_FOREACH(qy,y,CPA_Q1D) {
+            CPU_FOREACH(qz,z,CPA_Q1D) {
+              CONVECTION3DPA_4;
+            }
+          }
+        }
+
+        CPU_FOREACH(qz,z,CPA_Q1D) {
+          CPU_FOREACH(qy,y,CPA_Q1D) {
+            CPU_FOREACH(qx,x,CPA_Q1D) {
+              CONVECTION3DPA_5;
+            }
+          }
+        }
+
+        CPU_FOREACH(qx,x,CPA_Q1D) {
+          CPU_FOREACH(qy,y,CPA_Q1D) {
+            CPU_FOREACH(dz,z,CPA_D1D) {
+              CONVECTION3DPA_6;
+            }
+          }
+        }
+
+        CPU_FOREACH(dz,z,CPA_D1D) {
+           CPU_FOREACH(qx,x,CPA_Q1D) {
+              CPU_FOREACH(dy,y,CPA_D1D) {
+                CONVECTION3DPA_7;
+             }
+          }
+        }
+
+        CPU_FOREACH(dz,z,CPA_D1D) {
+          CPU_FOREACH(dy,y,CPA_D1D) {
+            CPU_FOREACH(dx,x,CPA_D1D) {
+              CONVECTION3DPA_8;
+            }
+          }
+        }
+
+      }); // element loop
+
+    }
+    stopTimer();
+
+    break;
+  }
+
+  default:
+    getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid << std::endl;
+  }
+  // Without RUN_STDPAR the body is otherwise empty; RAJA_UNUSED_VAR presumably silences the unused-vid warning.
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/CONVECTION3DPA.cpp b/src/apps/CONVECTION3DPA.cpp
index dc4823482..bf52a6dcc 100644
--- a/src/apps/CONVECTION3DPA.cpp
+++ b/src/apps/CONVECTION3DPA.cpp
@@ -64,6 +64,7 @@ CONVECTION3DPA::CONVECTION3DPA(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
 }
 
 CONVECTION3DPA::~CONVECTION3DPA()
diff --git a/src/apps/CONVECTION3DPA.hpp b/src/apps/CONVECTION3DPA.hpp
index 784b2d4cd..bb9e716a1 100644
--- a/src/apps/CONVECTION3DPA.hpp
+++ b/src/apps/CONVECTION3DPA.hpp
@@ -378,6 +378,7 @@ class CONVECTION3DPA : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp
new file mode 100644
index 000000000..bbe987735
--- /dev/null
+++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp
@@ -0,0 +1,100 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DEL_DOT_VEC_2D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include "AppsData.hpp"
+
+#include "camp/resource.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace apps
+{
+
+
+void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Runs and times the StdPar variants of DEL_DOT_VEC_2D; tune_idx is not referenced here.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = m_domain->n_real_zones;
+
+  // The setup, NDSET2D and *_BODY macros are defined outside this file.
+  DEL_DOT_VEC_2D_DATA_SETUP;
+
+  NDSET2D(m_domain->jp, x,x1,x2,x3,x4) ;
+  NDSET2D(m_domain->jp, y,y1,y2,y3,y4) ;
+  NDSET2D(m_domain->jp, xdot,fx1,fx2,fx3,fx4) ;
+  NDSET2D(m_domain->jp, ydot,fy1,fy2,fy3,fy4) ;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // for_each_n takes an element count, so pass the range length.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type ii) {
+          DEL_DOT_VEC_2D_BODY_INDEX;
+          DEL_DOT_VEC_2D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop as Base_StdPar, with the body held in a named lambda.
+      auto deldotvec2d_base_lam = [=](Index_type ii) {
+                                    DEL_DOT_VEC_2D_BODY_INDEX;
+                                    DEL_DOT_VEC_2D_BODY;
+                                  };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Count is the range length, matching the Base_StdPar loop.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type ii) {
+          deldotvec2d_base_lam(ii);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/DEL_DOT_VEC_2D.cpp b/src/apps/DEL_DOT_VEC_2D.cpp
index 9fe3c3e85..07858084f 100644
--- a/src/apps/DEL_DOT_VEC_2D.cpp
+++ b/src/apps/DEL_DOT_VEC_2D.cpp
@@ -62,6 +62,9 @@ DEL_DOT_VEC_2D::DEL_DOT_VEC_2D(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 DEL_DOT_VEC_2D::~DEL_DOT_VEC_2D()
diff --git a/src/apps/DEL_DOT_VEC_2D.hpp b/src/apps/DEL_DOT_VEC_2D.hpp
index 0e22bb399..2a3ab63be 100644
--- a/src/apps/DEL_DOT_VEC_2D.hpp
+++ b/src/apps/DEL_DOT_VEC_2D.hpp
@@ -113,6 +113,7 @@ class DEL_DOT_VEC_2D : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp
new file mode 100644
index 000000000..a05a4370a
--- /dev/null
+++ b/src/apps/DIFFUSION3DPA-StdPar.cpp
@@ -0,0 +1,137 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+// Uncomment to add compiler directives for loop unrolling
+//#define USE_RAJAPERF_UNROLL
+
+#include "DIFFUSION3DPA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+// This is used below, which is bad for GPU
+//#define CPU_FOREACH(i, k, N) for (int i = 0; i < N; i++)
+
+namespace rajaperf {
+namespace apps {
+
+void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  // Runs and times the StdPar variant of DIFFUSION3DPA; only Base_StdPar is handled.
+  DIFFUSION3DPA_DATA_SETUP;
+
+  switch (vid) {
+
+  case Base_StdPar: {
+
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+      // One parallel iteration per element index e in [0, NE).
+      std::for_each_n( std::execution::par_unseq,
+                       counting_iterator<int>(0), NE,
+                       [=](int e) {
+        // Stage macros 0-9 run in this order for each element; they are defined elsewhere.
+        DIFFUSION3DPA_0_CPU;
+
+        CPU_FOREACH(dz, z, DPA_D1D) {
+          CPU_FOREACH(dy, y, DPA_D1D) {
+            CPU_FOREACH(dx, x, DPA_D1D) {
+              DIFFUSION3DPA_1;
+            }
+          }
+        }
+
+        CPU_FOREACH(dy, y, DPA_D1D) {
+          CPU_FOREACH(qx, x, DPA_Q1D) {
+            DIFFUSION3DPA_2;
+          }
+        }
+
+        CPU_FOREACH(dz, z, DPA_D1D) {
+          CPU_FOREACH(dy, y, DPA_D1D) {
+            CPU_FOREACH(qx, x, DPA_Q1D) {
+              DIFFUSION3DPA_3;
+            }
+          }
+        }
+
+        CPU_FOREACH(dz, z, DPA_D1D) {
+          CPU_FOREACH(qy, y, DPA_Q1D) {
+            CPU_FOREACH(qx, x, DPA_Q1D) {
+              DIFFUSION3DPA_4;
+            }
+          }
+        }
+
+        CPU_FOREACH(qz, z, DPA_Q1D) {
+          CPU_FOREACH(qy, y, DPA_Q1D) {
+            CPU_FOREACH(qx, x, DPA_Q1D) {
+              DIFFUSION3DPA_5;
+            }
+          }
+        }
+
+        CPU_FOREACH(d, y, DPA_D1D) {
+          CPU_FOREACH(q, x, DPA_Q1D) {
+            DIFFUSION3DPA_6;
+          }
+        }
+
+        CPU_FOREACH(qz, z, DPA_Q1D) {
+          CPU_FOREACH(qy, y, DPA_Q1D) {
+            CPU_FOREACH(dx, x, DPA_D1D) {
+              DIFFUSION3DPA_7;
+            }
+          }
+        }
+
+        CPU_FOREACH(qz, z, DPA_Q1D) {
+          CPU_FOREACH(dy, y, DPA_D1D) {
+            CPU_FOREACH(dx, x, DPA_D1D) {
+              DIFFUSION3DPA_8;
+            }
+          }
+        }
+
+        CPU_FOREACH(dz, z, DPA_D1D) {
+          CPU_FOREACH(dy, y, DPA_D1D) {
+            CPU_FOREACH(dx, x, DPA_D1D) {
+              DIFFUSION3DPA_9;
+            }
+          }
+        }
+
+      }); // element loop
+
+    }
+    stopTimer();
+
+    break;
+  }
+
+  default:
+    getCout() << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid << std::endl;
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/DIFFUSION3DPA.cpp b/src/apps/DIFFUSION3DPA.cpp
index 1f78cafe3..d243bf330 100644
--- a/src/apps/DIFFUSION3DPA.cpp
+++ b/src/apps/DIFFUSION3DPA.cpp
@@ -65,6 +65,7 @@ DIFFUSION3DPA::DIFFUSION3DPA(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
 }
 
 DIFFUSION3DPA::~DIFFUSION3DPA()
diff --git a/src/apps/DIFFUSION3DPA.hpp b/src/apps/DIFFUSION3DPA.hpp
index 62967d5c0..5149f8d78 100644
--- a/src/apps/DIFFUSION3DPA.hpp
+++ b/src/apps/DIFFUSION3DPA.hpp
@@ -481,6 +481,7 @@ class DIFFUSION3DPA : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp
new file mode 100644
index 000000000..6d797f8ed
--- /dev/null
+++ b/src/apps/ENERGY-StdPar.cpp
@@ -0,0 +1,163 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "ENERGY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace apps
+{
+
+
+void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Runs and times the StdPar variants of ENERGY; tune_idx is not referenced here.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  ENERGY_DATA_SETUP;
+  
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Six separate parallel loops per repetition, one per ENERGY_BODYn macro, in order.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          ENERGY_BODY1;
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          ENERGY_BODY2;
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          ENERGY_BODY3;
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          ENERGY_BODY4;
+        });
+  
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          ENERGY_BODY5;
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          ENERGY_BODY6;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    } 
+
+    case Lambda_StdPar : {
+      // Same six loops, but each body is first wrapped in a named lambda.
+      auto energy_lam1 = [=](Index_type i) {
+                           ENERGY_BODY1;
+                         };
+      auto energy_lam2 = [=](Index_type i) {
+                           ENERGY_BODY2;
+                         };
+      auto energy_lam3 = [=](Index_type i) {
+                           ENERGY_BODY3;
+                         };
+      auto energy_lam4 = [=](Index_type i) {
+                           ENERGY_BODY4;
+                         };
+      auto energy_lam5 = [=](Index_type i) {
+                           ENERGY_BODY5;
+                         };
+      auto energy_lam6 = [=](Index_type i) {
+                           ENERGY_BODY6;
+                         };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          energy_lam1(i);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          energy_lam2(i);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          energy_lam3(i);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          energy_lam4(i);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          energy_lam5(i);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          energy_lam6(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  ENERGY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/ENERGY.cpp b/src/apps/ENERGY.cpp
index 8e77961b6..7c363bb4d 100644
--- a/src/apps/ENERGY.cpp
+++ b/src/apps/ENERGY.cpp
@@ -62,6 +62,9 @@ ENERGY::ENERGY(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 ENERGY::~ENERGY()
diff --git a/src/apps/ENERGY.hpp b/src/apps/ENERGY.hpp
index 22af34867..dbb2141e4 100644
--- a/src/apps/ENERGY.hpp
+++ b/src/apps/ENERGY.hpp
@@ -203,6 +203,7 @@ class ENERGY : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp
new file mode 100644
index 000000000..2e70b8a38
--- /dev/null
+++ b/src/apps/FIR-StdPar.cpp
@@ -0,0 +1,95 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIR.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace apps
+{
+
+
+void FIR::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Runs and times the StdPar variants of FIR; tune_idx is not referenced here.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize() - m_coefflen;
+
+  FIR_COEFF;
+
+  FIR_DATA_SETUP;
+  // Local copy of coeff_array; presumably the name FIR_BODY reads - confirm in FIR.hpp.
+  Real_type coeff[FIR_COEFFLEN];
+  std::copy(std::begin(coeff_array), std::end(coeff_array), std::begin(coeff));
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // for_each_n takes an element count, so pass the range length.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          FIR_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop as Base_StdPar, with the body held in a named lambda.
+      auto fir_lam = [=](Index_type i) {
+                       FIR_BODY;
+                     };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Count is the range length, matching the Base_StdPar loop.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+           fir_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  FIR : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/FIR.cpp b/src/apps/FIR.cpp
index 792f015d0..f8ba2239b 100644
--- a/src/apps/FIR.cpp
+++ b/src/apps/FIR.cpp
@@ -56,6 +56,9 @@ FIR::FIR(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 FIR::~FIR()
diff --git a/src/apps/FIR.hpp b/src/apps/FIR.hpp
index 3ca8a1cef..9a43b3d1f 100644
--- a/src/apps/FIR.hpp
+++ b/src/apps/FIR.hpp
@@ -78,6 +78,7 @@ class FIR : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp
new file mode 100644
index 000000000..6f549bd03
--- /dev/null
+++ b/src/apps/HALOEXCHANGE-StdPar.cpp
@@ -0,0 +1,138 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HALOEXCHANGE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Runs and times the StdPar variants of HALOEXCHANGE; tune_idx is not referenced here.
+  const Index_type run_reps = getRunReps();
+
+  HALOEXCHANGE_DATA_SETUP;
+
+  auto ibegin = 0;
+  auto iend   = num_neighbors;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Pack: one parallel iteration per neighbor l; variables run serially, buffer advances by len.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type l) noexcept {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type  len  = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            for (Index_type i = 0; i < len; i++) {
+              HALOEXCHANGE_PACK_BODY;
+            }
+            buffer += len;
+          }
+        });
+        // Unpack: same structure, driven by the unpack index lists.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type l) noexcept {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type  len  = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            for (Index_type i = 0; i < len; i++) {
+              HALOEXCHANGE_UNPACK_BODY;
+            }
+            buffer += len;
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same pack/unpack loops; the innermost body is wrapped in a lambda created per variable.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type l) noexcept {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type  len  = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_pack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_PACK_BODY;
+                };
+            for (Index_type i = 0; i < len; i++) {
+              haloexchange_pack_base_lam(i);
+            }
+            buffer += len;
+          }
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type l) noexcept {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type  len  = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_unpack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_UNPACK_BODY;
+                };
+            for (Index_type i = 0; i < len; i++) {
+              haloexchange_unpack_base_lam(i);
+            }
+            buffer += len;
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/HALOEXCHANGE.cpp b/src/apps/HALOEXCHANGE.cpp
index 53ec0ecc1..1b7ef9aa8 100644
--- a/src/apps/HALOEXCHANGE.cpp
+++ b/src/apps/HALOEXCHANGE.cpp
@@ -76,6 +76,9 @@ HALOEXCHANGE::HALOEXCHANGE(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 HALOEXCHANGE::~HALOEXCHANGE()
diff --git a/src/apps/HALOEXCHANGE.hpp b/src/apps/HALOEXCHANGE.hpp
index 1f21d9616..b9fec003c 100644
--- a/src/apps/HALOEXCHANGE.hpp
+++ b/src/apps/HALOEXCHANGE.hpp
@@ -93,6 +93,7 @@ class HALOEXCHANGE : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp
new file mode 100644
index 000000000..47c531e50
--- /dev/null
+++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp
@@ -0,0 +1,179 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HALOEXCHANGE_FUSED.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Runs and times the StdPar variants of HALOEXCHANGE_FUSED; tune_idx is not referenced here.
+  const Index_type run_reps = getRunReps();
+
+  HALOEXCHANGE_FUSED_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      HALOEXCHANGE_FUSED_MANUAL_FUSER_SETUP;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Serial pass: record buffer/list/var and length for every (neighbor, variable) pair.
+        Index_type pack_index = 0;
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type  len  = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            pack_ptr_holders[pack_index] = ptr_holder{buffer, list, var};
+            pack_lens[pack_index]        = len;
+            pack_index += 1;
+            buffer += len;
+          }
+        }
+        // Parallel pass: one iteration per recorded pair; its elements are packed serially.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), pack_index,
+                         [=](Index_type j) {
+          Real_ptr   buffer = pack_ptr_holders[j].buffer;
+          Int_ptr    list   = pack_ptr_holders[j].list;
+          Real_ptr   var    = pack_ptr_holders[j].var;
+          Index_type len    = pack_lens[j];
+          for (Index_type i = 0; i < len; i++) {
+            HALOEXCHANGE_FUSED_PACK_BODY;
+          }
+        });
+        // The same two passes are repeated for unpacking.
+        Index_type unpack_index = 0;
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type  len  = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            unpack_ptr_holders[unpack_index] = ptr_holder{buffer, list, var};
+            unpack_lens[unpack_index]        = len;
+            unpack_index += 1;
+            buffer += len;
+          }
+        }
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), unpack_index,
+                         [=](Index_type j) {
+          Real_ptr   buffer = unpack_ptr_holders[j].buffer;
+          Int_ptr    list   = unpack_ptr_holders[j].list;
+          Real_ptr   var    = unpack_ptr_holders[j].var;
+          Index_type len    = unpack_lens[j];
+          for (Index_type i = 0; i < len; i++) {
+            HALOEXCHANGE_FUSED_UNPACK_BODY;
+          }
+        });
+
+      }
+      stopTimer();
+
+      HALOEXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN;
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: each pair is stored as a lambda placement-constructed into a slot.
+      HALOEXCHANGE_FUSED_MANUAL_LAMBDA_FUSER_SETUP;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Index_type pack_index = 0;
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type  len  = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            new(&pack_lambdas[pack_index]) pack_lambda_type(make_pack_lambda(buffer, list, var));
+            pack_lens[pack_index] = len;
+            pack_index += 1;
+            buffer += len;
+          }
+        }
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), pack_index,
+                         [=](Index_type j) {
+          auto       pack_lambda = pack_lambdas[j];
+          Index_type len         = pack_lens[j];
+          for (Index_type i = 0; i < len; i++) {
+            pack_lambda(i);
+          }
+        });
+
+        Index_type unpack_index = 0;
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type  len  = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            new(&unpack_lambdas[unpack_index]) unpack_lambda_type(make_unpack_lambda(buffer, list, var));
+            unpack_lens[unpack_index] = len;
+            unpack_index += 1;
+            buffer += len;
+          }
+        }
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), unpack_index,
+                         [=](Index_type j) {
+          auto       unpack_lambda = unpack_lambdas[j];
+          Index_type len           = unpack_lens[j];
+          for (Index_type i = 0; i < len; i++) {
+            unpack_lambda(i);
+          }
+        });
+
+      }
+      stopTimer();
+
+      HALOEXCHANGE_FUSED_MANUAL_LAMBDA_FUSER_TEARDOWN;
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n HALOEXCHANGE_FUSED : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/HALOEXCHANGE_FUSED.cpp b/src/apps/HALOEXCHANGE_FUSED.cpp
index 5486c3645..54b00790d 100644
--- a/src/apps/HALOEXCHANGE_FUSED.cpp
+++ b/src/apps/HALOEXCHANGE_FUSED.cpp
@@ -76,6 +76,9 @@ HALOEXCHANGE_FUSED::HALOEXCHANGE_FUSED(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 HALOEXCHANGE_FUSED::~HALOEXCHANGE_FUSED()
diff --git a/src/apps/HALOEXCHANGE_FUSED.hpp b/src/apps/HALOEXCHANGE_FUSED.hpp
index b0af7e60e..116596d6c 100644
--- a/src/apps/HALOEXCHANGE_FUSED.hpp
+++ b/src/apps/HALOEXCHANGE_FUSED.hpp
@@ -137,6 +137,7 @@ class HALOEXCHANGE_FUSED : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp
new file mode 100644
index 000000000..3ccd3c987
--- /dev/null
+++ b/src/apps/LTIMES-StdPar.cpp
@@ -0,0 +1,113 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "LTIMES.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Runs and times the StdPar variants of LTIMES; tune_idx is not referenced here.
+  const Index_type run_reps = getRunReps();
+
+  LTIMES_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Collapsed build: z,g,m are flattened into one parallel index; otherwise only z is parallel.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), num_z*num_g*num_m,
+                         [=](Index_type zgm) {
+              const auto z  = zgm / (num_g*num_m);
+              const auto gm = zgm % (num_g*num_m);
+              const auto g  = gm / num_m;
+              const auto m  = gm % num_m;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), num_z,
+                         [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g )
+            for (Index_type m = 0; m < num_m; ++m )
+#endif
+              for (Index_type d = 0; d < num_d; ++d ) {
+                LTIMES_BODY;
+              }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loops as Base_StdPar, with LTIMES_BODY wrapped in a named lambda.
+      auto ltimes_base_lam = [=](Index_type d, Index_type z, Index_type g, Index_type m) {
+                               LTIMES_BODY;
+                             };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), num_z*num_g*num_m,
+                         [=](Index_type zgm) {
+              const auto z  = zgm / (num_g*num_m);
+              const auto gm = zgm % (num_g*num_m);
+              const auto g  = gm / num_m;
+              const auto m  = gm % num_m;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), num_z,
+                         [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g )
+            for (Index_type m = 0; m < num_m; ++m )
+#endif
+              for (Index_type d = 0; d < num_d; ++d ) {
+                ltimes_base_lam(d, z, g, m);
+              }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n LTIMES : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp
index b920631dd..5f13966b7 100644
--- a/src/apps/LTIMES.cpp
+++ b/src/apps/LTIMES.cpp
@@ -77,6 +77,9 @@ LTIMES::LTIMES(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 LTIMES::~LTIMES()
diff --git a/src/apps/LTIMES.hpp b/src/apps/LTIMES.hpp
index 2f3f0ca6d..5fd360936 100644
--- a/src/apps/LTIMES.hpp
+++ b/src/apps/LTIMES.hpp
@@ -116,6 +116,7 @@ class LTIMES : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp
new file mode 100644
index 000000000..37087f20d
--- /dev/null
+++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp
@@ -0,0 +1,113 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "LTIMES_NOVIEW.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace apps
+{
+
+
+void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the StdPar variant of LTIMES_NOVIEW selected by vid; tune_idx is not used.
+  const Index_type run_reps = getRunReps();
+
+  LTIMES_NOVIEW_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: LTIMES_NOVIEW_BODY is expanded directly in the parallel callable.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // USE_STDPAR_COLLAPSE: one parallel index over z*g*m; otherwise parallel over z only.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), num_z*num_g*num_m,
+                         [=](Index_type zgm) {
+              const auto z  = zgm / (num_g*num_m);
+              const auto gm = zgm % (num_g*num_m);
+              const auto g  = gm / num_m;
+              const auto m  = gm % num_m;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), num_z,
+                         [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g )
+            for (Index_type m = 0; m < num_m; ++m )
+#endif
+              for (Index_type d = 0; d < num_d; ++d ) {
+                LTIMES_NOVIEW_BODY;
+              }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: same loop nest, with the body invoked through a captured lambda.
+      auto ltimesnoview_lam = [=](Index_type d, Index_type z, Index_type g, Index_type m) {
+                                LTIMES_NOVIEW_BODY;
+                              };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), num_z*num_g*num_m,
+                         [=](Index_type zgm) {
+              const auto z  = zgm / (num_g*num_m);
+              const auto gm = zgm % (num_g*num_m);
+              const auto g  = gm / num_m;
+              const auto m  = gm % num_m;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), num_z,
+                         [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g )
+            for (Index_type m = 0; m < num_m; ++m )
+#endif
+              for (Index_type d = 0; d < num_d; ++d ) {
+                ltimesnoview_lam(d, z, g, m);
+              }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/LTIMES_NOVIEW.cpp b/src/apps/LTIMES_NOVIEW.cpp
index 2f8dd4b40..c37128622 100644
--- a/src/apps/LTIMES_NOVIEW.cpp
+++ b/src/apps/LTIMES_NOVIEW.cpp
@@ -76,6 +76,9 @@ LTIMES_NOVIEW::LTIMES_NOVIEW(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 LTIMES_NOVIEW::~LTIMES_NOVIEW()
diff --git a/src/apps/LTIMES_NOVIEW.hpp b/src/apps/LTIMES_NOVIEW.hpp
index 96a296366..d00bd0da2 100644
--- a/src/apps/LTIMES_NOVIEW.hpp
+++ b/src/apps/LTIMES_NOVIEW.hpp
@@ -66,6 +66,7 @@ class LTIMES_NOVIEW : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp
new file mode 100644
index 000000000..8e18cd50b
--- /dev/null
+++ b/src/apps/MASS3DPA-StdPar.cpp
@@ -0,0 +1,115 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MASS3DPA.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace apps {
+// CPU_FOREACH(i, k, N): sequential loop with int i running over [0, N); argument k is not used.
+#define CPU_FOREACH(i, k, N) for (int i = 0; i < N; i++)
+
+void MASS3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+
+  MASS3DPA_DATA_SETUP;
+
+  switch (vid) {
+
+  case Base_StdPar: {
+    // Times run_reps repetitions of the kernel; tune_idx is not used by this variant.
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+      // One parallel iteration per element e in [0, NE); the CPU_FOREACH loops inside are plain for-loops.
+      std::for_each_n( std::execution::par_unseq,
+                       counting_iterator<int>(0), NE,
+                       [=](int e) {
+
+        MASS3DPA_0_CPU
+
+        CPU_FOREACH(dy, y, MPA_D1D) {
+          CPU_FOREACH(dx, x, MPA_D1D){
+            MASS3DPA_1
+          }
+          CPU_FOREACH(dx, x, MPA_Q1D) {
+            MASS3DPA_2
+          }
+        }
+
+        CPU_FOREACH(dy, y, MPA_D1D) {
+          CPU_FOREACH(qx, x, MPA_Q1D) {
+            MASS3DPA_3
+          }
+        }
+
+        CPU_FOREACH(qy, y, MPA_Q1D) {
+          CPU_FOREACH(qx, x, MPA_Q1D) {
+            MASS3DPA_4
+          }
+        }
+
+        CPU_FOREACH(qy, y, MPA_Q1D) {
+          CPU_FOREACH(qx, x, MPA_Q1D) {
+            MASS3DPA_5
+          }
+        }
+
+        CPU_FOREACH(d, y, MPA_D1D) {
+          CPU_FOREACH(q, x, MPA_Q1D) {
+            MASS3DPA_6
+          }
+        }
+
+        CPU_FOREACH(qy, y, MPA_Q1D) {
+          CPU_FOREACH(dx, x, MPA_D1D) {
+            MASS3DPA_7
+          }
+        }
+
+        CPU_FOREACH(dy, y, MPA_D1D) {
+          CPU_FOREACH(dx, x, MPA_D1D) {
+            MASS3DPA_8
+          }
+        }
+
+        CPU_FOREACH(dy, y, MPA_D1D) {
+          CPU_FOREACH(dx, x, MPA_D1D) {
+            MASS3DPA_9
+          }
+        }
+
+      }); // element loop
+
+    }
+    stopTimer();
+
+    break;
+  }
+
+  default:
+    getCout() << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl;
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/MASS3DPA.cpp b/src/apps/MASS3DPA.cpp
index a70e98847..f8c9bc47f 100644
--- a/src/apps/MASS3DPA.cpp
+++ b/src/apps/MASS3DPA.cpp
@@ -61,6 +61,7 @@ MASS3DPA::MASS3DPA(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
 }
 
 MASS3DPA::~MASS3DPA()
diff --git a/src/apps/MASS3DPA.hpp b/src/apps/MASS3DPA.hpp
index 7365fa011..be53a625f 100644
--- a/src/apps/MASS3DPA.hpp
+++ b/src/apps/MASS3DPA.hpp
@@ -363,6 +363,7 @@ class MASS3DPA : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp
new file mode 100644
index 000000000..81f5ef3d1
--- /dev/null
+++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp
@@ -0,0 +1,91 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "NODAL_ACCUMULATION_3D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include "AppsData.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = m_domain->n_real_zones;
+  // Both variants below time a plain loop over ii in [ibegin, iend); neither is parallel yet.
+  NODAL_ACCUMULATION_3D_DATA_SETUP;
+
+  NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: body macros expanded inline; the #warning marks the missing parallel version.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#warning needs parallel for+atomic or reduce
+        for (Index_type ii = ibegin ; ii < iend ; ++ii ) {
+          NODAL_ACCUMULATION_3D_BODY_INDEX;
+          NODAL_ACCUMULATION_3D_BODY;
+        }
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: same sequential loop, with the body macros wrapped in a lambda.
+      auto nodal_accumulation_3d_lam = [=](Index_type ii) {
+                         NODAL_ACCUMULATION_3D_BODY_INDEX;
+                         NODAL_ACCUMULATION_3D_BODY;
+                       };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#warning needs parallel for+atomic or reduce
+        for (Index_type ii = ibegin ; ii < iend ; ++ii ) {
+          nodal_accumulation_3d_lam(ii);
+        }
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/NODAL_ACCUMULATION_3D.cpp b/src/apps/NODAL_ACCUMULATION_3D.cpp
index 7ed2f0399..e9e11a81a 100644
--- a/src/apps/NODAL_ACCUMULATION_3D.cpp
+++ b/src/apps/NODAL_ACCUMULATION_3D.cpp
@@ -67,6 +67,9 @@ NODAL_ACCUMULATION_3D::NODAL_ACCUMULATION_3D(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 NODAL_ACCUMULATION_3D::~NODAL_ACCUMULATION_3D()
diff --git a/src/apps/NODAL_ACCUMULATION_3D.hpp b/src/apps/NODAL_ACCUMULATION_3D.hpp
index 5b0ce0d77..05f83268f 100644
--- a/src/apps/NODAL_ACCUMULATION_3D.hpp
+++ b/src/apps/NODAL_ACCUMULATION_3D.hpp
@@ -95,6 +95,7 @@ class NODAL_ACCUMULATION_3D : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp
new file mode 100644
index 000000000..551c8c730
--- /dev/null
+++ b/src/apps/PRESSURE-StdPar.cpp
@@ -0,0 +1,103 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PRESSURE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace apps
+{
+
+
+void PRESSURE::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the StdPar variant of PRESSURE selected by vid; tune_idx is not used.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  PRESSURE_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: two parallel passes over [ibegin, iend), PRESSURE_BODY1 then PRESSURE_BODY2.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          PRESSURE_BODY1;
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          PRESSURE_BODY2;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: same two passes, each body invoked through a captured lambda.
+      auto pressure_lam1 = [=](Index_type i) {
+                             PRESSURE_BODY1;
+                           };
+      auto pressure_lam2 = [=](Index_type i) {
+                             PRESSURE_BODY2;
+                           };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          pressure_lam1(i);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          pressure_lam2(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  PRESSURE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/PRESSURE.cpp b/src/apps/PRESSURE.cpp
index 18979f3bd..64c15ccc4 100644
--- a/src/apps/PRESSURE.cpp
+++ b/src/apps/PRESSURE.cpp
@@ -52,6 +52,9 @@ PRESSURE::PRESSURE(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 PRESSURE::~PRESSURE()
diff --git a/src/apps/PRESSURE.hpp b/src/apps/PRESSURE.hpp
index c0568a8e0..d1ad4e874 100644
--- a/src/apps/PRESSURE.hpp
+++ b/src/apps/PRESSURE.hpp
@@ -72,6 +72,7 @@ class PRESSURE : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp
new file mode 100644
index 000000000..087ebd577
--- /dev/null
+++ b/src/apps/VOL3D-StdPar.cpp
@@ -0,0 +1,96 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "VOL3D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "AppsData.hpp"
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace apps
+{
+
+
+void VOL3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the StdPar variant of VOL3D selected by vid; tune_idx is not used.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = m_domain->fpz;
+  const Index_type iend = m_domain->lpz+1;
+
+  VOL3D_DATA_SETUP;
+
+  NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, y,y0,y1,y2,y3,y4,y5,y6,y7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, z,z0,z1,z2,z3,z4,z5,z6,z7) ;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: VOL3D_BODY expanded inline, one parallel iteration per i in [ibegin, iend).
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          VOL3D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: same parallel range, with the body invoked through a captured lambda.
+      auto vol3d_lam = [=](Index_type i) {
+                         VOL3D_BODY;
+                       };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          vol3d_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  VOL3D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/apps/VOL3D.cpp b/src/apps/VOL3D.cpp
index a1097163a..7f5edf7d1 100644
--- a/src/apps/VOL3D.cpp
+++ b/src/apps/VOL3D.cpp
@@ -64,6 +64,9 @@ VOL3D::VOL3D(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 VOL3D::~VOL3D()
diff --git a/src/apps/VOL3D.hpp b/src/apps/VOL3D.hpp
index aa6701855..82fe191ae 100644
--- a/src/apps/VOL3D.hpp
+++ b/src/apps/VOL3D.hpp
@@ -169,6 +169,7 @@ class VOL3D : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/apps/WIP-COUPLE.hpp b/src/apps/WIP-COUPLE.hpp
index 33faa85cc..0b5b67ed0 100644
--- a/src/apps/WIP-COUPLE.hpp
+++ b/src/apps/WIP-COUPLE.hpp
@@ -171,6 +171,7 @@ class COUPLE : public KernelBase
   void runCudaVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;}
   void runHipVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;}
   void runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;}
+  void runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;}  // no-op stub, like the Cuda/Hip/OpenMPTarget variants above
 
 private:
   Complex_ptr m_t0;
diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp
index b8ab91cd1..76ac5331a 100644
--- a/src/basic-kokkos/DAXPY-Kokkos.cpp
+++ b/src/basic-kokkos/DAXPY-Kokkos.cpp
@@ -53,7 +53,7 @@ void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)
     break;
   }
   default: {
-    std::cout << "\n  DAXPY : Unknown variant id = " << vid << std::endl;
+    getCout() << "\n  DAXPY : Unknown variant id = " << vid << std::endl;
   }
   }
 
diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp
index 19e916dac..c38a15581 100644
--- a/src/basic-kokkos/IF_QUAD-Kokkos.cpp
+++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp
@@ -61,7 +61,7 @@ void IF_QUAD::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_id
   }
 
   default: {
-    std::cout << "\n  IF_QUAD : Unknown variant id = " << vid << std::endl;
+    getCout() << "\n  IF_QUAD : Unknown variant id = " << vid << std::endl;
   }
   }
 
diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp
index 661180c7b..b8ffd4551 100644
--- a/src/basic-kokkos/INIT3-Kokkos.cpp
+++ b/src/basic-kokkos/INIT3-Kokkos.cpp
@@ -57,7 +57,7 @@ void INIT3::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)
   }
 
   default: {
-    std::cout << "\n  INIT3 : Unknown variant id = " << vid << std::endl;
+    getCout() << "\n  INIT3 : Unknown variant id = " << vid << std::endl;
   }
   }
 
diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp
index 8c775a3b0..fddc36ed2 100644
--- a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp
+++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp
@@ -46,7 +46,7 @@ void INIT_VIEW1D::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun
   }
 
   default: {
-    std::cout << "\n  INIT_VIEW1D : Unknown variant id = " << vid << std::endl;
+    getCout() << "\n  INIT_VIEW1D : Unknown variant id = " << vid << std::endl;
   }
   }
 
diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp
index 9df018264..7a39f711d 100644
--- a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp
+++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp
@@ -46,7 +46,7 @@ void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_
   }
 
   default: {
-    std::cout << "\n  INIT_VIEW1D_OFFSET : Unknown variant id = " << vid
+    getCout() << "\n  INIT_VIEW1D_OFFSET : Unknown variant id = " << vid
               << std::endl;
   }
   }
diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp
index 49e890315..f913410b5 100644
--- a/src/basic-kokkos/MULADDSUB-Kokkos.cpp
+++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp
@@ -56,7 +56,7 @@ void MULADDSUB::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_
   }
 
   default: {
-    std::cout << "\n  MULADDSUB : Unknown variant id = " << vid << std::endl;
+    getCout() << "\n  MULADDSUB : Unknown variant id = " << vid << std::endl;
   }
   }
   moveDataToHostFromKokkosView(out1, out1_view, iend);
diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp
index 36929cead..aa212a724 100644
--- a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp
+++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp
@@ -68,7 +68,7 @@ void NESTED_INIT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun
   }
 
   default: {
-    std::cout << "\n  NESTED_INIT : Unknown variant id = " << vid << std::endl;
+    getCout() << "\n  NESTED_INIT : Unknown variant id = " << vid << std::endl;
   }
   }
 }
diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp
index 233ca71af..86d5c9e38 100644
--- a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp
+++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp
@@ -59,7 +59,7 @@ void PI_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_
   }
 
   default: {
-    std::cout << "\n  PI_ATOMIC : Unknown variant id = " << vid << std::endl;
+    getCout() << "\n  PI_ATOMIC : Unknown variant id = " << vid << std::endl;
   }
   }
 }
diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp
index 23c0ab6f4..661badb8e 100644
--- a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp
+++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp
@@ -65,7 +65,7 @@ void REDUCE3_INT::runKokkosVariant(VariantID vid,
   }
 
   default: {
-    std::cout << "\n  REDUCE3_INT : Unknown variant id = " << vid << std::endl;
+    getCout() << "\n  REDUCE3_INT : Unknown variant id = " << vid << std::endl;
   }
   }
 
diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp
index 5cdb9060f..09f79e5fe 100644
--- a/src/basic-kokkos/TRAP_INT-Kokkos.cpp
+++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp
@@ -59,7 +59,7 @@ void TRAP_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i
   }
 
   default: {
-    std::cout << "\n  TRAP_INT : Unknown variant id = " << vid << std::endl;
+    getCout() << "\n  TRAP_INT : Unknown variant id = " << vid << std::endl;
   }
   }
 }
diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt
index 3be6e0c3c..90bd7262d 100644
--- a/src/basic/CMakeLists.txt
+++ b/src/basic/CMakeLists.txt
@@ -10,30 +10,35 @@ blt_add_library(
   NAME basic
   SOURCES DAXPY.cpp
           DAXPY-Seq.cpp
+          DAXPY-StdPar.cpp
           DAXPY-Hip.cpp
           DAXPY-Cuda.cpp
           DAXPY-OMP.cpp
           DAXPY-OMPTarget.cpp
           DAXPY_ATOMIC.cpp
           DAXPY_ATOMIC-Seq.cpp
+          DAXPY_ATOMIC-StdPar.cpp
           DAXPY_ATOMIC-Hip.cpp
           DAXPY_ATOMIC-Cuda.cpp
           DAXPY_ATOMIC-OMP.cpp
           DAXPY_ATOMIC-OMPTarget.cpp
           IF_QUAD.cpp
           IF_QUAD-Seq.cpp
+          IF_QUAD-StdPar.cpp
           IF_QUAD-Hip.cpp
           IF_QUAD-Cuda.cpp
           IF_QUAD-OMP.cpp
           IF_QUAD-OMPTarget.cpp
           INDEXLIST.cpp
           INDEXLIST-Seq.cpp
+          INDEXLIST-StdPar.cpp
           INDEXLIST-Hip.cpp
           INDEXLIST-Cuda.cpp
           INDEXLIST-OMP.cpp
           INDEXLIST-OMPTarget.cpp
           INDEXLIST_3LOOP.cpp
           INDEXLIST_3LOOP-Seq.cpp
+          INDEXLIST_3LOOP-StdPar.cpp
           INDEXLIST_3LOOP-Hip.cpp
           INDEXLIST_3LOOP-Cuda.cpp
           INDEXLIST_3LOOP-OMP.cpp
@@ -46,63 +51,74 @@ blt_add_library(
           INIT3-OMPTarget.cpp
           INIT_VIEW1D.cpp
           INIT_VIEW1D-Seq.cpp
+          INIT_VIEW1D-StdPar.cpp
           INIT_VIEW1D-Hip.cpp
           INIT_VIEW1D-Cuda.cpp
           INIT_VIEW1D-OMP.cpp
           INIT_VIEW1D-OMPTarget.cpp
           INIT_VIEW1D_OFFSET.cpp
           INIT_VIEW1D_OFFSET-Seq.cpp
+          INIT_VIEW1D_OFFSET-StdPar.cpp
           INIT_VIEW1D_OFFSET-Hip.cpp
           INIT_VIEW1D_OFFSET-Cuda.cpp
           INIT_VIEW1D_OFFSET-OMP.cpp
           INIT_VIEW1D_OFFSET-OMPTarget.cpp
           MAT_MAT_SHARED.cpp
           MAT_MAT_SHARED-Seq.cpp
+          MAT_MAT_SHARED-StdPar.cpp
           MAT_MAT_SHARED-Hip.cpp
           MAT_MAT_SHARED-Cuda.cpp
           MAT_MAT_SHARED-OMP.cpp
           MAT_MAT_SHARED-OMPTarget.cpp
           MULADDSUB.cpp
           MULADDSUB-Seq.cpp
+          MULADDSUB-StdPar.cpp
           MULADDSUB-Hip.cpp
           MULADDSUB-Cuda.cpp
           MULADDSUB-OMP.cpp
           MULADDSUB-OMPTarget.cpp
           NESTED_INIT.cpp
           NESTED_INIT-Seq.cpp
+          NESTED_INIT-StdPar.cpp
           NESTED_INIT-Hip.cpp
           NESTED_INIT-Cuda.cpp
           NESTED_INIT-OMP.cpp
           NESTED_INIT-OMPTarget.cpp
           PI_ATOMIC.cpp
           PI_ATOMIC-Seq.cpp
+          PI_ATOMIC-StdPar.cpp
           PI_ATOMIC-Hip.cpp
           PI_ATOMIC-Cuda.cpp
           PI_ATOMIC-OMP.cpp
           PI_ATOMIC-OMPTarget.cpp
           PI_REDUCE.cpp
           PI_REDUCE-Seq.cpp
+          PI_REDUCE-StdPar.cpp
           PI_REDUCE-Hip.cpp
           PI_REDUCE-Cuda.cpp
           PI_REDUCE-OMP.cpp
           PI_REDUCE-OMPTarget.cpp
           REDUCE3_INT.cpp
           REDUCE3_INT-Seq.cpp
+          REDUCE3_INT-StdPar.cpp
           REDUCE3_INT-Hip.cpp
           REDUCE3_INT-Cuda.cpp
           REDUCE3_INT-OMP.cpp
           REDUCE3_INT-OMPTarget.cpp
           REDUCE_STRUCT.cpp
           REDUCE_STRUCT-Seq.cpp
+          REDUCE_STRUCT-StdPar.cpp
           REDUCE_STRUCT-Hip.cpp
           REDUCE_STRUCT-Cuda.cpp
           REDUCE_STRUCT-OMP.cpp
           REDUCE_STRUCT-OMPTarget.cpp
           TRAP_INT.cpp
           TRAP_INT-Seq.cpp
+          TRAP_INT-StdPar.cpp
           TRAP_INT-Hip.cpp
           TRAP_INT-Cuda.cpp
           TRAP_INT-OMPTarget.cpp
           TRAP_INT-OMP.cpp
+          INIT3-StdPar.cpp
   DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
   )
diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp
new file mode 100644
index 000000000..6ee417e53
--- /dev/null
+++ b/src/basic/DAXPY-StdPar.cpp
@@ -0,0 +1,87 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DAXPY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+
+void DAXPY::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the StdPar variant of DAXPY selected by vid; tune_idx is not used.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DAXPY_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: DAXPY_BODY expanded inline, one parallel iteration per i in [ibegin, iend).
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          DAXPY_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: same parallel range, with the body invoked through a captured lambda.
+      auto daxpy_lam = [=](Index_type i) {
+                         DAXPY_BODY;
+                       };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          daxpy_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  DAXPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp
index 67f4b0eb0..8b47600d8 100644
--- a/src/basic/DAXPY.cpp
+++ b/src/basic/DAXPY.cpp
@@ -52,6 +52,9 @@ DAXPY::DAXPY(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/DAXPY.hpp b/src/basic/DAXPY.hpp
index bcaca8054..bb19f0022 100644
--- a/src/basic/DAXPY.hpp
+++ b/src/basic/DAXPY.hpp
@@ -52,6 +52,7 @@ class DAXPY : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp
new file mode 100644
index 000000000..6ad5f90a7
--- /dev/null
+++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp
@@ -0,0 +1,120 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DAXPY_ATOMIC.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+#include <atomic>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+
+void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the StdPar variants of DAXPY_ATOMIC (y[i] += a * x[i] as an atomic update); empty unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DAXPY_ATOMIC_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: the update is written inline; the atomic mechanism is picked at compile time.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+#if defined(NVCXX_GPU_ENABLED)
+          //atomicAdd(&y[i],a * x[i]);
+          atomicaddd(&y[i],a * x[i]);
+#elif defined(_OPENMP)
+          #pragma omp atomic
+          y[i] += a * x[i];
+#elif defined(_OPENACC)
+          #pragma acc atomic
+          y[i] += a * x[i];
+#elif defined(__cpp_lib_atomic_ref)
+          // Only the y[i] update is atomic; x[i] is a plain read, matching the branches above.
+          auto py = std::atomic_ref<Real_type>(y[i]);
+          py += a * x[i];
+#else
+#warning No atomic
+          y[i] += a * x[i];
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: same update, defined once up front and invoked from the algorithm callback.
+      auto daxpy_atomic_lam = [=](Index_type i) {
+#if defined(NVCXX_GPU_ENABLED)
+          //atomicAdd(&y[i],a * x[i]);
+          atomicaddd(&y[i],a * x[i]);
+#elif defined(_OPENMP)
+          #pragma omp atomic
+          y[i] += a * x[i];
+#elif defined(_OPENACC)
+          #pragma acc atomic
+          y[i] += a * x[i];
+#elif defined(__cpp_lib_atomic_ref)
+          // Only the y[i] update is atomic; x[i] is a plain read, matching the branches above.
+          auto py = std::atomic_ref<Real_type>(y[i]);
+          py += a * x[i];
+#else
+#warning No atomic
+          y[i] += a * x[i];
+#endif
+      };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          daxpy_atomic_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp
index e58516d2f..b27dfedce 100644
--- a/src/basic/DAXPY_ATOMIC.cpp
+++ b/src/basic/DAXPY_ATOMIC.cpp
@@ -52,6 +52,9 @@ DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  //setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/DAXPY_ATOMIC.hpp b/src/basic/DAXPY_ATOMIC.hpp
index 9c2890e48..dc35161fd 100644
--- a/src/basic/DAXPY_ATOMIC.hpp
+++ b/src/basic/DAXPY_ATOMIC.hpp
@@ -55,6 +55,7 @@ class DAXPY_ATOMIC : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp
new file mode 100644
index 000000000..c36a7fcaa
--- /dev/null
+++ b/src/basic/IF_QUAD-StdPar.cpp
@@ -0,0 +1,87 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "IF_QUAD.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace basic
+{
+
+
+void IF_QUAD::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps repetitions of the IF_QUAD kernel via std::for_each_n(par_unseq) over [ibegin, iend); empty unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  IF_QUAD_DATA_SETUP;
+  // Kernel body packaged as a lambda; only the Lambda_StdPar case below calls it.
+  auto ifquad_lam = [=](Index_type i) {
+                      IF_QUAD_BODY;
+                    };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: IF_QUAD_BODY is expanded directly inside the algorithm callback.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          IF_QUAD_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: the callback forwards each index to ifquad_lam.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          ifquad_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  IF_QUAD : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp
index b0fe3469b..c51d2ca47 100644
--- a/src/basic/IF_QUAD.cpp
+++ b/src/basic/IF_QUAD.cpp
@@ -56,6 +56,9 @@ IF_QUAD::IF_QUAD(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/IF_QUAD.hpp b/src/basic/IF_QUAD.hpp
index f1f3e12a8..3bd4888b0 100644
--- a/src/basic/IF_QUAD.hpp
+++ b/src/basic/IF_QUAD.hpp
@@ -69,6 +69,7 @@ class IF_QUAD : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp
new file mode 100644
index 000000000..2da1c38b9
--- /dev/null
+++ b/src/basic/INDEXLIST-StdPar.cpp
@@ -0,0 +1,125 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INDEXLIST.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INDEXLIST_DATA_SETUP;
+  // Both StdPar variants currently build the index list with a serial loop (see the #warning markers); empty unless RUN_STDPAR is defined.
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // counts is only consumed by the disabled scan branch and by the opt-in debug dump below.
+      auto counts = std::vector<Index_type>(iend+1,0);
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 1
+        Index_type count = 0;
+
+#warning needs parallel something
+        for (Index_type i = ibegin; i < iend; ++i ) {
+          if ( x[i] < 0.0 ) {
+            list[count++] = i;
+            //y[i] = 1;
+          }
+        }
+
+        m_len = count;
+#else
+        std::transform_exclusive_scan( //std::execution:seq,
+                                       &x[ibegin], &x[iend],
+                                       &counts[0], 0,
+                                       std::plus<Index_type>{},
+                                       [=](Real_type x){ return (x < 0.0); });
+        // NOTE(review): disabled branch. The scan writes only counts[ibegin..iend-1], and counts[iend+1] below is past the iend+1 allocated entries; fix before enabling.
+        std::for_each_n( //std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          if (counts[i] != counts[i+1]) { \
+            list[counts[i]] = i;
+          }
+        });
+
+        m_len = counts[iend+1];
+#endif
+
+#if BRUTE_FORCE_VALIDATION
+        // Opt-in debug dump on the first repetition only; stream insertion keeps the output type-safe for Index_type.
+        if (irep == 0) {
+          for (Index_type i = ibegin; i < iend ; ++i) {
+            getCout() << i << ": x=" << x[i] << " counts=" << counts[i] << " list=" << list[i] << "\n";
+          }
+          getCout() << "\n\n";
+        }
+#endif
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Serial loop again; the per-element work is the INDEXLIST_BODY macro wrapped in a lambda.
+      auto indexlist_base_lam = [=](Index_type i, Index_type& count) {
+                                 INDEXLIST_BODY
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Index_type count = 0;
+
+#warning needs parallel inscan
+        for (Index_type i = ibegin; i < iend; ++i ) {
+          indexlist_base_lam(i, count);
+        }
+
+        m_len = count;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  INDEXLIST : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/INDEXLIST.cpp b/src/basic/INDEXLIST.cpp
index f5614a7f2..8c4124d25 100644
--- a/src/basic/INDEXLIST.cpp
+++ b/src/basic/INDEXLIST.cpp
@@ -49,6 +49,8 @@ INDEXLIST::INDEXLIST(const RunParams& params)
   setVariantDefined( Base_CUDA );
 
   setVariantDefined( Base_HIP );
+
+  //setVariantDefined( Base_StdPar );
 }
 
 INDEXLIST::~INDEXLIST()
diff --git a/src/basic/INDEXLIST.hpp b/src/basic/INDEXLIST.hpp
index 0836d8197..0bd51f947 100644
--- a/src/basic/INDEXLIST.hpp
+++ b/src/basic/INDEXLIST.hpp
@@ -60,6 +60,7 @@ class INDEXLIST : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp
new file mode 100644
index 000000000..770632cd2
--- /dev/null
+++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp
@@ -0,0 +1,148 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INDEXLIST_3LOOP.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+#define INDEXLIST_3LOOP_DATA_SETUP_StdPar /* zero-filled: the first scan also reads counts[iend] */ \
+  Index_type* counts = new Index_type[iend+1]();
+// Releases the scratch array created by the setup macro above; expand both in the same scope.
+#define INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar \
+  delete[] counts; counts = nullptr;
+
+
+
+void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Three timed steps per repetition: flag elements into counts, exclusive-scan counts in place, then run INDEXLIST_3LOOP_MAKE_LIST per element.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INDEXLIST_3LOOP_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      INDEXLIST_3LOOP_DATA_SETUP_StdPar;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Step 1: store a 0/1 flag in counts[i] for every index in [ibegin, iend).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0;
+        });
+        // Step 2: in-place exclusive scan over iend+1 entries; an execution policy is passed only when __NVCOMPILER is defined.
+        // The validation does not notice if the exscan
+        // is removed, or otherwise forced to be wrong.
+        // Using brute-force validation (see below):
+        // Intel and GCC output 0s when any execution policy is used.
+        // NVHPC (GPU) is fine.
+        std::exclusive_scan(
+#ifdef __NVCOMPILER
+                             std::execution::par_unseq,
+#endif
+                             counts+ibegin, counts+iend+1,
+                             counts+ibegin, 0);
+        // Step 3: INDEXLIST_3LOOP_MAKE_LIST per index (macro defined outside this file; presumably fills list from the scanned counts).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          INDEXLIST_3LOOP_MAKE_LIST;
+        });
+        // After the exclusive scan, counts[iend] is the sum of all flags written in step 1.
+        m_len = counts[iend];
+
+#if BRUTE_FORCE_VALIDATION
+        for (Index_type i = ibegin; i < iend+1; ++i ) {
+          std::cout << "C: " << i << "," << counts[i] << "\n";
+        }
+#endif
+      }
+      stopTimer();
+
+      INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar;
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same three steps as the base case, with steps 1 and 3 pre-packaged as lambdas.
+      INDEXLIST_3LOOP_DATA_SETUP_StdPar;
+
+      auto indexlist_conditional_lam = [=](Index_type i) {
+                                  counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0;
+                                };
+
+      auto indexlist_make_list_lam = [=](Index_type i) {
+                                  INDEXLIST_3LOOP_MAKE_LIST;
+                                };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          indexlist_conditional_lam(i);
+        });
+
+        // See comments above...
+        std::exclusive_scan(
+#ifdef __NVCOMPILER
+                             std::execution::par_unseq,
+#endif
+                             counts+ibegin, counts+iend+1,
+                             counts+ibegin, 0);
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          indexlist_make_list_lam(i);
+        });
+
+        m_len = counts[iend];
+
+      }
+      stopTimer();
+
+      INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar;
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/INDEXLIST_3LOOP.cpp b/src/basic/INDEXLIST_3LOOP.cpp
index 3ddb3fc0c..311477a53 100644
--- a/src/basic/INDEXLIST_3LOOP.cpp
+++ b/src/basic/INDEXLIST_3LOOP.cpp
@@ -58,6 +58,9 @@ INDEXLIST_3LOOP::INDEXLIST_3LOOP(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 INDEXLIST_3LOOP::~INDEXLIST_3LOOP()
diff --git a/src/basic/INDEXLIST_3LOOP.hpp b/src/basic/INDEXLIST_3LOOP.hpp
index e19ee5508..408c6483c 100644
--- a/src/basic/INDEXLIST_3LOOP.hpp
+++ b/src/basic/INDEXLIST_3LOOP.hpp
@@ -71,6 +71,7 @@ class INDEXLIST_3LOOP : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp
new file mode 100644
index 000000000..1817a1ee1
--- /dev/null
+++ b/src/basic/INIT3-StdPar.cpp
@@ -0,0 +1,87 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT3.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+
+void INIT3::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INIT3_DATA_SETUP;
+  // Each case times run_reps repetitions of the INIT3 kernel via std::for_each_n(par_unseq) over [ibegin, iend); empty unless RUN_STDPAR is defined.
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: INIT3_BODY is expanded directly inside the algorithm callback.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          INIT3_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: the body is defined once as init3_lam and the callback forwards each index to it.
+      auto init3_lam = [=](Index_type i) {
+                         INIT3_BODY;
+                       };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          init3_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  INIT3 : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp
index b2be64f84..a0fdb4763 100644
--- a/src/basic/INIT3.cpp
+++ b/src/basic/INIT3.cpp
@@ -52,6 +52,9 @@ INIT3::INIT3(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp
index aed67bfeb..d46abd1b9 100644
--- a/src/basic/INIT3.hpp
+++ b/src/basic/INIT3.hpp
@@ -55,6 +55,7 @@ class INIT3 : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp
new file mode 100644
index 000000000..1bbfce7f0
--- /dev/null
+++ b/src/basic/INIT_VIEW1D-StdPar.cpp
@@ -0,0 +1,87 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT_VIEW1D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+
+void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps repetitions of the INIT_VIEW1D kernel via std::for_each_n(par_unseq) over [ibegin, iend); empty unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INIT_VIEW1D_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: INIT_VIEW1D_BODY is expanded directly inside the algorithm callback.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          INIT_VIEW1D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: the body is defined once as a lambda and the callback forwards each index to it.
+      auto initview1d_base_lam = [=](Index_type i) {
+                                   INIT_VIEW1D_BODY;
+                                 };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          initview1d_base_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  INIT_VIEW1D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp
index de34c5a28..9a470d387 100644
--- a/src/basic/INIT_VIEW1D.cpp
+++ b/src/basic/INIT_VIEW1D.cpp
@@ -53,6 +53,9 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/INIT_VIEW1D.hpp b/src/basic/INIT_VIEW1D.hpp
index f3770f69a..84d2f89a8 100644
--- a/src/basic/INIT_VIEW1D.hpp
+++ b/src/basic/INIT_VIEW1D.hpp
@@ -66,6 +66,7 @@ class INIT_VIEW1D : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp
new file mode 100644
index 000000000..e841874a3
--- /dev/null
+++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp
@@ -0,0 +1,87 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT_VIEW1D_OFFSET.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace basic
+{
+
+
+void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps repetitions of the INIT_VIEW1D_OFFSET kernel over the 1-based range [1, problem size + 1); empty unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize()+1;
+
+  INIT_VIEW1D_OFFSET_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: INIT_VIEW1D_OFFSET_BODY is expanded directly inside the algorithm callback.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          INIT_VIEW1D_OFFSET_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: the body is defined once as a lambda and the callback forwards each index to it.
+      auto initview1doffset_base_lam = [=](Index_type i) {
+                                         INIT_VIEW1D_OFFSET_BODY;
+                                       };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          initview1doffset_base_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp
index fe1867698..299a4ea27 100644
--- a/src/basic/INIT_VIEW1D_OFFSET.cpp
+++ b/src/basic/INIT_VIEW1D_OFFSET.cpp
@@ -53,6 +53,9 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/INIT_VIEW1D_OFFSET.hpp b/src/basic/INIT_VIEW1D_OFFSET.hpp
index d32f59c7b..74c5e82a3 100644
--- a/src/basic/INIT_VIEW1D_OFFSET.hpp
+++ b/src/basic/INIT_VIEW1D_OFFSET.hpp
@@ -65,6 +65,7 @@ class INIT_VIEW1D_OFFSET : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp
new file mode 100644
index 000000000..b7814c1ae
--- /dev/null
+++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp
@@ -0,0 +1,36 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MAT_MAT_SHARED.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+
+void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+  // No StdPar variant of this kernel is implemented in this function, so
+  // every variant id that reaches it is reported as unknown.
+  getCout() << "\n  MAT_MAT_SHARED : Unknown variant id = "
+            << vid
+            << std::endl;
+  // (A switch over vid with only a default label reduces to the
+  // unconditional report above.)
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/MAT_MAT_SHARED.hpp b/src/basic/MAT_MAT_SHARED.hpp
index 095721c27..c18682960 100644
--- a/src/basic/MAT_MAT_SHARED.hpp
+++ b/src/basic/MAT_MAT_SHARED.hpp
@@ -139,6 +139,7 @@ class MAT_MAT_SHARED : public KernelBase {
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp
new file mode 100644
index 000000000..7b71fb648
--- /dev/null
+++ b/src/basic/MULADDSUB-StdPar.cpp
@@ -0,0 +1,87 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MULADDSUB.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace basic
+{
+
+
+void MULADDSUB::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps repetitions of the MULADDSUB kernel via std::for_each_n(par_unseq) over [ibegin, iend); empty unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MULADDSUB_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base variant: MULADDSUB_BODY is expanded directly inside the algorithm callback.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          MULADDSUB_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda variant: the body is defined once as mas_lam and the callback forwards each index to it.
+      auto mas_lam = [=](Index_type i) {
+                       MULADDSUB_BODY;
+                     };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          mas_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  MULADDSUB : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp
index 4ae8d6868..714cad544 100644
--- a/src/basic/MULADDSUB.cpp
+++ b/src/basic/MULADDSUB.cpp
@@ -52,6 +52,9 @@ MULADDSUB::MULADDSUB(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/MULADDSUB.hpp b/src/basic/MULADDSUB.hpp
index e604a34c8..97b800569 100644
--- a/src/basic/MULADDSUB.hpp
+++ b/src/basic/MULADDSUB.hpp
@@ -58,6 +58,7 @@ class MULADDSUB : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/NESTED_INIT-Seq.cpp b/src/basic/NESTED_INIT-Seq.cpp
index 0a9c81ff6..917c40315 100644
--- a/src/basic/NESTED_INIT-Seq.cpp
+++ b/src/basic/NESTED_INIT-Seq.cpp
@@ -39,6 +39,7 @@ void NESTED_INIT::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i
           for (Index_type j = 0; j < nj; ++j ) {
             for (Index_type i = 0; i < ni; ++i ) {
               NESTED_INIT_BODY;
+              //std::cout << i << "," << j << "," << k << ";" << k*nj*ni+j*ni+i << " SEQ\n";
             }
           }
         }
diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp
new file mode 100644
index 000000000..575c6e9e3
--- /dev/null
+++ b/src/basic/NESTED_INIT-StdPar.cpp
@@ -0,0 +1,115 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "NESTED_INIT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace basic
+{
+
+
+void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // StdPar variants of NESTED_INIT, timed over run_reps repetitions. tune_idx is not read in this body.
+  const Index_type run_reps = getRunReps();
+
+  NESTED_INIT_DATA_SETUP;
+  // Kernel body wrapped in a lambda; only the Lambda_StdPar case calls it.
+  auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) {
+                          NESTED_INIT_BODY;
+                        };
+
+  switch ( vid ) {
+    // Base variant: NESTED_INIT_BODY is expanded directly inside the parallel loop.
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // With USE_STDPAR_COLLAPSE one flat index over ni*nj*nk is decoded into (i,j,k); otherwise only k is parallel. (ni, nj, nk presumably come from NESTED_INIT_DATA_SETUP.)
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nj*nk,
+                         [=](Index_type ijk) {
+              const auto k  = ijk / (nj*ni);
+              const auto ij = ijk % (nj*ni);
+              const auto j  = ij / ni;
+              const auto i  = ij % ni;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nk,
+                         [=](Index_type k) {
+            for (Index_type j = 0; j < nj; ++j )
+              for (Index_type i = 0; i < ni; ++i )
+#endif  // the braced block below is the loop body for either form
+              {
+                NESTED_INIT_BODY;
+                //getCout() << i << "," << j << "," << k << ";" << ijk << " PAR\n";
+              }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+    // Lambda variant: same loop structure, but the body is invoked through nestedinit_lam.
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nj*nk,
+                         [=](Index_type ijk) {
+              const auto k  = ijk / (nj*ni);
+              const auto ij = ijk % (nj*ni);
+              const auto j  = ij / ni;
+              const auto i  = ij % ni;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nk,
+                         [=](Index_type k) {
+            for (Index_type j = 0; j < nj; ++j )
+              for (Index_type i = 0; i < ni; ++i )
+#endif  // the braced block below is the loop body for either form
+              {
+                nestedinit_lam(i, j, k);
+              }
+          });
+
+      }
+      stopTimer();
+
+      break;
+    }
+    // Any other variant id is only reported; no kernel is run.
+    default : {
+      getCout() << "\n  NESTED_INIT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp
index 4b9183245..1bec8a9fd 100644
--- a/src/basic/NESTED_INIT.cpp
+++ b/src/basic/NESTED_INIT.cpp
@@ -63,6 +63,9 @@ NESTED_INIT::NESTED_INIT(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/NESTED_INIT.hpp b/src/basic/NESTED_INIT.hpp
index ccaf7079e..a4f2c00c0 100644
--- a/src/basic/NESTED_INIT.hpp
+++ b/src/basic/NESTED_INIT.hpp
@@ -58,6 +58,7 @@ class NESTED_INIT : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp
new file mode 100644
index 000000000..f33bc369b
--- /dev/null
+++ b/src/basic/PI_ATOMIC-StdPar.cpp
@@ -0,0 +1,112 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PI_ATOMIC.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#ifndef _OPENMP
+#error Currently, OpenMP atomics are required here.
+#endif
+// myAtomic: atomic double taken from <cuda/atomic> for NVC++ GPU builds and from <atomic> otherwise.
+#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA)
+#include <cuda/atomic>
+typedef cuda::std::atomic<double> myAtomic;
+#else
+// Original note: .fetch_add() for double is not available yet... NOTE(review): myAtomic looks unused by active code - confirm.
+#include <atomic>
+typedef std::atomic<double> myAtomic;
+#endif
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace basic
+{
+
+
+void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // StdPar variants of PI_ATOMIC: every index adds one term to *pi through an
+  // OpenMP atomic update, and the accumulated value is then scaled by 4.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  const Index_type loop_len = iend - ibegin;
+  PI_ATOMIC_DATA_SETUP;
+
+  switch ( vid ) {
+
+    // Term computation written directly in the for_each_n functor.
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type rep = 0; rep < run_reps; ++rep) {
+
+        *pi = m_pi_init;
+        std::for_each_n( std::execution::par,
+                         counting_iterator<Index_type>(ibegin), loop_len,
+                         [=](Index_type i) {
+          const double x = (double(i) + 0.5) * dx;
+          _Pragma("omp atomic")
+          *pi += dx / (1.0 + x * x);
+        });
+        *pi *= 4.0;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    // Term computation held in a named lambda and invoked once per index.
+    case Lambda_StdPar : {
+
+      auto add_term = [=](Index_type i) {
+        const double x = (double(i) + 0.5) * dx;
+        _Pragma("omp atomic")
+        *pi += dx / (1.0 + x * x);
+      };
+
+      startTimer();
+      for (RepIndex_type rep = 0; rep < run_reps; ++rep) {
+
+        *pi = m_pi_init;
+        std::for_each_n( std::execution::par,
+                         counting_iterator<Index_type>(ibegin), loop_len,
+                         [=](Index_type i) {
+          add_term(i);
+        });
+        *pi *= 4.0;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  PI_ATOMIC : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp
index e1f93dd2f..bcdf59440 100644
--- a/src/basic/PI_ATOMIC.cpp
+++ b/src/basic/PI_ATOMIC.cpp
@@ -54,6 +54,9 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/PI_ATOMIC.hpp b/src/basic/PI_ATOMIC.hpp
index 803d6202f..20cf38a68 100644
--- a/src/basic/PI_ATOMIC.hpp
+++ b/src/basic/PI_ATOMIC.hpp
@@ -54,6 +54,7 @@ class PI_ATOMIC : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp
new file mode 100644
index 000000000..cd466a225
--- /dev/null
+++ b/src/basic/PI_REDUCE-StdPar.cpp
@@ -0,0 +1,98 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PI_REDUCE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace basic
+{
+
+
+void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // StdPar variants of PI_REDUCE: reduce dx/(1+x^2) over [ibegin, iend) with
+  // std::transform_reduce, add m_pi_init, and store four times the sum in m_pi.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  const counting_iterator<Index_type> first(ibegin);
+  const counting_iterator<Index_type> last(iend);
+  PI_REDUCE_DATA_SETUP;
+
+  switch ( vid ) {
+
+    // Transform step written inline.
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type rep = 0; rep < run_reps; ++rep) {
+
+        Real_type pi_sum = m_pi_init;
+        pi_sum += std::transform_reduce( std::execution::par_unseq,
+                                         first, last,
+                                         Real_type(0), std::plus<Real_type>(),
+                                         [=](Index_type i) {
+          Real_type x = (Real_type(i) + 0.5) * dx;
+          return dx / (1.0 + x * x);
+        });
+        m_pi = 4.0 * pi_sum;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    // Transform step supplied as a named lambda.
+    case Lambda_StdPar : {
+
+      auto pi_term = [=](Index_type i) -> Real_type {
+        Real_type x = (Real_type(i) + 0.5) * dx;
+        return dx / (1.0 + x * x);
+      };
+
+      startTimer();
+      for (RepIndex_type rep = 0; rep < run_reps; ++rep) {
+
+        Real_type pi_sum = m_pi_init;
+        pi_sum += std::transform_reduce( std::execution::par_unseq,
+                                         first, last,
+                                         Real_type(0), std::plus<Real_type>(),
+                                         pi_term );
+        m_pi = 4.0 * pi_sum;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  PI_REDUCE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp
index 84c38ce67..482fc5811 100644
--- a/src/basic/PI_REDUCE.cpp
+++ b/src/basic/PI_REDUCE.cpp
@@ -51,6 +51,9 @@ PI_REDUCE::PI_REDUCE(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 PI_REDUCE::~PI_REDUCE()
diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp
index 49fca096d..2c2548909 100644
--- a/src/basic/PI_REDUCE.hpp
+++ b/src/basic/PI_REDUCE.hpp
@@ -56,6 +56,7 @@ class PI_REDUCE : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp
new file mode 100644
index 000000000..ac4abcf0d
--- /dev/null
+++ b/src/basic/REDUCE3_INT-StdPar.cpp
@@ -0,0 +1,117 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "REDUCE3_INT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include <array>
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace basic
+{
+
+
+void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // StdPar variants of REDUCE3_INT. Both cases compute the sum, min and max
+  // of vec in one parallel std::transform_reduce over {sum, min, max} triples.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end   = counting_iterator<Index_type>(iend);
+  REDUCE3_INT_DATA_SETUP;
+  typedef std::array<Int_type,3> Reduce_type;
+  // Reduction step shared by both cases: merge two partial triples.
+  auto merge3 = [](Reduce_type a, Reduce_type b) -> Reduce_type {
+                  auto sum = a[0] + b[0];
+                  auto lo  = std::min(a[1],b[1]);
+                  auto hi  = std::max(a[2],b[2]);
+                  Reduce_type red{ sum, lo, hi };
+                  return red;
+                };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Reduce_type result =
+        std::transform_reduce( std::execution::par_unseq,
+                               begin, end,
+                               Reduce_type{m_vsum_init,m_vmin_init,m_vmax_init},
+                               merge3,
+                        [=](Index_type i) -> Reduce_type {
+                             Reduce_type val{ vec[i], vec[i], vec[i] };
+                             return val;
+                        }
+        );
+        m_vsum += result[0];
+        m_vmin = std::min(m_vmin, result[1]);
+        m_vmax = std::max(m_vmax, result[2]);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      auto init3_base_lam = [=](Index_type i) -> Int_type {
+                              return vec[i];
+                            };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // Same parallel reduction as Base_StdPar, reading elements through the lambda.
+        Reduce_type result =
+        std::transform_reduce( std::execution::par_unseq,
+                               begin, end,
+                               Reduce_type{m_vsum_init,m_vmin_init,m_vmax_init},
+                               merge3,
+                        [=](Index_type i) -> Reduce_type {
+                             const Int_type v = init3_base_lam(i);
+                             Reduce_type val{ v, v, v };
+                             return val;
+                        }
+        );
+        m_vsum += result[0];
+        m_vmin = std::min(m_vmin, result[1]);
+        m_vmax = std::max(m_vmax, result[2]);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  REDUCE3_INT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp
index 975bf8f24..0fc262ea9 100644
--- a/src/basic/REDUCE3_INT.cpp
+++ b/src/basic/REDUCE3_INT.cpp
@@ -57,6 +57,9 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/REDUCE3_INT.hpp b/src/basic/REDUCE3_INT.hpp
index e82c2cf05..a89435750 100644
--- a/src/basic/REDUCE3_INT.hpp
+++ b/src/basic/REDUCE3_INT.hpp
@@ -70,6 +70,7 @@ class REDUCE3_INT : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp
new file mode 100644
index 000000000..e82cc98ee
--- /dev/null
+++ b/src/basic/REDUCE_STRUCT-StdPar.cpp
@@ -0,0 +1,107 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "REDUCE_STRUCT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include <array>
+#include <limits>
+#include "common/StdParUtils.hpp"
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace basic
+{
+
+
+void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+
+  // StdPar variant of REDUCE_STRUCT: one std::transform_reduce produces the
+  // sum, min and max of points.x and points.y, which then update `points`.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end   = counting_iterator<Index_type>(iend);
+
+  REDUCE_STRUCT_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // Layout of a partial result: { xsum, xmin, xmax, ysum, ymin, ymax }.
+        using Reduce_type = std::array<Real_type,6>;
+
+        const Reduce_type result =
+        std::transform_reduce( std::execution::par_unseq,
+                               begin, end,
+                               Reduce_type{ m_init_sum, m_init_min, m_init_max,   // x
+                                            m_init_sum, m_init_min, m_init_max }, // y
+                        // Merge two partial results slot by slot.
+                        [=](Reduce_type a, Reduce_type b) -> Reduce_type {
+                             Reduce_type red{ a[0] + b[0],
+                                              std::min(a[1],b[1]),
+                                              std::max(a[2],b[2]),
+                                              a[3] + b[3],
+                                              std::min(a[4],b[4]),
+                                              std::max(a[5],b[5]) };
+                             return red;
+                        },
+                        // Each point seeds all three x slots and all three y slots.
+                        [=](Index_type i) -> Reduce_type {
+                             Reduce_type val{ points.x[i], points.x[i], points.x[i],
+                                              points.y[i], points.y[i], points.y[i] };
+                             return val;
+                        }
+        );
+
+        const Real_type xsum = result[0];
+        const Real_type xmin = result[1];
+        const Real_type xmax = result[2];
+        const Real_type ysum = result[3];
+        const Real_type ymin = result[4];
+        const Real_type ymax = result[5];
+
+        points.SetCenter(xsum/(points.N), ysum/(points.N));
+        points.SetXMin(xmin);
+        points.SetXMax(xmax);
+        points.SetYMin(ymin);
+        points.SetYMax(ymax);
+        m_points=points;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  REDUCE_STRUCT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/REDUCE_STRUCT.cpp b/src/basic/REDUCE_STRUCT.cpp
index 5edaadede..eb78bb21b 100644
--- a/src/basic/REDUCE_STRUCT.cpp
+++ b/src/basic/REDUCE_STRUCT.cpp
@@ -56,6 +56,9 @@ REDUCE_STRUCT::REDUCE_STRUCT(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  //setVariantDefined( Lambda_StdPar );
 }
 
 REDUCE_STRUCT::~REDUCE_STRUCT()
diff --git a/src/basic/REDUCE_STRUCT.hpp b/src/basic/REDUCE_STRUCT.hpp
index 425e7796e..440964704 100644
--- a/src/basic/REDUCE_STRUCT.hpp
+++ b/src/basic/REDUCE_STRUCT.hpp
@@ -86,6 +86,7 @@ class REDUCE_STRUCT : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp
new file mode 100644
index 000000000..c2f22206b
--- /dev/null
+++ b/src/basic/TRAP_INT-StdPar.cpp
@@ -0,0 +1,112 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "TRAP_INT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace basic
+{
+
+//
+// Function used in TRAP_INT loop.
+//
+RAJA_INLINE
+Real_type trap_int_func(Real_type x,
+                        Real_type y,
+                        Real_type xp,
+                        Real_type yp)
+{
+   const Real_type dx = x - xp, dy = y - yp;
+   const Real_type dist2 = dx*dx + dy*dy;   // squared distance from (x,y) to (xp,yp)
+   return 1.0/sqrt(dist2);                  // reciprocal of that distance
+}
+
+void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // StdPar variants of TRAP_INT: reduce trap_int_func over sample points
+  // x0 + i*h with std::transform_reduce and add the h-scaled sum to m_sumx.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  const counting_iterator<Index_type> first(ibegin);
+  const counting_iterator<Index_type> last(iend);
+
+  TRAP_INT_DATA_SETUP;
+
+  switch ( vid ) {
+
+    // Integrand evaluated inline in the transform step.
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type rep = 0; rep < run_reps; ++rep) {
+
+        Real_type rep_sum = m_sumx_init;
+        rep_sum += std::transform_reduce( std::execution::par_unseq,
+                                          first, last,
+                                          Real_type(0), std::plus<Real_type>(),
+                                          [=](Index_type i) {
+          Real_type x = x0 + i*h;
+          return trap_int_func(x, y, xp, yp);
+        });
+        m_sumx += rep_sum * h;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    // Integrand supplied as a named lambda.
+    case Lambda_StdPar : {
+
+      auto integrand = [=](Index_type i) -> Real_type {
+        Real_type x = x0 + i*h;
+        return trap_int_func(x, y, xp, yp);
+      };
+
+      startTimer();
+      for (RepIndex_type rep = 0; rep < run_reps; ++rep) {
+
+        Real_type rep_sum = m_sumx_init;
+        rep_sum += std::transform_reduce( std::execution::par_unseq,
+                                          first, last,
+                                          Real_type(0), std::plus<Real_type>(),
+                                          integrand );
+        m_sumx += rep_sum * h;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  TRAP_INT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp
index eaac3ffda..6b4dc8a3c 100644
--- a/src/basic/TRAP_INT.cpp
+++ b/src/basic/TRAP_INT.cpp
@@ -52,6 +52,9 @@ TRAP_INT::TRAP_INT(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/basic/TRAP_INT.hpp b/src/basic/TRAP_INT.hpp
index e64932dbe..53650e761 100644
--- a/src/basic/TRAP_INT.hpp
+++ b/src/basic/TRAP_INT.hpp
@@ -67,6 +67,7 @@ class TRAP_INT : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp
index a41e3fd00..40d6ecd9d 100644
--- a/src/common/Executor.cpp
+++ b/src/common/Executor.cpp
@@ -1071,10 +1071,10 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode,
           if ( (mode == CSVRepMode::Speedup) &&
                (!kern->hasVariantTuningDefined(reference_vid, reference_tune_idx) ||
                 !kern->hasVariantTuningDefined(vid, tuning_name)) ) {
-            file << "Not run";
+            file << "NotRun";
           } else if ( (mode == CSVRepMode::Timing) &&
                       !kern->hasVariantTuningDefined(vid, tuning_name) ) {
-            file << "Not run";
+            file << "NotRun";
           } else {
             file << setprecision(prec) << std::fixed
                  << getReportDataEntry(mode, combiner, kern, vid,
diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp
index c620c4880..7a86651b1 100644
--- a/src/common/KernelBase.cpp
+++ b/src/common/KernelBase.cpp
@@ -129,6 +129,16 @@ void KernelBase::setVariantDefined(VariantID vid)
 #endif
       break;
     }
+
+    case Base_StdPar :
+    case Lambda_StdPar :
+    {
+#if defined(RUN_STDPAR)
+      setStdParTuningDefinitions(vid);
+#endif
+      break;
+    }
+
 // Required for running Kokkos
     case Kokkos_Lambda :
     {
@@ -246,6 +256,16 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx)
 #endif
       break;
     }
+
+    case Base_StdPar :
+    case Lambda_StdPar :
+    {
+#if defined(RUN_STDPAR)
+      runStdParVariant(vid, tune_idx);
+#endif
+      break;
+    }
+
     case Kokkos_Lambda :
     {
 #if defined(RUN_KOKKOS)
diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp
index cec79e2eb..eb00f8672 100644
--- a/src/common/KernelBase.hpp
+++ b/src/common/KernelBase.hpp
@@ -101,6 +101,9 @@ class KernelBase
   { addVariantTuningName(vid, getDefaultTuningName()); }
 #endif
 
+  virtual void setStdParTuningDefinitions(VariantID vid)  // registers only the default tuning name for vid
+  { addVariantTuningName(vid, getDefaultTuningName()); }
+
   //
   // Getter methods used to generate kernel execution summary
   // and kernel details report ouput.
@@ -333,6 +336,7 @@ class KernelBase
 #if defined(RAJA_ENABLE_TARGET_OPENMP)
   virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0;
 #endif
+  virtual void runStdParVariant(VariantID vid, size_t tune_idx) = 0;
 
 #if defined(RUN_KOKKOS)
   virtual void runKokkosVariant(VariantID vid, size_t tune_idx)
diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp
index da6683907..5516359ed 100644
--- a/src/common/RAJAPerfSuite.cpp
+++ b/src/common/RAJAPerfSuite.cpp
@@ -278,6 +278,9 @@ static const std::string VariantNames [] =
   std::string("Lambda_HIP"),
   std::string("RAJA_HIP"),
 
+  std::string("Base_StdPar"),
+  std::string("Lambda_StdPar"),
+
   std::string("Kokkos_Lambda"),
 
   std::string("Unknown Variant")  // Keep this at the end and DO NOT remove....
@@ -424,6 +427,13 @@ bool isVariantAvailable(VariantID vid)
   }
 #endif
 
+#if defined(RUN_STDPAR)
+  if ( vid == Base_StdPar ||
+       vid == Lambda_StdPar) {
+    ret_val = true;
+  }
+#endif
+
 #if defined(RUN_KOKKOS)
   if ( vid == Kokkos_Lambda ) {
     ret_val = true;
@@ -485,6 +495,14 @@ bool isVariantGPU(VariantID vid)
   }
 #endif
 
+#if defined(RUN_STDPAR)
+  // NOTE(review): StdPar can also target CPUs (e.g. -stdpar=multicore or GCC); confirm it should always count as GPU.
+  if ( vid == Base_StdPar ||
+       vid == Lambda_StdPar) {
+    ret_val = true;
+  }
+#endif
+
 #if defined(RUN_KOKKOS)
   if ( vid == Kokkos_Lambda ) {
     ret_val = true;
diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp
index 1f4da8c9b..cb7c914dd 100644
--- a/src/common/RAJAPerfSuite.hpp
+++ b/src/common/RAJAPerfSuite.hpp
@@ -194,6 +194,9 @@ enum VariantID {
   Lambda_HIP,
   RAJA_HIP,
 
+  Base_StdPar,
+  Lambda_StdPar,
+
   Kokkos_Lambda,
 
   NumVariants // Keep this one last and NEVER comment out (!!)
diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp
new file mode 100644
index 000000000..82ae2fa89
--- /dev/null
+++ b/src/common/StdParUtils.hpp
@@ -0,0 +1,133 @@
+/*
+Copyright (c) 2021, NVIDIA
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#pragma once
+#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA)
+#define USE_STDPAR_COLLAPSE
+#define NVCXX_GPU_ENABLED
+#endif
+// NVHPC GPU builds older than 22.9: provide std::__throw_bad_array_new_length as a stub that aborts.
+#if ( defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA))
+#define NVHPC_CALC_VERSION(MAJOR, MINOR, PATCH) (((MAJOR) * 10000) + ((MINOR) * 100) +  (PATCH))
+#if NVHPC_CALC_VERSION(__NVCOMPILER_MAJOR__,__NVCOMPILER_MINOR__,__NVCOMPILER_PATCHLEVEL__) < 220900
+static inline void std::__throw_bad_array_new_length() { std::abort(); }
+#endif
+#endif
+
+#if 0 //defined(_NVHPC_STDPAR_MULTICORE)
+#warning COLLAPSE (TESTING ONLY - DISABLE IN PRODUCTION)
+#define USE_STDPAR_COLLAPSE
+#endif
+
+#if defined(NVCXX_GPU_ENABLED)
+// this is required to get NVC++ to compile CUDA atomics in StdPar
+#include <openacc.h>
+#endif
+
+// This implementation was authored by David Olsen
+
+#include <algorithm>
+#include <execution>
+#include <numeric>
+#include <type_traits>
+
+// Random-access iterator over consecutive integers: dereferencing yields the
+// current value itself, so no backing storage is needed.
+template <class T>
+struct counting_iterator {
+
+private:
+  typedef counting_iterator<T> self;
+
+public:
+  typedef T value_type;
+  typedef typename std::make_signed<T>::type difference_type;
+  typedef T const* pointer;
+  typedef T const& reference;
+  typedef std::random_access_iterator_tag iterator_category;
+
+  // Value-initializing default constructor: forward (and stronger) iterators
+  // are required to be default constructible.
+  counting_iterator() : value() { }
+  explicit counting_iterator(value_type v) : value(v) { }
+
+  // Element access returns values, not references into storage.
+  value_type operator*() const { return value; }
+  value_type operator[](difference_type n) const { return value + n; }
+
+  self& operator++() { ++value; return *this; }
+  self operator++(int) { self result{value}; ++value; return result; }
+  self& operator--() { --value; return *this; }
+  self operator--(int) { self result{value}; --value; return result; }
+  self& operator+=(difference_type n) { value += n; return *this; }
+  self& operator-=(difference_type n) { value -= n; return *this; }
+
+  friend self operator+(self const& i, difference_type n) {
+    return self(i.value + n);
+  }
+  friend self operator+(difference_type n, self const& i) {
+    return self(i.value + n);
+  }
+  // Distance between two iterators is the difference of their values.
+  friend difference_type operator-(self const& x, self const& y) {
+    return x.value - y.value;
+  }
+  friend self operator-(self const& i, difference_type n) {
+    return self(i.value - n);
+  }
+
+  // Comparisons order iterators by the value they currently hold.
+  friend bool operator==(self const& x, self const& y) {
+    return x.value == y.value;
+  }
+  friend bool operator!=(self const& x, self const& y) {
+    return x.value != y.value;
+  }
+  friend bool operator<(self const& x, self const& y) {
+    return x.value < y.value;
+  }
+  friend bool operator<=(self const& x, self const& y) {
+    return x.value <= y.value;
+  }
+  friend bool operator>(self const& x, self const& y) {
+    return x.value > y.value;
+  }
+  friend bool operator>=(self const& x, self const& y) {
+    return x.value >= y.value;
+  }
+private:
+  value_type value;
+};
+
+// Builds a counting_iterator<T> that starts at the given integral value.
+template <class T, class = std::enable_if_t<std::is_integral<T>::value>>
+inline counting_iterator<T> make_counter(T value) {
+  return counting_iterator<T>(value);
+}
+
diff --git a/src/lcals/CMakeLists.txt b/src/lcals/CMakeLists.txt
index f767bbd0b..06bf8d7cb 100644
--- a/src/lcals/CMakeLists.txt
+++ b/src/lcals/CMakeLists.txt
@@ -74,5 +74,16 @@ blt_add_library(
           TRIDIAG_ELIM-Cuda.cpp
           TRIDIAG_ELIM-OMP.cpp
           TRIDIAG_ELIM-OMPTarget.cpp
+          DIFF_PREDICT-StdPar.cpp
+          EOS-StdPar.cpp
+          FIRST_DIFF-StdPar.cpp
+          FIRST_MIN-StdPar.cpp
+          FIRST_SUM-StdPar.cpp
+          GEN_LIN_RECUR-StdPar.cpp
+          HYDRO_1D-StdPar.cpp
+          HYDRO_2D-StdPar.cpp
+          INT_PREDICT-StdPar.cpp
+          PLANCKIAN-StdPar.cpp
+          TRIDIAG_ELIM-StdPar.cpp
   DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
   )
diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp
new file mode 100644
index 000000000..873703bcf
--- /dev/null
+++ b/src/lcals/DIFF_PREDICT-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DIFF_PREDICT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+// Times the DIFF_PREDICT kernel with std::for_each_n under the par_unseq policy.
+// vid picks Base_StdPar or Lambda_StdPar; any other id only prints a message.
+void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Everything below is compiled out unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DIFF_PREDICT_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // for_each_n takes an element count, so pass the length of [ibegin, iend).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          DIFF_PREDICT_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the kernel body lives in a named lambda the callable forwards to.
+      auto diffpredict_lam = [=](Index_type i) {
+                               DIFF_PREDICT_BODY;
+                             };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          diffpredict_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  DIFF_PREDICT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/DIFF_PREDICT.cpp b/src/lcals/DIFF_PREDICT.cpp
index 57bb9fb39..676d5f447 100644
--- a/src/lcals/DIFF_PREDICT.cpp
+++ b/src/lcals/DIFF_PREDICT.cpp
@@ -51,6 +51,9 @@ DIFF_PREDICT::DIFF_PREDICT(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 DIFF_PREDICT::~DIFF_PREDICT()
diff --git a/src/lcals/DIFF_PREDICT.hpp b/src/lcals/DIFF_PREDICT.hpp
index 3a583381b..7d4868204 100644
--- a/src/lcals/DIFF_PREDICT.hpp
+++ b/src/lcals/DIFF_PREDICT.hpp
@@ -94,6 +94,7 @@ class DIFF_PREDICT : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp
new file mode 100644
index 000000000..eb74b434f
--- /dev/null
+++ b/src/lcals/EOS-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "EOS.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+// Times the EOS kernel with std::for_each_n under the par_unseq policy.
+// vid picks Base_StdPar or Lambda_StdPar; any other id only prints a message.
+void EOS::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Everything below is compiled out unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  EOS_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // for_each_n takes an element count, so pass the length of [ibegin, iend).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          EOS_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the kernel body lives in a named lambda the callable forwards to.
+      auto eos_lam = [=](Index_type i) {
+                       EOS_BODY;
+                     };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          eos_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  EOS : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp
index 69ffa4bc3..4d9cf5720 100644
--- a/src/lcals/EOS.cpp
+++ b/src/lcals/EOS.cpp
@@ -59,6 +59,9 @@ EOS::EOS(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 EOS::~EOS()
diff --git a/src/lcals/EOS.hpp b/src/lcals/EOS.hpp
index 9cc202a02..3efe5cee0 100644
--- a/src/lcals/EOS.hpp
+++ b/src/lcals/EOS.hpp
@@ -63,6 +63,7 @@ class EOS : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp
new file mode 100644
index 000000000..9bf083a19
--- /dev/null
+++ b/src/lcals/FIRST_DIFF-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_DIFF.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+// Times the FIRST_DIFF kernel with std::for_each_n under the par_unseq policy.
+// vid picks Base_StdPar or Lambda_StdPar; any other id only prints a message.
+void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Everything below is compiled out unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_DIFF_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // for_each_n takes an element count, so pass the length of [ibegin, iend).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          FIRST_DIFF_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the kernel body lives in a named lambda the callable forwards to.
+      auto firstdiff_lam = [=](Index_type i) {
+                             FIRST_DIFF_BODY;
+                           };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          firstdiff_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  FIRST_DIFF : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/FIRST_DIFF.cpp b/src/lcals/FIRST_DIFF.cpp
index df13c2f39..b878d2c6c 100644
--- a/src/lcals/FIRST_DIFF.cpp
+++ b/src/lcals/FIRST_DIFF.cpp
@@ -29,7 +29,7 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params)
   m_N = getActualProblemSize()+1;
 
   setItsPerRep( getActualProblemSize() );
-  setItsPerRep( getActualProblemSize() );
+  setItsPerRep( getActualProblemSize() ); // why twice?
   setKernelsPerRep(1);
   setBytesPerRep( (1*sizeof(Real_type) + 0*sizeof(Real_type)) * getActualProblemSize() +
                   (0*sizeof(Real_type) + 1*sizeof(Real_type)) * m_N );
@@ -55,6 +55,9 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 FIRST_DIFF::~FIRST_DIFF()
diff --git a/src/lcals/FIRST_DIFF.hpp b/src/lcals/FIRST_DIFF.hpp
index f3f6424f0..1fd314b75 100644
--- a/src/lcals/FIRST_DIFF.hpp
+++ b/src/lcals/FIRST_DIFF.hpp
@@ -53,6 +53,7 @@ class FIRST_DIFF : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp
new file mode 100644
index 000000000..0f9067ea7
--- /dev/null
+++ b/src/lcals/FIRST_MIN-StdPar.cpp
@@ -0,0 +1,105 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_MIN.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+// Times the FIRST_MIN kernel: scans x over [ibegin, iend) for its smallest entry and records the index.
+// Both variants currently use a plain serial loop; the strict '<' keeps the earliest index on ties.
+void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Everything below is compiled out unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_MIN_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // The std::min_element form is disabled; the #else branch is what runs.
+#if 0
+        auto result =
+        std::min_element( std::execution::par_unseq,
+                          &x[ibegin], &x[iend]);
+        auto loc = std::distance(&x[ibegin], result);
+        m_minloc = std::max(m_minloc, loc);
+#else
+        FIRST_MIN_MINLOC_INIT;
+        for (Index_type i = ibegin; i < iend; ++i ) {
+          if ( x[i] < mymin.val ) {
+            mymin.val = x[i];
+            mymin.loc = i;
+          }
+        }
+        m_minloc = std::max(m_minloc, mymin.loc);
+#endif
+        // Result is folded into m_minloc with std::max, as in the Lambda_StdPar case below.
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Element access goes through a named lambda; the scan itself is unchanged.
+      auto firstmin_base_lam = [=](Index_type i) -> Real_type {
+                                 return x[i];
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        FIRST_MIN_MINLOC_INIT;
+
+        for (Index_type i = ibegin; i < iend; ++i ) {
+          if ( firstmin_base_lam(i) < mymin.val ) {
+            mymin.val = x[i];
+            mymin.loc = i;
+          }
+        }
+
+        m_minloc = std::max(m_minloc, mymin.loc);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  FIRST_MIN : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp
index 427cbe0a6..e4ebe5f1d 100644
--- a/src/lcals/FIRST_MIN.cpp
+++ b/src/lcals/FIRST_MIN.cpp
@@ -59,6 +59,9 @@ FIRST_MIN::FIRST_MIN(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  //setVariantDefined( Lambda_StdPar );
 }
 
 FIRST_MIN::~FIRST_MIN()
diff --git a/src/lcals/FIRST_MIN.hpp b/src/lcals/FIRST_MIN.hpp
index dd00d4392..77e05fc59 100644
--- a/src/lcals/FIRST_MIN.hpp
+++ b/src/lcals/FIRST_MIN.hpp
@@ -80,6 +80,7 @@ class FIRST_MIN : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp
new file mode 100644
index 000000000..8c2881b02
--- /dev/null
+++ b/src/lcals/FIRST_SUM-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_SUM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+// Times the FIRST_SUM kernel with std::for_each_n under the par_unseq policy.
+// vid picks Base_StdPar or Lambda_StdPar; any other id only prints a message.
+void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Everything below is compiled out unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_SUM_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // ibegin is 1 here, so the count must be iend - ibegin to stop at i == iend - 1.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          FIRST_SUM_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the kernel body lives in a named lambda the callable forwards to.
+      auto firstsum_lam = [=](Index_type i) {
+                            FIRST_SUM_BODY;
+                          };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Count is the range length, not the end index (see Base_StdPar).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          firstsum_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  FIRST_SUM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp
index d5559cd6c..bf0a6ec1f 100644
--- a/src/lcals/FIRST_SUM.cpp
+++ b/src/lcals/FIRST_SUM.cpp
@@ -54,6 +54,9 @@ FIRST_SUM::FIRST_SUM(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 FIRST_SUM::~FIRST_SUM()
diff --git a/src/lcals/FIRST_SUM.hpp b/src/lcals/FIRST_SUM.hpp
index 59c1c0bfd..a73cac474 100644
--- a/src/lcals/FIRST_SUM.hpp
+++ b/src/lcals/FIRST_SUM.hpp
@@ -56,6 +56,7 @@ class FIRST_SUM : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp
new file mode 100644
index 000000000..bcf188c70
--- /dev/null
+++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp
@@ -0,0 +1,101 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "GEN_LIN_RECUR.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+// Times the GEN_LIN_RECUR kernel: two std::for_each_n passes per repetition under par_unseq.
+// N is not declared in this function; presumably it comes from GEN_LIN_RECUR_DATA_SETUP (confirm).
+void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Everything below is compiled out unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+
+  GEN_LIN_RECUR_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // First pass: N elements starting at 0, i.e. k = 0 .. N-1.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type k) {
+          GEN_LIN_RECUR_BODY1;
+        });
+        // Second pass: N elements starting at 1, i.e. i = 1 .. N.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(1), N,
+                         [=](Index_type i) {
+          GEN_LIN_RECUR_BODY2;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same two passes, with each body held in a named lambda the callable forwards to.
+      auto genlinrecur_lam1 = [=](Index_type k) {
+                                GEN_LIN_RECUR_BODY1;
+                              };
+      auto genlinrecur_lam2 = [=](Index_type i) {
+                                GEN_LIN_RECUR_BODY2;
+                              };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type k) {
+          genlinrecur_lam1(k);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(1), N,
+                         [=](Index_type i) {
+          genlinrecur_lam2(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp
index 340219129..c6dd2bcfd 100644
--- a/src/lcals/GEN_LIN_RECUR.cpp
+++ b/src/lcals/GEN_LIN_RECUR.cpp
@@ -59,6 +59,9 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 GEN_LIN_RECUR::~GEN_LIN_RECUR()
diff --git a/src/lcals/GEN_LIN_RECUR.hpp b/src/lcals/GEN_LIN_RECUR.hpp
index 9586a69b4..9daaac57a 100644
--- a/src/lcals/GEN_LIN_RECUR.hpp
+++ b/src/lcals/GEN_LIN_RECUR.hpp
@@ -77,6 +77,7 @@ class GEN_LIN_RECUR : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp
new file mode 100644
index 000000000..c458fdf64
--- /dev/null
+++ b/src/lcals/HYDRO_1D-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_1D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+// Times the HYDRO_1D kernel with std::for_each_n under the par_unseq policy.
+// vid picks Base_StdPar or Lambda_StdPar; any other id only prints a message.
+void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Everything below is compiled out unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  HYDRO_1D_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // for_each_n takes an element count, so pass the length of [ibegin, iend).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          HYDRO_1D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the kernel body lives in a named lambda the callable forwards to.
+      auto hydro1d_lam = [=](Index_type i) {
+                           HYDRO_1D_BODY;
+                         };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          hydro1d_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  HYDRO_1D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/HYDRO_1D.cpp b/src/lcals/HYDRO_1D.cpp
index 16a251e2d..ea1853001 100644
--- a/src/lcals/HYDRO_1D.cpp
+++ b/src/lcals/HYDRO_1D.cpp
@@ -58,6 +58,9 @@ HYDRO_1D::HYDRO_1D(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 HYDRO_1D::~HYDRO_1D()
diff --git a/src/lcals/HYDRO_1D.hpp b/src/lcals/HYDRO_1D.hpp
index dd61f112c..45fe3b1d9 100644
--- a/src/lcals/HYDRO_1D.hpp
+++ b/src/lcals/HYDRO_1D.hpp
@@ -58,6 +58,7 @@ class HYDRO_1D : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp
new file mode 100644
index 000000000..da131a2b6
--- /dev/null
+++ b/src/lcals/HYDRO_2D-StdPar.cpp
@@ -0,0 +1,211 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_2D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+// Times the three HYDRO_2D passes over k in [kbeg, kend) and j in [jbeg, jend) using std::for_each_n.
+void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Everything below is compiled out unless RUN_STDPAR is defined.
+  const Index_type run_reps = getRunReps();
+  const Index_type kbeg = 1;
+  const Index_type kend = m_kn - 1;
+  const Index_type jbeg = 1;
+  const Index_type jend = m_jn - 1;
+
+#ifdef USE_STDPAR_COLLAPSE
+  // Collapsed form: a single flat index kj runs over [0, nk*nj);
+  // kbeg and jbeg are added back when kj is split into (k, j) inside each callable.
+  const auto nk = kend-kbeg;
+  const auto nj = jend-jbeg;
+#endif
+
+  HYDRO_2D_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Pass 1: one flattened par_unseq loop, or a par loop over k with an unseq loop over j.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nk*nj,
+                         [=](Index_type kj) {
+            const auto k  = kbeg + kj / nj;
+            const auto j  = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator<Index_type>(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+            //std::cerr << "JEFF: " << k << "," << j << "\n";
+            HYDRO_2D_BODY1;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Pass 2: same loop structure around HYDRO_2D_BODY2.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nk*nj,
+                         [=](Index_type kj) {
+            const auto k = kbeg + kj / nj;
+            const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator<Index_type>(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+            HYDRO_2D_BODY2;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Pass 3: same loop structure around HYDRO_2D_BODY3.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nk*nj,
+                         [=](Index_type kj) {
+            const auto k = kbeg + kj / nj;
+            const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator<Index_type>(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+            HYDRO_2D_BODY3;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same three passes, with each body held in a named (k, j) lambda.
+      auto hydro2d_base_lam1 = [=] (Index_type k, Index_type j) {
+                                 HYDRO_2D_BODY1;
+                               };
+      auto hydro2d_base_lam2 = [=] (Index_type k, Index_type j) {
+                                 HYDRO_2D_BODY2;
+                               };
+      auto hydro2d_base_lam3 = [=] (Index_type k, Index_type j) {
+                                 HYDRO_2D_BODY3;
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Pass 1.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nk*nj,
+                         [=](Index_type kj) {
+            const auto k = kbeg + kj / nj;
+            const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator<Index_type>(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+            hydro2d_base_lam1(k, j);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Pass 2.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nk*nj,
+                         [=](Index_type kj) {
+            const auto k = kbeg + kj / nj;
+            const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator<Index_type>(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+            hydro2d_base_lam2(k, j);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Pass 3.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nk*nj,
+                         [=](Index_type kj) {
+            const auto k = kbeg + kj / nj;
+            const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator<Index_type>(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+            hydro2d_base_lam3(k, j);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  HYDRO_2D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp
index 0920de8c1..9c696fa20 100644
--- a/src/lcals/HYDRO_2D.cpp
+++ b/src/lcals/HYDRO_2D.cpp
@@ -73,6 +73,9 @@ HYDRO_2D::HYDRO_2D(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 HYDRO_2D::~HYDRO_2D()
diff --git a/src/lcals/HYDRO_2D.hpp b/src/lcals/HYDRO_2D.hpp
index e735abde5..5f2c4132d 100644
--- a/src/lcals/HYDRO_2D.hpp
+++ b/src/lcals/HYDRO_2D.hpp
@@ -60,7 +60,7 @@
   const Real_type s = m_s; \
   const Real_type t = m_t; \
 \
-  const Index_type kn = m_kn; \
+  const Index_type kn = m_kn; (void)kn; \
   const Index_type jn = m_jn;
 
 #define HYDRO_2D_BODY1  \
@@ -154,6 +154,7 @@ class HYDRO_2D : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp
new file mode 100644
index 000000000..aeecccddc
--- /dev/null
+++ b/src/lcals/INT_PREDICT-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INT_PREDICT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+
+
+void INT_PREDICT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of INT_PREDICT_BODY over [ibegin, iend); tune_idx is not read.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INT_PREDICT_DATA_SETUP;
+  // Kernel body wrapped in a named lambda; only the Lambda_StdPar case calls it.
+  auto intpredict_lam = [=](Index_type i) {
+                          INT_PREDICT_BODY;
+                        };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base: the kernel body is expanded directly inside the algorithm's callable.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // for_each_n takes an element count, hence iend-ibegin rather than iend.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          INT_PREDICT_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda: same traversal, but each index is forwarded to intpredict_lam.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          intpredict_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  INT_PREDICT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/INT_PREDICT.cpp b/src/lcals/INT_PREDICT.cpp
index 1e7375752..49437aae2 100644
--- a/src/lcals/INT_PREDICT.cpp
+++ b/src/lcals/INT_PREDICT.cpp
@@ -51,6 +51,9 @@ INT_PREDICT::INT_PREDICT(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 INT_PREDICT::~INT_PREDICT()
diff --git a/src/lcals/INT_PREDICT.hpp b/src/lcals/INT_PREDICT.hpp
index a81ae6fb2..a7366f532 100644
--- a/src/lcals/INT_PREDICT.hpp
+++ b/src/lcals/INT_PREDICT.hpp
@@ -73,6 +73,7 @@ class INT_PREDICT : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp
new file mode 100644
index 000000000..cb55f5869
--- /dev/null
+++ b/src/lcals/PLANCKIAN-StdPar.cpp
@@ -0,0 +1,89 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PLANCKIAN.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+#include <cmath>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+
+
+void PLANCKIAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of PLANCKIAN_BODY over [ibegin, iend); tune_idx is not read.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  PLANCKIAN_DATA_SETUP;
+  // Kernel body wrapped in a named lambda; only the Lambda_StdPar case calls it.
+  auto planckian_lam = [=](Index_type i) {
+                         PLANCKIAN_BODY;
+                       };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base: the kernel body is expanded directly inside the algorithm's callable.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // for_each_n takes an element count, hence iend-ibegin rather than iend.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          PLANCKIAN_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda: same traversal, but each index is forwarded to planckian_lam.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          planckian_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  PLANCKIAN : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/PLANCKIAN.cpp b/src/lcals/PLANCKIAN.cpp
index da178a407..44397bc8b 100644
--- a/src/lcals/PLANCKIAN.cpp
+++ b/src/lcals/PLANCKIAN.cpp
@@ -51,6 +51,9 @@ PLANCKIAN::PLANCKIAN(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 PLANCKIAN::~PLANCKIAN()
diff --git a/src/lcals/PLANCKIAN.hpp b/src/lcals/PLANCKIAN.hpp
index 92b55fc95..2af31cc8f 100644
--- a/src/lcals/PLANCKIAN.hpp
+++ b/src/lcals/PLANCKIAN.hpp
@@ -58,6 +58,7 @@ class PLANCKIAN : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp
new file mode 100644
index 000000000..9ad42dd9c
--- /dev/null
+++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp
@@ -0,0 +1,90 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "TRIDIAG_ELIM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace lcals
+{
+
+
+void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of TRIDIAG_ELIM_BODY over [ibegin, iend); tune_idx is not read.
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = m_N;
+
+  TRIDIAG_ELIM_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base: the kernel body is expanded directly inside the algorithm's callable.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // ibegin is 1, so the count must be iend-ibegin; a count of iend would also visit index iend.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          TRIDIAG_ELIM_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Lambda: the kernel body lives in a named lambda that the algorithm's callable invokes.
+      auto tridiag_elim_lam = [=](Index_type i) {
+                                TRIDIAG_ELIM_BODY;
+                              };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Same half-open range as the Base case: iend-ibegin elements starting at ibegin.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          tridiag_elim_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp
index 98278cbcf..49904e0b6 100644
--- a/src/lcals/TRIDIAG_ELIM.cpp
+++ b/src/lcals/TRIDIAG_ELIM.cpp
@@ -53,6 +53,9 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 TRIDIAG_ELIM::~TRIDIAG_ELIM()
diff --git a/src/lcals/TRIDIAG_ELIM.hpp b/src/lcals/TRIDIAG_ELIM.hpp
index c95685de9..906dad0de 100644
--- a/src/lcals/TRIDIAG_ELIM.hpp
+++ b/src/lcals/TRIDIAG_ELIM.hpp
@@ -58,6 +58,7 @@ class TRIDIAG_ELIM : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/CMakeLists.txt b/src/polybench/CMakeLists.txt
index f9cd2c1c2..3779533d4 100644
--- a/src/polybench/CMakeLists.txt
+++ b/src/polybench/CMakeLists.txt
@@ -86,5 +86,18 @@ blt_add_library(
           POLYBENCH_MVT-Cuda.cpp
           POLYBENCH_MVT-OMP.cpp
           POLYBENCH_MVT-OMPTarget.cpp
+          POLYBENCH_2MM-StdPar.cpp
+          POLYBENCH_3MM-StdPar.cpp
+          POLYBENCH_ADI-StdPar.cpp
+          POLYBENCH_ATAX-StdPar.cpp
+          POLYBENCH_FDTD_2D-StdPar.cpp
+          POLYBENCH_FLOYD_WARSHALL-StdPar.cpp
+          POLYBENCH_GEMM-StdPar.cpp
+          POLYBENCH_GEMVER-StdPar.cpp
+          POLYBENCH_GESUMMV-StdPar.cpp
+          POLYBENCH_HEAT_3D-StdPar.cpp
+          POLYBENCH_JACOBI_1D-StdPar.cpp
+          POLYBENCH_JACOBI_2D-StdPar.cpp
+          POLYBENCH_MVT-StdPar.cpp
   DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
   )
diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp
new file mode 100644
index 000000000..ed89ff4fe
--- /dev/null
+++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp
@@ -0,0 +1,204 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_2MM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of the two POLYBENCH_2MM loop nests; tune_idx is not read.
+  const Index_type run_reps = getRunReps();
+
+  POLYBENCH_2MM_DATA_SETUP;
+  // begin/end are only consumed by the disabled transform_reduce path further down.
+#if 0
+  auto begin = counting_iterator<Index_type>(0);
+  auto end   = counting_iterator<Index_type>(nk);
+#endif
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // NOTE(review): inner loops update dot via a by-reference capture under unseq; confirm toolchains accept this.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Nest 1 over (i, j): one flattened range when USE_STDPAR_COLLAPSE is set, else nested.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nj,
+                         [=](Index_type ij) {
+            const auto i  = ij / nj;
+            const auto j  = ij % nj;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nj,
+                           [=](Index_type j) {
+#endif
+#if 1
+            POLYBENCH_2MM_BODY1;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nk,
+                             [=,&dot](Index_type k) {
+              POLYBENCH_2MM_BODY2;
+            });
+            POLYBENCH_2MM_BODY3;
+#else
+            tmp[j + i*nj] = std::transform_reduce( std::execution::unseq,
+                                                   begin, end,
+                                                   (Real_type)0, std::plus<Real_type>(),
+                                                   [=] (Index_type k) {
+                                                     return alpha * A[k + i*nk] * B[j + k*nj];
+                                                   });
+#endif
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Nest 2 over (i, l) with an inner loop over j; same collapse switch as nest 1.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nl,
+                         [=](Index_type il) {
+            const auto i  = il / nl;
+            const auto l  = il % nl;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nl,
+                           [=](Index_type l) {
+#endif
+            POLYBENCH_2MM_BODY4;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nj,
+                             [=,&dot](Index_type j) {
+              POLYBENCH_2MM_BODY5;
+            });
+            POLYBENCH_2MM_BODY6;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Named lambdas wrapping BODY2/3/5/6; dot is taken by reference so they act on the caller's variable.
+      auto poly_2mm_base_lam2 =
+              [=](Index_type i, Index_type j, Index_type k, Real_type &dot) {
+                                  POLYBENCH_2MM_BODY2;
+                                };
+      auto poly_2mm_base_lam3 =
+              [=](Index_type i, Index_type j, Real_type &dot) {
+                                  POLYBENCH_2MM_BODY3;
+                                };
+      auto poly_2mm_base_lam5 =
+              [=](Index_type i, Index_type l, Index_type j, Real_type &dot) {
+                                  POLYBENCH_2MM_BODY5;
+                                };
+      auto poly_2mm_base_lam6 =
+              [=](Index_type i, Index_type l, Real_type &dot) {
+                                  POLYBENCH_2MM_BODY6;
+                                };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Nest 1 over (i, j), structured like the Base case but calling the named lambdas.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nj,
+                         [=](Index_type ij) {
+            const auto i  = ij / nj;
+            const auto j  = ij % nj;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nj,
+                           [=](Index_type j) {
+#endif
+            POLYBENCH_2MM_BODY1;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nk,
+                             [=,&dot](Index_type k) {
+              poly_2mm_base_lam2(i, j, k, dot);
+            });
+            poly_2mm_base_lam3(i, j, dot);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Nest 2 over (i, l) with an inner loop over j.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nl,
+                         [=](Index_type il) {
+            const auto i  = il / nl;
+            const auto l  = il % nl;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nl,
+                           [=](Index_type l) {
+#endif
+            POLYBENCH_2MM_BODY4;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nj,
+                             [=,&dot](Index_type j) {
+              poly_2mm_base_lam5(i, l, j, dot);
+            });
+            poly_2mm_base_lam6(i, l, dot);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_2MM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_2MM.cpp b/src/polybench/POLYBENCH_2MM.cpp
index 80136cd49..9d44e3005 100644
--- a/src/polybench/POLYBENCH_2MM.cpp
+++ b/src/polybench/POLYBENCH_2MM.cpp
@@ -78,6 +78,9 @@ POLYBENCH_2MM::POLYBENCH_2MM(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_2MM::~POLYBENCH_2MM()
diff --git a/src/polybench/POLYBENCH_2MM.hpp b/src/polybench/POLYBENCH_2MM.hpp
index e11d4889b..d792d549d 100644
--- a/src/polybench/POLYBENCH_2MM.hpp
+++ b/src/polybench/POLYBENCH_2MM.hpp
@@ -127,6 +127,7 @@ class POLYBENCH_2MM : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_3MM-Seq.cpp b/src/polybench/POLYBENCH_3MM-Seq.cpp
index fb1bdd03b..28ea1a319 100644
--- a/src/polybench/POLYBENCH_3MM-Seq.cpp
+++ b/src/polybench/POLYBENCH_3MM-Seq.cpp
@@ -19,7 +19,6 @@ namespace rajaperf
 namespace polybench
 {
 
-
 void POLYBENCH_3MM::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
 {
   const Index_type run_reps = getRunReps();
diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp
new file mode 100644
index 000000000..36ead6be7
--- /dev/null
+++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp
@@ -0,0 +1,250 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_3MM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of the three POLYBENCH_3MM loop nests; tune_idx is not read.
+  const Index_type run_reps = getRunReps();
+
+  POLYBENCH_3MM_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // NOTE(review): inner loops update dot via a by-reference capture under unseq; confirm toolchains accept this.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Nest 1 over (i, j), inner loop over k. The j loop uses unseq like every other inner loop here.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nj,
+                         [=](Index_type ij) {
+            const auto i  = ij / nj;
+            const auto j  = ij % nj;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nj,
+                           [=](Index_type j) {
+#endif
+            POLYBENCH_3MM_BODY1;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nk,
+                             [=,&dot](Index_type k) {
+              POLYBENCH_3MM_BODY2;
+            });
+            POLYBENCH_3MM_BODY3;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Nest 2 over (j, l), inner loop over m.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nj*nl,
+                         [=](Index_type jl) {
+            const auto j  = jl / nl;
+            const auto l  = jl % nl;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nj,
+                         [=](Index_type j) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nl,
+                           [=](Index_type l) {
+#endif
+            POLYBENCH_3MM_BODY4;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nm,
+                             [=,&dot](Index_type m) {
+              POLYBENCH_3MM_BODY5;
+            });
+            POLYBENCH_3MM_BODY6;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Nest 3 over (i, l), inner loop over j.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nl,
+                         [=](Index_type il) {
+            const auto i  = il / nl;
+            const auto l  = il % nl;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nl,
+                           [=](Index_type l) {
+#endif
+            POLYBENCH_3MM_BODY7;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nj,
+                             [=,&dot](Index_type j) {
+              POLYBENCH_3MM_BODY8;
+            });
+            POLYBENCH_3MM_BODY9;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Named lambdas wrapping BODY2/3/5/6/8/9; dot is taken by reference so they act on the caller's variable.
+      auto poly_3mm_base_lam2 =
+              [=] (Index_type i, Index_type j, Index_type k, Real_type &dot) {
+                                  POLYBENCH_3MM_BODY2;
+                                };
+      auto poly_3mm_base_lam3 =
+              [=] (Index_type i, Index_type j, Real_type &dot) {
+                                  POLYBENCH_3MM_BODY3;
+                                };
+      auto poly_3mm_base_lam5 =
+              [=] (Index_type j, Index_type l, Index_type m, Real_type &dot) {
+                                  POLYBENCH_3MM_BODY5;
+                                };
+      auto poly_3mm_base_lam6 =
+              [=] (Index_type j, Index_type l, Real_type &dot) {
+                                  POLYBENCH_3MM_BODY6;
+                                };
+      auto poly_3mm_base_lam8 =
+              [=] (Index_type i, Index_type l, Index_type j, Real_type &dot) {
+                                  POLYBENCH_3MM_BODY8;
+                                };
+      auto poly_3mm_base_lam9 =
+              [=] (Index_type i, Index_type l, Real_type &dot) {
+                                  POLYBENCH_3MM_BODY9;
+                                };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Nest 1 over (i, j), inner loop over k, calling the named lambdas.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nj,
+                         [=](Index_type ij) {
+            const auto i  = ij / nj;
+            const auto j  = ij % nj;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nj,
+                           [=](Index_type j) {
+#endif
+            POLYBENCH_3MM_BODY1;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nk,
+                             [=,&dot](Index_type k) {
+              poly_3mm_base_lam2(i, j, k, dot);
+            });
+            poly_3mm_base_lam3(i, j, dot);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Nest 2 over (j, l), inner loop over m.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nj*nl,
+                         [=](Index_type jl) {
+            const auto j  = jl / nl;
+            const auto l  = jl % nl;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nj,
+                         [=](Index_type j) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nl,
+                           [=](Index_type l) {
+#endif
+            POLYBENCH_3MM_BODY4;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nm,
+                             [=,&dot](Index_type m) {
+              poly_3mm_base_lam5(j, l, m, dot);
+            });
+            poly_3mm_base_lam6(j, l, dot);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+        // Nest 3 over (i, l), inner loop over j.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nl,
+                         [=](Index_type il) {
+            const auto i  = il / nl;
+            const auto l  = il % nl;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nl,
+                           [=](Index_type l) {
+#endif
+            POLYBENCH_3MM_BODY7;
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(0), nj,
+                             [=,&dot](Index_type j) {
+              poly_3mm_base_lam8(i, l, j, dot);
+            });
+            poly_3mm_base_lam9(i, l, dot);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_3MM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_3MM.cpp b/src/polybench/POLYBENCH_3MM.cpp
index e1dad595c..a4aa1d181 100644
--- a/src/polybench/POLYBENCH_3MM.cpp
+++ b/src/polybench/POLYBENCH_3MM.cpp
@@ -86,6 +86,9 @@ POLYBENCH_3MM::POLYBENCH_3MM(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_3MM::~POLYBENCH_3MM()
diff --git a/src/polybench/POLYBENCH_3MM.hpp b/src/polybench/POLYBENCH_3MM.hpp
index 4331e3930..4e01307a3 100644
--- a/src/polybench/POLYBENCH_3MM.hpp
+++ b/src/polybench/POLYBENCH_3MM.hpp
@@ -153,6 +153,7 @@ class POLYBENCH_3MM : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp
new file mode 100644
index 000000000..7ea88960a
--- /dev/null
+++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp
@@ -0,0 +1,154 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_ADI.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of the POLYBENCH_ADI time-step loop; tune_idx is not read.
+  const Index_type run_reps = getRunReps();
+
+  POLYBENCH_ADI_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Base: kernel bodies are expanded directly inside the algorithm callables.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 1; t <= tsteps; ++t) {
+          // Sweep 1: start 1, count n-2 visits i = 1 .. n-2, the same bounds as the inner j loop.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), n-2,
+                           [=](Index_type i) {
+            POLYBENCH_ADI_BODY2;
+            for (Index_type j = 1; j < n-1; ++j) {
+              POLYBENCH_ADI_BODY3;
+            }
+            POLYBENCH_ADI_BODY4;
+            for (Index_type k = n-2; k >= 1; --k) {
+              POLYBENCH_ADI_BODY5;
+            }
+          });
+          // Sweep 2: same traversal shape with BODY6..BODY9; starts after sweep 1 has returned.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), n-2,
+                           [=](Index_type i) {
+            POLYBENCH_ADI_BODY6;
+            for (Index_type j = 1; j < n-1; ++j) {
+              POLYBENCH_ADI_BODY7;
+            }
+            POLYBENCH_ADI_BODY8;
+            for (Index_type k = n-2; k >= 1; --k) {
+              POLYBENCH_ADI_BODY9;
+            }
+          });
+
+        }  // tstep loop
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // One named lambda per kernel body macro (BODY2..BODY9), called from the sweeps below.
+      auto poly_adi_base_lam2 = [=](Index_type i) {
+                                  POLYBENCH_ADI_BODY2;
+                                };
+      auto poly_adi_base_lam3 = [=](Index_type i, Index_type j) {
+                                  POLYBENCH_ADI_BODY3;
+                                };
+      auto poly_adi_base_lam4 = [=](Index_type i) {
+                                  POLYBENCH_ADI_BODY4;
+                                };
+      auto poly_adi_base_lam5 = [=](Index_type i, Index_type k) {
+                                  POLYBENCH_ADI_BODY5;
+                                };
+      auto poly_adi_base_lam6 = [=](Index_type i) {
+                                  POLYBENCH_ADI_BODY6;
+                                };
+      auto poly_adi_base_lam7 = [=](Index_type i, Index_type j) {
+                                  POLYBENCH_ADI_BODY7;
+                                };
+      auto poly_adi_base_lam8 = [=](Index_type i) {
+                                  POLYBENCH_ADI_BODY8;
+                                };
+      auto poly_adi_base_lam9 = [=](Index_type i, Index_type k) {
+                                  POLYBENCH_ADI_BODY9;
+                                };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 1; t <= tsteps; ++t) {
+          // Sweep 1: i = 1 .. n-2, forward j loop followed by backward k loop.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), n-2,
+                           [=](Index_type i) {
+            poly_adi_base_lam2(i);
+            for (Index_type j = 1; j < n-1; ++j) {
+              poly_adi_base_lam3(i, j);
+            }
+            poly_adi_base_lam4(i);
+            for (Index_type k = n-2; k >= 1; --k) {
+              poly_adi_base_lam5(i, k);
+            }
+          });
+          // Sweep 2: same traversal shape using lambdas 6..9.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), n-2,
+                           [=](Index_type i) {
+            poly_adi_base_lam6(i);
+            for (Index_type j = 1; j < n-1; ++j) {
+              poly_adi_base_lam7(i, j);
+            }
+            poly_adi_base_lam8(i);
+            for (Index_type k = n-2; k >= 1; --k) {
+              poly_adi_base_lam9(i, k);
+            }
+          });
+
+        }  // tstep loop
+
+      }  // run_reps
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\nPOLYBENCH_ADI  Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp
index b513bdebc..7d6968a36 100644
--- a/src/polybench/POLYBENCH_ADI.cpp
+++ b/src/polybench/POLYBENCH_ADI.cpp
@@ -63,6 +63,9 @@ POLYBENCH_ADI::POLYBENCH_ADI(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_ADI::~POLYBENCH_ADI()
diff --git a/src/polybench/POLYBENCH_ADI.hpp b/src/polybench/POLYBENCH_ADI.hpp
index 848fb9dc4..b316735ba 100644
--- a/src/polybench/POLYBENCH_ADI.hpp
+++ b/src/polybench/POLYBENCH_ADI.hpp
@@ -195,6 +195,7 @@ class POLYBENCH_ADI : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp
new file mode 100644
index 000000000..88866ca63
--- /dev/null
+++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp
@@ -0,0 +1,130 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_ATAX.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace polybench
+{
+
+void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the ATAX kernel for the StdPar variant selected by vid.
+  const Index_type run_reps= getRunReps();
+  // The tuning index is not used here, hence RAJAPERF_UNUSED_ARG in the signature.
+  POLYBENCH_ATAX_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macros are expanded directly inside the loops.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Pass 1: parallel over i, in-order inner sweep over j.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          POLYBENCH_ATAX_BODY1;
+          std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                           counting_iterator<Index_type>(0), N,
+                           [=,&dot](Index_type j) {
+            POLYBENCH_ATAX_BODY2;
+          });
+          POLYBENCH_ATAX_BODY3;
+        });
+        // Pass 2: parallel over j, in-order inner sweep over i.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type j) {
+          POLYBENCH_ATAX_BODY4;
+          std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                           counting_iterator<Index_type>(0), N,
+                           [=,&dot](Index_type i) {
+            POLYBENCH_ATAX_BODY5;
+          });
+          POLYBENCH_ATAX_BODY6;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop nest as Base_StdPar, with body macros 2/3/5/6 wrapped in named lambdas.
+      auto poly_atax_base_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) {
+                                   POLYBENCH_ATAX_BODY2;
+                                 };
+      auto poly_atax_base_lam3 = [=] (Index_type i, Real_type &dot) {
+                                   POLYBENCH_ATAX_BODY3;
+                                  };
+      auto poly_atax_base_lam5 = [=] (Index_type i, Index_type j , Real_type &dot) {
+                                   POLYBENCH_ATAX_BODY5;
+                                  };
+      auto poly_atax_base_lam6 = [=] (Index_type j, Real_type &dot) {
+                                   POLYBENCH_ATAX_BODY6;
+                                  };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Pass 1: parallel over i; BODY1 is still expanded inline (presumably it declares dot).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          POLYBENCH_ATAX_BODY1;
+          std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                           counting_iterator<Index_type>(0), N,
+                           [=,&dot](Index_type j) {
+            poly_atax_base_lam2(i, j, dot);
+          });
+          poly_atax_base_lam3(i, dot);
+        });
+        // Pass 2: parallel over j; BODY4 is still expanded inline (presumably it declares dot).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type j) {
+          POLYBENCH_ATAX_BODY4;
+          std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                           counting_iterator<Index_type>(0), N,
+                           [=,&dot](Index_type i) {
+            poly_atax_base_lam5(i, j, dot);
+          });
+          poly_atax_base_lam6(j, dot);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp
index 4c159d3a7..38318a600 100644
--- a/src/polybench/POLYBENCH_ATAX.cpp
+++ b/src/polybench/POLYBENCH_ATAX.cpp
@@ -65,6 +65,9 @@ POLYBENCH_ATAX::POLYBENCH_ATAX(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_ATAX::~POLYBENCH_ATAX()
diff --git a/src/polybench/POLYBENCH_ATAX.hpp b/src/polybench/POLYBENCH_ATAX.hpp
index f94ade140..d5c019ba1 100644
--- a/src/polybench/POLYBENCH_ATAX.hpp
+++ b/src/polybench/POLYBENCH_ATAX.hpp
@@ -115,6 +115,7 @@ class POLYBENCH_ATAX : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp
new file mode 100644
index 000000000..64c50c34a
--- /dev/null
+++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp
@@ -0,0 +1,163 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_FDTD_2D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the FDTD_2D kernel for the StdPar variant selected by vid.
+  const Index_type run_reps = getRunReps();
+  // The tuning index is not used here, hence RAJAPERF_UNUSED_ARG in the signature.
+  POLYBENCH_FDTD_2D_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macros are expanded directly inside the loops.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // 't' is not declared in this function; presumably the data setup macro provides it.
+        for (t = 0; t < tsteps; ++t) {
+          // This [=] lambda is built inside the t loop, so it copies the current t each step.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(0), ny,
+                           [=](Index_type j) {
+            POLYBENCH_FDTD_2D_BODY1;
+          });
+
+          // Note to future developers:
+          //   Do not try to be smart and use more C++ than necessary.
+          //   auto [i,j] = std::div(ij,ny); i++;
+          //   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is noticeably slower than below.
+          // Each (i,j) nest below is flattened into one index ij and unpacked by hand.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(0), (nx-1)*ny,
+                           [=](Index_type ij) {
+              const auto i  = 1 + ij / ny;  // i in [1, nx)
+              const auto j  =     ij % ny;  // j in [0, ny)
+              POLYBENCH_FDTD_2D_BODY2;
+          });
+
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(0), nx*(ny-1),
+                           [=](Index_type ij) {
+              const auto i  =     ij / (ny-1);  // i in [0, nx)
+              const auto j  = 1 + ij % (ny-1);  // j in [1, ny)
+              POLYBENCH_FDTD_2D_BODY3;
+          });
+
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(0), (nx-1)*(ny-1),
+                           [=](Index_type ij) {
+              const auto i  = ij / (ny-1);  // i in [0, nx-1)
+              const auto j  = ij % (ny-1);  // j in [0, ny-1)
+              POLYBENCH_FDTD_2D_BODY4;
+          });
+
+        }  // tstep loop
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      //
+      // Note: the first lambda needs the current value of the time-step
+      //       variable 't' on every t-loop iteration. Capturing 't' by
+      //       reference gives correct results with GCC 11 but breaks
+      //       NVHPC GPU, so 't' is instead an explicit lambda parameter
+      //       (the parameter hides the enclosing 't' inside the body).
+      //
+      auto poly_fdtd2d_base_lam1 = [=](Index_type j, Index_type t) {
+                                     //ey[j + 0*ny] = fict[t];
+                                     POLYBENCH_FDTD_2D_BODY1;
+                                   };
+      auto poly_fdtd2d_base_lam2 = [=](Index_type i, Index_type j) {
+                                     POLYBENCH_FDTD_2D_BODY2;
+                                   };
+      auto poly_fdtd2d_base_lam3 = [=](Index_type i, Index_type j) {
+                                     POLYBENCH_FDTD_2D_BODY3;
+                                   };
+      auto poly_fdtd2d_base_lam4 = [=](Index_type i, Index_type j) {
+                                     POLYBENCH_FDTD_2D_BODY4;
+                                   };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // 't' is not declared in this function; presumably the data setup macro provides it.
+        for (t = 0; t < tsteps; ++t) {
+          // The wrapper lambda copies the current t and forwards it explicitly.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(0), ny,
+                           [=](Index_type j) {
+            poly_fdtd2d_base_lam1(j,t);
+          });
+          // Flattened (i,j) nests, unpacked exactly as in the Base_StdPar case.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(0), (nx-1)*ny,
+                           [=](Index_type ij) {
+              const auto i  = 1 + ij / ny;  // i in [1, nx)
+              const auto j  =     ij % ny;  // j in [0, ny)
+              poly_fdtd2d_base_lam2(i, j);
+          });
+
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(0), nx*(ny-1),
+                           [=](Index_type ij) {
+              const auto i  =     ij / (ny-1);  // i in [0, nx)
+              const auto j  = 1 + ij % (ny-1);  // j in [1, ny)
+              poly_fdtd2d_base_lam3(i, j);
+          });
+
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(0), (nx-1)*(ny-1),
+                           [=](Index_type ij) {
+              const auto i  = ij / (ny-1);  // i in [0, nx-1)
+              const auto j  = ij % (ny-1);  // j in [0, ny-1)
+              poly_fdtd2d_base_lam4(i, j);
+          });
+
+        }  // tstep loop
+
+      }  // run_reps
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\nPOLYBENCH_FDTD_2D  Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_FDTD_2D.cpp b/src/polybench/POLYBENCH_FDTD_2D.cpp
index 37dd1f9f5..1709d5eb7 100644
--- a/src/polybench/POLYBENCH_FDTD_2D.cpp
+++ b/src/polybench/POLYBENCH_FDTD_2D.cpp
@@ -84,6 +84,9 @@ POLYBENCH_FDTD_2D::POLYBENCH_FDTD_2D(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_FDTD_2D::~POLYBENCH_FDTD_2D()
diff --git a/src/polybench/POLYBENCH_FDTD_2D.hpp b/src/polybench/POLYBENCH_FDTD_2D.hpp
index e1d1b67c3..29127bd72 100644
--- a/src/polybench/POLYBENCH_FDTD_2D.hpp
+++ b/src/polybench/POLYBENCH_FDTD_2D.hpp
@@ -113,6 +113,7 @@ class POLYBENCH_FDTD_2D : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp
index 40aad73e3..b40ff70df 100644
--- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp
+++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp
@@ -17,7 +17,6 @@ namespace rajaperf
 namespace polybench
 {
 
-
 void POLYBENCH_FLOYD_WARSHALL::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
 {
   const Index_type run_reps= getRunReps();
diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp
new file mode 100644
index 000000000..b17f9f9f4
--- /dev/null
+++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp
@@ -0,0 +1,119 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_FLOYD_WARSHALL.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+//#define USE_STDPAR_COLLAPSE 1
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the FLOYD_WARSHALL kernel for the StdPar variant selected by vid.
+  const Index_type run_reps = getRunReps();
+  // The tuning index is not used here, hence RAJAPERF_UNUSED_ARG in the signature.
+  POLYBENCH_FLOYD_WARSHALL_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macro is expanded directly inside the loops.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // The k loop is an ordinary serial loop; only the (i,j) sweep per k uses std algorithms.
+        for (Index_type k = 0; k < N; ++k) {
+#ifdef USE_STDPAR_COLLAPSE  // flattened form: one parallel loop over N*N cells
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N*N,
+                         [=](Index_type ji) {
+            const auto j  = ji / N;
+            const auto i  = ji % N;
+#else  // nested form: parallel over i, sequential over j
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::seq,
+                           counting_iterator<Index_type>(0), N,
+                           [=](Index_type j) {
+#endif
+              POLYBENCH_FLOYD_WARSHALL_BODY;
+            });
+#ifndef USE_STDPAR_COLLAPSE  // closes the outer i loop of the nested form
+          });
+#endif
+        }
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop structure as Base_StdPar, with the body macro wrapped in a named lambda.
+      auto poly_floydwarshall_base_lam = [=](Index_type k, Index_type i, Index_type j) {
+                                           POLYBENCH_FLOYD_WARSHALL_BODY;
+                                         };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // The k loop is an ordinary serial loop; only the (i,j) sweep per k uses std algorithms.
+        for (Index_type k = 0; k < N; ++k) {
+#ifdef USE_STDPAR_COLLAPSE  // flattened form: one parallel loop over N*N cells
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N*N,
+                         [=](Index_type ji) {
+            const auto j  = ji / N;
+            const auto i  = ji % N;
+#else  // nested form: parallel over i, sequential over j
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::seq,
+                           counting_iterator<Index_type>(0), N,
+                           [=](Index_type j) {
+#endif
+              poly_floydwarshall_base_lam(k, i, j);
+            });
+#ifndef USE_STDPAR_COLLAPSE  // closes the outer i loop of the nested form
+          });
+#endif
+        }
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp
index 9770821b0..5ce169421 100644
--- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp
+++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp
@@ -60,6 +60,9 @@ POLYBENCH_FLOYD_WARSHALL::POLYBENCH_FLOYD_WARSHALL(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_FLOYD_WARSHALL::~POLYBENCH_FLOYD_WARSHALL()
diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp
index e8a067377..0eebb9ee7 100644
--- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp
+++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp
@@ -76,6 +76,7 @@ class POLYBENCH_FLOYD_WARSHALL : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp
new file mode 100644
index 000000000..1c1687471
--- /dev/null
+++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp
@@ -0,0 +1,133 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_GEMM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the GEMM kernel for the StdPar variant selected by vid.
+  const Index_type run_reps = getRunReps();
+  // The tuning index is not used here, hence RAJAPERF_UNUSED_ARG in the signature.
+  POLYBENCH_GEMM_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macros are expanded directly inside the loops.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // USE_STDPAR_COLLAPSE picks the flattened (i,j) loop; this file itself never defines it.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nj,
+                         [=](Index_type ij) {
+            const auto i  = ij / nj;
+            const auto j  = ij % nj;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nj,
+                           [=](Index_type j) {
+#endif
+            POLYBENCH_GEMM_BODY1;
+            POLYBENCH_GEMM_BODY2;
+            std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                             counting_iterator<Index_type>(0), nk,
+                             [=,&dot](Index_type k) {
+               POLYBENCH_GEMM_BODY3;
+            });
+            POLYBENCH_GEMM_BODY4;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop nest as Base_StdPar, with body macros 2/3/4 wrapped in named lambdas.
+      auto poly_gemm_base_lam2 = [=](Index_type i, Index_type j) {
+                                   POLYBENCH_GEMM_BODY2;
+                                 };
+      auto poly_gemm_base_lam3 = [=](Index_type i, Index_type j, Index_type k, Real_type& dot) {
+                                   POLYBENCH_GEMM_BODY3;
+                                  };
+      auto poly_gemm_base_lam4 = [=](Index_type i, Index_type j, Real_type& dot) {
+                                   POLYBENCH_GEMM_BODY4;
+                                  };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // BODY1 stays inline below (presumably it declares the per-(i,j) dot).
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni*nj,
+                         [=](Index_type ij) {
+            const auto i  = ij / nj;
+            const auto j  = ij % nj;
+#else
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), ni,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), nj,
+                           [=](Index_type j) {
+#endif
+            POLYBENCH_GEMM_BODY1;
+            poly_gemm_base_lam2(i, j);
+            std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                             counting_iterator<Index_type>(0), nk,
+                             [=,&dot](Index_type k) {
+              poly_gemm_base_lam3(i, j, k, dot);
+            });
+            poly_gemm_base_lam4(i, j, dot);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp
index e080c6df5..202713e61 100644
--- a/src/polybench/POLYBENCH_GEMM.cpp
+++ b/src/polybench/POLYBENCH_GEMM.cpp
@@ -70,6 +70,9 @@ POLYBENCH_GEMM::POLYBENCH_GEMM(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_GEMM::~POLYBENCH_GEMM()
diff --git a/src/polybench/POLYBENCH_GEMM.hpp b/src/polybench/POLYBENCH_GEMM.hpp
index 33ea77997..cd1e00865 100644
--- a/src/polybench/POLYBENCH_GEMM.hpp
+++ b/src/polybench/POLYBENCH_GEMM.hpp
@@ -99,6 +99,7 @@ class POLYBENCH_GEMM : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_GEMVER-Seq.cpp b/src/polybench/POLYBENCH_GEMVER-Seq.cpp
index d17f9b709..2423507f6 100644
--- a/src/polybench/POLYBENCH_GEMVER-Seq.cpp
+++ b/src/polybench/POLYBENCH_GEMVER-Seq.cpp
@@ -131,7 +131,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t
       auto poly_gemver_lam1 = [=] (Index_type i, Index_type j) {
                                    POLYBENCH_GEMVER_BODY1_RAJA;
                                   };
-      auto poly_gemver_lam2 = [=] (Index_type /* i */, Real_type &dot) {
+      auto poly_gemver_lam2 = [=] (Real_type &dot) {
                                    POLYBENCH_GEMVER_BODY2_RAJA;
                                   };
       auto poly_gemver_lam3 = [=] (Index_type i, Index_type j, Real_type &dot) {
@@ -162,10 +162,10 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t
           >
         >;
 
-      using EXEC_POL24 =
+      using EXEC_POL2 =
         RAJA::KernelPolicy<
           RAJA::statement::For<0, RAJA::loop_exec,
-            RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>,
+            RAJA::statement::Lambda<0, RAJA::Params<0>>,
             RAJA::statement::For<1, RAJA::loop_exec,
               RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>>
             >,
@@ -175,6 +175,17 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t
 
       using EXEC_POL3 = RAJA::loop_exec;
 
+      using EXEC_POL4 =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>>
+            >,
+            RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>>
+          >
+        >;
+
       startTimer();
       for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
 
@@ -183,7 +194,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t
           poly_gemver_lam1
         );
 
-        RAJA::kernel_param<EXEC_POL24>(
+        RAJA::kernel_param<EXEC_POL2>(
           RAJA::make_tuple(RAJA::RangeSegment{0, n},
                            RAJA::RangeSegment{0, n}),
           RAJA::tuple<Real_type>{0.0},
@@ -197,7 +208,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t
           poly_gemver_lam5
         );
 
-        RAJA::kernel_param<EXEC_POL24>(
+        RAJA::kernel_param<EXEC_POL4>(
           RAJA::make_tuple(RAJA::RangeSegment{0, n},
                            RAJA::RangeSegment{0, n}),
           RAJA::tuple<Real_type>{0.0},
diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp
new file mode 100644
index 000000000..32d715002
--- /dev/null
+++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp
@@ -0,0 +1,168 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_GEMVER.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the GEMVER kernel for the StdPar variant selected by vid.
+  const Index_type run_reps = getRunReps();
+  // The tuning index is not used here, hence RAJAPERF_UNUSED_ARG in the signature.
+  POLYBENCH_GEMVER_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macros are expanded directly inside the loops.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Stage 1: (i,j) nest with no by-reference capture, so the inner loop may stay unseq.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), n,
+                           [=](Index_type j) {
+            POLYBENCH_GEMVER_BODY1;
+          });
+        });
+        // Stage 2: parallel over i, in-order inner sweep over j.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n,
+                         [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY2;
+          std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                           counting_iterator<Index_type>(0), n,
+                           [=,&dot](Index_type j) {
+            POLYBENCH_GEMVER_BODY3;
+          });
+          POLYBENCH_GEMVER_BODY4;
+        });
+        // Stage 3: single parallel loop over i.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n,
+                         [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY5;
+        });
+        // Stage 4: parallel over i, in-order inner sweep over j.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n,
+                         [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY6;
+          std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                           counting_iterator<Index_type>(0), n,
+                           [=,&dot](Index_type j) {
+            POLYBENCH_GEMVER_BODY7;
+          });
+          POLYBENCH_GEMVER_BODY8;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same four stages as Base_StdPar, with body macros 1/3/4/5/7/8 wrapped in named lambdas.
+      auto poly_gemver_base_lam1 = [=](Index_type i, Index_type j) {
+                                     POLYBENCH_GEMVER_BODY1;
+                                   };
+      auto poly_gemver_base_lam3 = [=](Index_type i, Index_type j, Real_type &dot) {
+                                     POLYBENCH_GEMVER_BODY3;
+                                   };
+      auto poly_gemver_base_lam4 = [=](Index_type i, Real_type &dot) {
+                                     POLYBENCH_GEMVER_BODY4;
+                                   };
+      auto poly_gemver_base_lam5 = [=](Index_type i) {
+                                     POLYBENCH_GEMVER_BODY5;
+                                   };
+      auto poly_gemver_base_lam7 = [=](Index_type i, Index_type j, Real_type &dot) {
+                                     POLYBENCH_GEMVER_BODY7;
+                                    };
+      auto poly_gemver_base_lam8 = [=](Index_type i, Real_type &dot) {
+                                     POLYBENCH_GEMVER_BODY8;
+                                   };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Stage 1: (i,j) nest with no by-reference capture, so the inner loop may stay unseq.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n,
+                         [=](Index_type i) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator<Index_type>(0), n,
+                           [=](Index_type j) {
+            poly_gemver_base_lam1(i, j);
+          });
+        });
+        // Stage 2: BODY2 stays inline (presumably it declares dot).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n,
+                         [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY2;
+          std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                           counting_iterator<Index_type>(0), n,
+                           [=,&dot](Index_type j) {
+            poly_gemver_base_lam3(i, j, dot);
+          });
+          poly_gemver_base_lam4(i, dot);
+        });
+        // Stage 3: single parallel loop over i.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n,
+                         [=](Index_type i) {
+          poly_gemver_base_lam5(i);
+        });
+        // Stage 4: BODY6 stays inline (presumably it declares dot).
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n,
+                         [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY6;
+          std::for_each_n( std::execution::seq,  // dot is shared by reference: updates must stay sequenced
+                           counting_iterator<Index_type>(0), n,
+                           [=,&dot](Index_type j) {
+            poly_gemver_base_lam7(i, j, dot);
+          });
+          poly_gemver_base_lam8(i, dot);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp
index 99e16324f..5e6414c2f 100644
--- a/src/polybench/POLYBENCH_GEMVER.cpp
+++ b/src/polybench/POLYBENCH_GEMVER.cpp
@@ -79,6 +79,9 @@ POLYBENCH_GEMVER::POLYBENCH_GEMVER(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_GEMVER::~POLYBENCH_GEMVER()
diff --git a/src/polybench/POLYBENCH_GEMVER.hpp b/src/polybench/POLYBENCH_GEMVER.hpp
index 07ecae962..cbf6ef605 100644
--- a/src/polybench/POLYBENCH_GEMVER.hpp
+++ b/src/polybench/POLYBENCH_GEMVER.hpp
@@ -18,11 +18,9 @@
 /// Note: this part of the kernel is modified to avoid
 ///       excessively large checksums
 /// for (Index_type i = 0; i < N; i++) {
-///   Real_type dot = 0.0;
 ///   for (Index_type j = 0; j < N; j++) {
-///     dot += beta * A[j][i] * y[j];
+///     x[i] = x[i] + beta * A[j][i] * y[j];
 ///   }
-///   x[i] = dot;
 /// }
 ///
 /// for (Index_type i = 0; i < N; i++) {
@@ -98,7 +96,7 @@
   xview(i) += zview(i);
 
 #define POLYBENCH_GEMVER_BODY6_RAJA \
-  dot = wview(i);
+  dot = w[i];
 
 #define POLYBENCH_GEMVER_BODY7_RAJA \
   dot +=  alpha * Aview(i,j) * xview(j);
@@ -152,6 +150,7 @@ class POLYBENCH_GEMVER : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp
index 642d0b463..3b8982696 100644
--- a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp
+++ b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp
@@ -93,9 +93,9 @@ void POLYBENCH_GESUMMV::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(
 
       using EXEC_POL =
         RAJA::KernelPolicy<
-          RAJA::statement::For<0, RAJA::loop_exec,         // i
+          RAJA::statement::For<0, RAJA::loop_exec,
             RAJA::statement::Lambda<0, RAJA::Params<0,1>>,
-            RAJA::statement::For<1, RAJA::loop_exec,       // j
+            RAJA::statement::For<1, RAJA::loop_exec,
               RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Params<0,1>>
             >,
             RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0,1>>
diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp
new file mode 100644
index 000000000..23afa5f2b
--- /dev/null
+++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp
@@ -0,0 +1,100 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_GESUMMV.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the C++ parallel-algorithm (StdPar) variants selected by vid.
+  const Index_type run_reps= getRunReps();
+
+  POLYBENCH_GESUMMV_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macros are expanded directly inside the outer lambda.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // N outer iterations over i, dispatched with par_unseq.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          POLYBENCH_GESUMMV_BODY1;
+          // The j loop accumulates into tmpdot/ydot, so it stays sequential:
+          // under std::execution::unseq those updates would be unsequenced.
+          for (Index_type j = 0; j < N; ++j) {
+            POLYBENCH_GESUMMV_BODY2;
+          }
+          POLYBENCH_GESUMMV_BODY3;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same kernel, with BODY2/BODY3 wrapped in named lambdas.
+      auto poly_gesummv_base_lam2 = [=](Index_type i, Index_type j, Real_type& tmpdot, Real_type& ydot) {
+                                      POLYBENCH_GESUMMV_BODY2;
+                                    };
+      auto poly_gesummv_base_lam3 = [=](Index_type i, Real_type& tmpdot, Real_type& ydot) {
+                                      POLYBENCH_GESUMMV_BODY3;
+                                    };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          POLYBENCH_GESUMMV_BODY1;
+          // Sequential j loop for the same reason as in Base_StdPar: the
+          // shared accumulators must not be updated by unsequenced calls.
+          for (Index_type j = 0; j < N; ++j) {
+            poly_gesummv_base_lam2(i, j, tmpdot, ydot);
+          }
+          poly_gesummv_base_lam3(i, tmpdot, ydot);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_GESUMMV.cpp b/src/polybench/POLYBENCH_GESUMMV.cpp
index fdf07a58f..7e250b9e1 100644
--- a/src/polybench/POLYBENCH_GESUMMV.cpp
+++ b/src/polybench/POLYBENCH_GESUMMV.cpp
@@ -59,6 +59,9 @@ POLYBENCH_GESUMMV::POLYBENCH_GESUMMV(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_GESUMMV::~POLYBENCH_GESUMMV()
diff --git a/src/polybench/POLYBENCH_GESUMMV.hpp b/src/polybench/POLYBENCH_GESUMMV.hpp
index 32a1b0eae..9a6552301 100644
--- a/src/polybench/POLYBENCH_GESUMMV.hpp
+++ b/src/polybench/POLYBENCH_GESUMMV.hpp
@@ -98,6 +98,7 @@ class POLYBENCH_GESUMMV : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp
index 7222e5934..1808e53b0 100644
--- a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp
+++ b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp
@@ -107,6 +107,13 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(
 
       POLYBENCH_HEAT_3D_VIEWS_RAJA;
 
+      auto poly_heat3d_lam1 = [=](Index_type i, Index_type j, Index_type k) {
+                                POLYBENCH_HEAT_3D_BODY1_RAJA;
+                              };
+      auto poly_heat3d_lam2 = [=](Index_type i, Index_type j, Index_type k) {
+                                POLYBENCH_HEAT_3D_BODY2_RAJA;
+                              };
+
       using EXEC_POL =
         RAJA::KernelPolicy<
           RAJA::statement::For<0, RAJA::loop_exec,
@@ -115,6 +122,13 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(
                 RAJA::statement::Lambda<0>
               >
             >
+          >,
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::For<2, RAJA::loop_exec,
+                RAJA::statement::Lambda<1>
+              >
+            >
           >
         >;
 
@@ -127,20 +141,8 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(
                                                    RAJA::RangeSegment{1, N-1},
                                                    RAJA::RangeSegment{1, N-1}),
 
-            [=](Index_type i, Index_type j, Index_type k) {
-              POLYBENCH_HEAT_3D_BODY1_RAJA;
-            }
-
-          );
-
-          RAJA::kernel<EXEC_POL>( RAJA::make_tuple(RAJA::RangeSegment{1, N-1},
-                                                   RAJA::RangeSegment{1, N-1},
-                                                   RAJA::RangeSegment{1, N-1}),
-
-            [=](Index_type i, Index_type j, Index_type k) {
-              POLYBENCH_HEAT_3D_BODY2_RAJA;
-            }
-
+            poly_heat3d_lam1,
+            poly_heat3d_lam2
           );
 
         }
diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp
new file mode 100644
index 000000000..1b70e2441
--- /dev/null
+++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp
@@ -0,0 +1,198 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_HEAT_3D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the C++ parallel-algorithm (StdPar) variants selected by vid.
+  const Index_type run_reps = getRunReps();
+
+  POLYBENCH_HEAT_3D_DATA_SETUP;
+
+#ifdef USE_STDPAR_COLLAPSE
+  const auto nn = N-2;  // iteration count per dimension in the collapsed form
+#endif
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macros are expanded directly inside the lambdas.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+          // Sweep 1 (BODY1): one flat nn^3 loop, or a 3-deep i/j/k nest.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nn*nn*nn,
+                         [=](Index_type ijk) {
+              const auto i  = 1 + ijk / (nn*nn);
+              const auto jk = ijk % (nn*nn);
+              const auto j  = 1 + jk / nn;
+              const auto k  = 1 + jk % nn;
+#else
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), N-2,
+                           [=](Index_type i) {
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(1), N-2,
+                             [=](Index_type j) {
+              std::for_each_n( std::execution::unseq,
+                               counting_iterator<Index_type>(1), N-2,
+                               [=](Index_type k) {
+#endif
+                POLYBENCH_HEAT_3D_BODY1;
+#ifndef USE_STDPAR_COLLAPSE
+              });
+            });
+#endif
+          });
+          // Sweep 2 (BODY2): same iteration space, issued after sweep 1.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nn*nn*nn,
+                         [=](Index_type ijk) {
+              const auto i  = 1 + ijk / (nn*nn);
+              const auto jk = ijk % (nn*nn);
+              const auto j  = 1 + jk / nn;
+              const auto k  = 1 + jk % nn;
+#else
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), N-2,
+                           [=](Index_type i) {
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(1), N-2,
+                             [=](Index_type j) {
+              std::for_each_n( std::execution::unseq,
+                               counting_iterator<Index_type>(1), N-2,
+                               [=](Index_type k) {
+#endif
+                POLYBENCH_HEAT_3D_BODY2;
+#ifndef USE_STDPAR_COLLAPSE
+              });
+            });
+#endif
+          });
+
+        }
+
+      }
+      stopTimer();
+      // NOTE(review): presumably restores the initial data - confirm in header.
+      POLYBENCH_HEAT_3D_DATA_RESET;
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same kernel, with BODY1/BODY2 wrapped in named lambdas.
+      auto poly_heat3d_base_lam1 = [=](Index_type i, Index_type j,
+                                       Index_type k) {
+                                     POLYBENCH_HEAT_3D_BODY1;
+                                   };
+      auto poly_heat3d_base_lam2 = [=](Index_type i, Index_type j,
+                                       Index_type k) {
+                                     POLYBENCH_HEAT_3D_BODY2;
+                                   };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+          // Sweep 1: calls poly_heat3d_base_lam1 for every (i, j, k).
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nn*nn*nn,
+                         [=](Index_type ijk) {
+              const auto i  = 1 + ijk / (nn*nn);
+              const auto jk = ijk % (nn*nn);
+              const auto j  = 1 + jk / nn;
+              const auto k  = 1 + jk % nn;
+#else
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), N-2,
+                           [=](Index_type i) {
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(1), N-2,
+                             [=](Index_type j) {
+              std::for_each_n( std::execution::unseq,
+                               counting_iterator<Index_type>(1), N-2,
+                               [=](Index_type k) {
+#endif
+                poly_heat3d_base_lam1(i, j, k);
+#ifndef USE_STDPAR_COLLAPSE
+              });
+            });
+#endif
+          });
+          // Sweep 2: calls poly_heat3d_base_lam2 over the same index space.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), nn*nn*nn,
+                         [=](Index_type ijk) {
+              const auto i  = 1 + ijk / (nn*nn);
+              const auto jk = ijk % (nn*nn);
+              const auto j  = 1 + jk / nn;
+              const auto k  = 1 + jk % nn;
+#else
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), N-2,
+                           [=](Index_type i) {
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(1), N-2,
+                             [=](Index_type j) {
+              std::for_each_n( std::execution::unseq,
+                               counting_iterator<Index_type>(1), N-2,
+                               [=](Index_type k) {
+#endif
+                poly_heat3d_base_lam2(i, j, k);
+#ifndef USE_STDPAR_COLLAPSE
+              });
+            });
+#endif
+          });
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_HEAT_3D_DATA_RESET;
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_HEAT_3D.cpp b/src/polybench/POLYBENCH_HEAT_3D.cpp
index be6c0e218..506738787 100644
--- a/src/polybench/POLYBENCH_HEAT_3D.cpp
+++ b/src/polybench/POLYBENCH_HEAT_3D.cpp
@@ -70,6 +70,9 @@ POLYBENCH_HEAT_3D::POLYBENCH_HEAT_3D(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_HEAT_3D::~POLYBENCH_HEAT_3D()
diff --git a/src/polybench/POLYBENCH_HEAT_3D.hpp b/src/polybench/POLYBENCH_HEAT_3D.hpp
index 8d7eff93c..fd641dbed 100644
--- a/src/polybench/POLYBENCH_HEAT_3D.hpp
+++ b/src/polybench/POLYBENCH_HEAT_3D.hpp
@@ -124,6 +124,7 @@ class POLYBENCH_HEAT_3D : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
@@ -140,6 +141,8 @@ class POLYBENCH_HEAT_3D : public KernelBase
   Index_type m_N;
   Index_type m_tsteps;
 
+  Real_type m_factor;
+
   Real_ptr m_A;
   Real_ptr m_B;
   Real_ptr m_Ainit;
diff --git a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp
index a7e81fe87..3c6e34bdc 100644
--- a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp
+++ b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp
@@ -18,7 +18,6 @@ namespace rajaperf
 namespace polybench
 {
 
-
 void POLYBENCH_JACOBI_1D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
 {
   const Index_type run_reps= getRunReps();
diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp
new file mode 100644
index 000000000..ba3a65f9d
--- /dev/null
+++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp
@@ -0,0 +1,110 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_JACOBI_1D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the C++ parallel-algorithm (StdPar) variants selected by vid.
+  const Index_type run_reps= getRunReps();
+
+  POLYBENCH_JACOBI_1D_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macros are expanded directly inside the lambdas.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+          // Two sweeps per time step, each issuing N-2 calls from index 1.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), N-2,
+                           [=](Index_type i) {
+            POLYBENCH_JACOBI_1D_BODY1;
+          });
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), N-2,
+                           [=](Index_type i) {
+            POLYBENCH_JACOBI_1D_BODY2;
+          });
+
+        }
+
+      }
+      stopTimer();
+      // NOTE(review): presumably restores the initial data - confirm in header.
+      POLYBENCH_JACOBI_1D_DATA_RESET;
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same kernel, with BODY1/BODY2 wrapped in named lambdas.
+      auto poly_jacobi1d_lam1 = [=] (Index_type i) {
+                                  POLYBENCH_JACOBI_1D_BODY1;
+                                };
+      auto poly_jacobi1d_lam2 = [=] (Index_type i) {
+                                  POLYBENCH_JACOBI_1D_BODY2;
+                                };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+          // The second sweep is issued only after the first call returns.
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), N-2,
+                           [=](Index_type i) {
+            poly_jacobi1d_lam1(i);
+          });
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), N-2,
+                           [=](Index_type i) {
+            poly_jacobi1d_lam2(i);
+          });
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_JACOBI_1D_DATA_RESET;
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_JACOBI_1D.cpp b/src/polybench/POLYBENCH_JACOBI_1D.cpp
index 925cd2682..33ca0dc3e 100644
--- a/src/polybench/POLYBENCH_JACOBI_1D.cpp
+++ b/src/polybench/POLYBENCH_JACOBI_1D.cpp
@@ -67,6 +67,9 @@ POLYBENCH_JACOBI_1D::POLYBENCH_JACOBI_1D(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_JACOBI_1D::~POLYBENCH_JACOBI_1D()
diff --git a/src/polybench/POLYBENCH_JACOBI_1D.hpp b/src/polybench/POLYBENCH_JACOBI_1D.hpp
index 035096f89..4a94f891e 100644
--- a/src/polybench/POLYBENCH_JACOBI_1D.hpp
+++ b/src/polybench/POLYBENCH_JACOBI_1D.hpp
@@ -70,6 +70,7 @@ class POLYBENCH_JACOBI_1D : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp
new file mode 100644
index 000000000..948113937
--- /dev/null
+++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp
@@ -0,0 +1,169 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_JACOBI_2D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the C++ parallel-algorithm (StdPar) variants selected by vid.
+  const Index_type run_reps= getRunReps();
+
+  POLYBENCH_JACOBI_2D_DATA_SETUP;
+  // Iteration count per dimension; used by both loop forms below.
+  const auto n2 = (N-2);
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macros are expanded directly inside the lambdas.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+          // Sweep 1 (BODY1): one flat n2*n2 loop, or an i/j nest.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n2*n2,
+                         [=](Index_type ij) {
+              const auto i  = 1 + ij / n2;
+              const auto j  = 1 + ij % n2;
+#else
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), n2,
+                           [=](Index_type i) {
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(1), n2,
+                             [=](Index_type j) {
+#endif
+              POLYBENCH_JACOBI_2D_BODY1;
+#ifndef USE_STDPAR_COLLAPSE
+            });
+#endif
+          });
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n2*n2,
+                         [=](Index_type ij) {
+              const auto i  = 1 + ij / n2;
+              const auto j  = 1 + ij % n2;
+#else
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), n2,
+                           [=](Index_type i) {
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(1), n2,
+                             [=](Index_type j) {
+#endif
+              POLYBENCH_JACOBI_2D_BODY2;
+#ifndef USE_STDPAR_COLLAPSE
+            });
+#endif
+          });
+
+        }
+
+      }
+      stopTimer();
+      // NOTE(review): presumably restores the initial data - confirm in header.
+      POLYBENCH_JACOBI_2D_DATA_RESET;
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same kernel, with BODY1/BODY2 wrapped in named lambdas.
+      auto poly_jacobi2d_base_lam1 = [=](Index_type i, Index_type j) {
+                                       POLYBENCH_JACOBI_2D_BODY1;
+                                     };
+      auto poly_jacobi2d_base_lam2 = [=](Index_type i, Index_type j) {
+                                       POLYBENCH_JACOBI_2D_BODY2;
+                                     };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+          // Sweep 1: calls poly_jacobi2d_base_lam1 for every (i, j).
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n2*n2,
+                         [=](Index_type ij) {
+              const auto i  = 1 + ij / n2;
+              const auto j  = 1 + ij % n2;
+#else
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), n2,
+                           [=](Index_type i) {
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(1), n2,
+                             [=](Index_type j) {
+#endif
+              poly_jacobi2d_base_lam1(i, j);
+#ifndef USE_STDPAR_COLLAPSE
+            });
+#endif
+          });
+          // Sweep 2: calls poly_jacobi2d_base_lam2 over the same index space.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), n2*n2,
+                         [=](Index_type ij) {
+              const auto i  = 1 + ij / n2;
+              const auto j  = 1 + ij % n2;
+#else
+          std::for_each_n( std::execution::par_unseq,
+                           counting_iterator<Index_type>(1), n2,
+                           [=](Index_type i) {
+            std::for_each_n( std::execution::unseq,
+                             counting_iterator<Index_type>(1), n2,
+                             [=](Index_type j) {
+#endif
+              poly_jacobi2d_base_lam2(i, j);
+#ifndef USE_STDPAR_COLLAPSE
+            });
+#endif
+          });
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_JACOBI_2D_DATA_RESET;
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_JACOBI_2D.cpp b/src/polybench/POLYBENCH_JACOBI_2D.cpp
index e1b62d364..4e1d97598 100644
--- a/src/polybench/POLYBENCH_JACOBI_2D.cpp
+++ b/src/polybench/POLYBENCH_JACOBI_2D.cpp
@@ -69,6 +69,9 @@ POLYBENCH_JACOBI_2D::POLYBENCH_JACOBI_2D(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_JACOBI_2D::~POLYBENCH_JACOBI_2D()
diff --git a/src/polybench/POLYBENCH_JACOBI_2D.hpp b/src/polybench/POLYBENCH_JACOBI_2D.hpp
index 49ab2cd40..17bd86c41 100644
--- a/src/polybench/POLYBENCH_JACOBI_2D.hpp
+++ b/src/polybench/POLYBENCH_JACOBI_2D.hpp
@@ -90,6 +90,7 @@ class POLYBENCH_JACOBI_2D : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp
new file mode 100644
index 000000000..27867a184
--- /dev/null
+++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp
@@ -0,0 +1,132 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_MVT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+namespace rajaperf 
+{
+namespace polybench
+{
+
+void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  // Times the C++ parallel-algorithm (StdPar) variants selected by vid.
+  const Index_type run_reps= getRunReps();
+
+  POLYBENCH_MVT_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body macros are expanded directly inside the outer lambdas.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // First pass (BODY1-3): N outer iterations over i with par_unseq.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          POLYBENCH_MVT_BODY1;
+          // The j loop accumulates into dot, so it stays sequential: under
+          // std::execution::unseq those updates would be unsequenced.
+          for (Index_type j = 0; j < N; ++j) {
+            POLYBENCH_MVT_BODY2;
+          }
+          POLYBENCH_MVT_BODY3;
+        });
+        // Second pass (BODY4-6): issued after the first pass returns.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          POLYBENCH_MVT_BODY4;
+          // Sequential inner reduction, as in the first pass: dot is a
+          // single accumulator shared by every j iteration.
+          for (Index_type j = 0; j < N; ++j) {
+            POLYBENCH_MVT_BODY5;
+          }
+          POLYBENCH_MVT_BODY6;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same kernel, with BODY2/3/5/6 wrapped in named lambdas.
+      auto poly_mvt_base_lam2 = [=] (Index_type i, Index_type j,
+                                     Real_type &dot) {
+                                  POLYBENCH_MVT_BODY2;
+                                };
+      auto poly_mvt_base_lam3 = [=] (Index_type i,
+                                     Real_type &dot) {
+                                  POLYBENCH_MVT_BODY3;
+                                };
+      auto poly_mvt_base_lam5 = [=] (Index_type i, Index_type j,
+                                     Real_type &dot) {
+                                  POLYBENCH_MVT_BODY5;
+                                };
+      auto poly_mvt_base_lam6 = [=] (Index_type i,
+                                     Real_type &dot) {
+                                  POLYBENCH_MVT_BODY6;
+                                };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          POLYBENCH_MVT_BODY1;
+          // Sequential j loop: poly_mvt_base_lam2 updates dot by reference,
+          // which unsequenced (unseq) invocations must not do.
+          for (Index_type j = 0; j < N; ++j) {
+            poly_mvt_base_lam2(i, j, dot);
+          }
+          poly_mvt_base_lam3(i, dot);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(0), N,
+                         [=](Index_type i) {
+          POLYBENCH_MVT_BODY4;
+          // Sequential j loop for the same reason: poly_mvt_base_lam5
+          // accumulates into the shared dot.
+          for (Index_type j = 0; j < N; ++j) {
+            poly_mvt_base_lam5(i, j, dot);
+          }
+          poly_mvt_base_lam6(i, dot);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n  POLYBENCH_MVT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp
index e58065f28..637431ed9 100644
--- a/src/polybench/POLYBENCH_MVT.cpp
+++ b/src/polybench/POLYBENCH_MVT.cpp
@@ -62,6 +62,9 @@ POLYBENCH_MVT::POLYBENCH_MVT(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 POLYBENCH_MVT::~POLYBENCH_MVT()
diff --git a/src/polybench/POLYBENCH_MVT.hpp b/src/polybench/POLYBENCH_MVT.hpp
index 518d75dd8..fb0adbcb9 100644
--- a/src/polybench/POLYBENCH_MVT.hpp
+++ b/src/polybench/POLYBENCH_MVT.hpp
@@ -112,6 +112,7 @@ class POLYBENCH_MVT : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp
new file mode 100644
index 000000000..968a24f58
--- /dev/null
+++ b/src/stream/ADD-StdPar.cpp
@@ -0,0 +1,105 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "ADD.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+
+void ADD::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of c[i] = a[i] + b[i] over [ibegin, iend).
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  // a, b and c are not declared in this function; presumably the macro does it.
+  ADD_DATA_SETUP;
+  // tune_idx is never read; vid alone selects the code path.
+  switch ( vid ) {
+    // Base variant: the addition is written inline at the call site.
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 0
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          ADD_BODY;
+        });
+#else
+        // Operands are lhs/rhs so they cannot be mistaken for arrays a and b.
+        std::transform( std::execution::par_unseq,
+                        &a[ibegin], &a[iend], &b[ibegin], &c[ibegin],
+                        [](Real_type lhs, Real_type rhs) { return lhs + rhs; });
+#endif
+      }
+      stopTimer();
+
+      break;
+    }
+    // Lambda variant: the same addition is bound to a name before timing.
+    case Lambda_StdPar : {
+
+#if 0
+      auto add_lam = [=](Index_type i) {
+                       ADD_BODY;
+                     };
+#else
+      auto add_lam = [](Real_type lhs, Real_type rhs) {
+                       return lhs + rhs;
+                     };
+#endif
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 0
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          add_lam(i);
+        });
+#else
+        std::transform( std::execution::par_unseq,
+                        &a[ibegin], &a[iend], &b[ibegin], &c[ibegin],
+                        add_lam );
+#endif
+      }
+      stopTimer();
+
+      break;
+    }
+    // Any other variant id is reported and nothing is timed.
+    default : {
+      getCout() << "\n  ADD : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif  // RUN_STDPAR
+}
+
+} // end namespace stream
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/stream/ADD.cpp b/src/stream/ADD.cpp
index 3ca91bed5..c58a73322 100644
--- a/src/stream/ADD.cpp
+++ b/src/stream/ADD.cpp
@@ -53,6 +53,9 @@ ADD::ADD(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/stream/ADD.hpp b/src/stream/ADD.hpp
index 49e09a602..34ed50cf0 100644
--- a/src/stream/ADD.hpp
+++ b/src/stream/ADD.hpp
@@ -52,6 +52,7 @@ class ADD : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/stream/CMakeLists.txt b/src/stream/CMakeLists.txt
index 03351ff5d..7a0e0b1fc 100644
--- a/src/stream/CMakeLists.txt
+++ b/src/stream/CMakeLists.txt
@@ -10,30 +10,35 @@ blt_add_library(
   NAME stream
   SOURCES ADD.cpp
           ADD-Seq.cpp 
+          ADD-StdPar.cpp 
           ADD-Hip.cpp
           ADD-Cuda.cpp
           ADD-OMP.cpp
           ADD-OMPTarget.cpp
           COPY.cpp 
           COPY-Seq.cpp 
+          COPY-StdPar.cpp 
           COPY-Hip.cpp
           COPY-Cuda.cpp
           COPY-OMP.cpp
           COPY-OMPTarget.cpp
           DOT.cpp 
           DOT-Seq.cpp 
+          DOT-StdPar.cpp 
           DOT-Hip.cpp 
           DOT-Cuda.cpp 
           DOT-OMP.cpp 
           DOT-OMPTarget.cpp 
           MUL.cpp 
           MUL-Seq.cpp 
+          MUL-StdPar.cpp 
           MUL-Hip.cpp 
           MUL-Cuda.cpp 
           MUL-OMP.cpp 
           MUL-OMPTarget.cpp 
           TRIAD.cpp 
           TRIAD-Seq.cpp 
+          TRIAD-StdPar.cpp 
           TRIAD-Hip.cpp 
           TRIAD-Cuda.cpp 
           TRIAD-OMPTarget.cpp 
diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp
new file mode 100644
index 000000000..488350a56
--- /dev/null
+++ b/src/stream/COPY-StdPar.cpp
@@ -0,0 +1,96 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "COPY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+
+void COPY::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of c[i] = a[i] over [ibegin, iend).
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  // a and c are not declared in this function; presumably the macro does it.
+  COPY_DATA_SETUP;
+  // tune_idx is never read; vid alone selects the code path.
+  switch ( vid ) {
+    // Base variant: the identity operation is written inline at the call.
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 0
+        std::copy( std::execution::par_unseq,
+                   &a[ibegin], &a[iend], &c[ibegin]);
+#else
+        // The element is called val so it cannot be mistaken for array a.
+        std::transform( std::execution::par_unseq,
+                        &a[ibegin], &a[iend], &c[ibegin],
+                        [](Real_type val) { return val; });
+#endif
+      }
+      stopTimer();
+
+      break;
+    }
+    // Lambda variant: the identity operation is bound to a name up front.
+    case Lambda_StdPar : {
+
+      // No capture is needed: the closure only returns its argument, which
+      // is named val so it cannot be mistaken for array a.
+      auto copy_lam = [](Real_type val) {
+                        return val;
+                      };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 0
+        std::copy( std::execution::par_unseq,
+                   &a[ibegin], &a[iend], &c[ibegin]);
+#else
+        std::transform( std::execution::par_unseq,
+                        &a[ibegin], &a[iend], &c[ibegin],
+                        copy_lam );
+#endif
+
+      }
+      stopTimer();
+
+      break;
+    }
+    // Any other variant id is reported and nothing is timed.
+    default : {
+      getCout() << "\n  COPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif  // RUN_STDPAR
+}
+
+} // end namespace stream
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/stream/COPY.cpp b/src/stream/COPY.cpp
index 9cbfcbff2..51df2fafb 100644
--- a/src/stream/COPY.cpp
+++ b/src/stream/COPY.cpp
@@ -53,6 +53,9 @@ COPY::COPY(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/stream/COPY.hpp b/src/stream/COPY.hpp
index 0544e0d2f..574364388 100644
--- a/src/stream/COPY.hpp
+++ b/src/stream/COPY.hpp
@@ -51,6 +51,7 @@ class COPY : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp
new file mode 100644
index 000000000..b00a9c5a7
--- /dev/null
+++ b/src/stream/DOT-StdPar.cpp
@@ -0,0 +1,96 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DOT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace stream
+{
+
+
+void DOT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps dot products of a and b over [ibegin, iend).
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  // a and b are not declared in this function; presumably the macro does it.
+  DOT_DATA_SETUP;
+  // tune_idx is never read; vid alone selects the code path.
+  switch ( vid ) {
+    // Base variant: two-range transform_reduce using its default operations.
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Each pass starts from m_dot_init and is then added into m_dot.
+        Real_type dot = m_dot_init;
+
+        dot += std::transform_reduce( std::execution::par_unseq,
+                                      &a[ibegin], &a[iend], &b[ibegin],
+                                      static_cast<Real_type>(0));
+
+        m_dot += dot;
+      }
+      stopTimer();
+
+      break;
+    }
+    // Lambda variant: reduces the per-index product over an index range.
+    case Lambda_StdPar : {
+
+      auto dot_base_lam = [=](Index_type i) -> Real_type {
+                            return a[i] * b[i];
+                          };
+
+      auto idx_first = counting_iterator<Index_type>(ibegin);
+      auto idx_last  = counting_iterator<Index_type>(iend);
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Each pass starts from m_dot_init and is then added into m_dot.
+        Real_type dot = m_dot_init;
+
+        dot += std::transform_reduce( std::execution::par_unseq,
+                                      idx_first, idx_last,
+                                      static_cast<Real_type>(0),
+                                      std::plus<Real_type>(),
+                                      dot_base_lam);
+
+        m_dot += dot;
+
+      }
+      stopTimer();
+
+      break;
+    }
+    // Any other variant id is reported and nothing is timed.
+    default : {
+      getCout() << "\n  DOT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif  // RUN_STDPAR
+}
+
+} // end namespace stream
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/stream/DOT.cpp b/src/stream/DOT.cpp
index cc32be5f2..4e4d713be 100644
--- a/src/stream/DOT.cpp
+++ b/src/stream/DOT.cpp
@@ -53,6 +53,9 @@ DOT::DOT(const RunParams& params)
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/stream/DOT.hpp b/src/stream/DOT.hpp
index 5912c120a..55cab2826 100644
--- a/src/stream/DOT.hpp
+++ b/src/stream/DOT.hpp
@@ -51,6 +51,7 @@ class DOT : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp
new file mode 100644
index 000000000..731cee15d
--- /dev/null
+++ b/src/stream/MUL-StdPar.cpp
@@ -0,0 +1,105 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MUL.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace stream
+{
+
+
+void MUL::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of b[i] = alpha * c[i] over [ibegin, iend).
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  // alpha, b and c are not declared here; presumably the macro provides them.
+  MUL_DATA_SETUP;
+  // tune_idx is never read; vid alone selects the code path.
+  switch ( vid ) {
+    // Base variant: the scaling is written inline at the call site.
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 0
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          MUL_BODY;
+        });
+#else
+        // The element is called cval so it cannot be mistaken for array c.
+        std::transform( std::execution::par_unseq,
+                        &c[ibegin], &c[iend], &b[ibegin],
+                        [=](Real_type cval) { return alpha * cval; });
+#endif
+      }
+      stopTimer();
+
+      break;
+    }
+    // Lambda variant: the same scaling is bound to a name before timing.
+    case Lambda_StdPar : {
+
+#if 0
+      auto mul_lam = [=](Index_type i) {
+                       MUL_BODY;
+                     };
+#else
+      auto mul_lam = [=](Real_type cval) {
+                       return alpha * cval;
+                     };
+#endif
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 0
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          mul_lam(i);
+        });
+#else
+        std::transform( std::execution::par_unseq,
+                        &c[ibegin], &c[iend], &b[ibegin],
+                        mul_lam );
+#endif
+      }
+      stopTimer();
+
+      break;
+    }
+    // Any other variant id is reported and nothing is timed.
+    default : {
+      getCout() << "\n  MUL : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif  // RUN_STDPAR
+}
+
+} // end namespace stream
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/stream/MUL.cpp b/src/stream/MUL.cpp
index 74ce32cb0..0bad0d536 100644
--- a/src/stream/MUL.cpp
+++ b/src/stream/MUL.cpp
@@ -53,6 +53,9 @@ MUL::MUL(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/stream/MUL.hpp b/src/stream/MUL.hpp
index 3db59092a..4596a7d39 100644
--- a/src/stream/MUL.hpp
+++ b/src/stream/MUL.hpp
@@ -52,6 +52,7 @@ class MUL : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp
new file mode 100644
index 000000000..93d08a2dd
--- /dev/null
+++ b/src/stream/TRIAD-StdPar.cpp
@@ -0,0 +1,105 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "TRIAD.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf 
+{
+namespace stream
+{
+
+
+void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+  // Times run_reps passes of a[i] = b[i] + alpha * c[i] over [ibegin, iend).
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+  // alpha, a, b and c are not declared here; presumably the macro provides them.
+  TRIAD_DATA_SETUP;
+  // tune_idx is never read; vid alone selects the code path.
+  switch ( vid ) {
+    // Base variant: the multiply-add is written inline at the call site.
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 0
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          TRIAD_BODY;
+        });
+#else
+        // Operands are bval/cval so they cannot be mistaken for arrays b and c.
+        std::transform( std::execution::par_unseq,
+                        &b[ibegin], &b[iend], &c[ibegin], &a[ibegin],
+                        [=](Real_type bval, Real_type cval) { return bval + alpha * cval; });
+#endif
+      }
+      stopTimer();
+
+      break;
+    }
+    // Lambda variant: the same multiply-add is bound to a name before timing.
+    case Lambda_StdPar : {
+
+#if 0
+      auto triad_lam = [=](Index_type i) {
+                         TRIAD_BODY;
+                       };
+#else
+      auto triad_lam = [=](Real_type bval, Real_type cval) {
+                         return bval + alpha * cval;
+                       };
+#endif
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 0
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator<Index_type>(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          triad_lam(i);
+        });
+#else
+        std::transform( std::execution::par_unseq,
+                        &b[ibegin], &b[iend], &c[ibegin], &a[ibegin],
+                        triad_lam );
+#endif
+      }
+      stopTimer();
+
+      break;
+    }
+    // Any other variant id is reported and nothing is timed.
+    default : {
+      getCout() << "\n  TRIAD : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif  // RUN_STDPAR
+}
+
+} // end namespace stream
+} // end namespace rajaperf
+
+#endif  // BUILD_STDPAR
+
diff --git a/src/stream/TRIAD.cpp b/src/stream/TRIAD.cpp
index 4790707bb..cb26ba43c 100644
--- a/src/stream/TRIAD.cpp
+++ b/src/stream/TRIAD.cpp
@@ -57,6 +57,9 @@ TRIAD::TRIAD(const RunParams& params)
   setVariantDefined( Lambda_HIP );
   setVariantDefined( RAJA_HIP );
 
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
+
   setVariantDefined( Kokkos_Lambda );
 }
 
diff --git a/src/stream/TRIAD.hpp b/src/stream/TRIAD.hpp
index 3f65bf804..1f7a3dd41 100644
--- a/src/stream/TRIAD.hpp
+++ b/src/stream/TRIAD.hpp
@@ -53,6 +53,7 @@ class TRIAD : public KernelBase
   void runCudaVariant(VariantID vid, size_t tune_idx);
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);