From 2543ff83115c63ff78f29c891d387c59a9c3bcad Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 7 Jul 2022 17:25:27 +0300 Subject: [PATCH 001/174] starting over with StdPar because git submodules are trash --- CMakeLists.txt | 6 ++++ src/algorithm/CMakeLists.txt | 2 ++ src/algorithm/MEMCPY.hpp | 1 + src/algorithm/MEMSET.hpp | 1 + src/algorithm/REDUCE_SUM.hpp | 1 + src/algorithm/SCAN.hpp | 1 + src/algorithm/SORT.cpp | 3 ++ src/algorithm/SORT.hpp | 1 + src/algorithm/SORTPAIRS.cpp | 3 ++ src/algorithm/SORTPAIRS.hpp | 1 + src/apps/CMakeLists.txt | 11 +++++++ src/apps/CONVECTION3DPA.hpp | 1 + src/apps/DEL_DOT_VEC_2D.cpp | 4 +++ src/apps/DEL_DOT_VEC_2D.hpp | 1 + src/apps/DIFFUSION3DPA.cpp | 2 ++ src/apps/DIFFUSION3DPA.hpp | 1 + src/apps/ENERGY.cpp | 4 +++ src/apps/ENERGY.hpp | 1 + src/apps/FIR.cpp | 4 +++ src/apps/FIR.hpp | 1 + src/apps/HALOEXCHANGE.cpp | 4 +++ src/apps/HALOEXCHANGE.hpp | 1 + src/apps/HALOEXCHANGE_FUSED.cpp | 4 +++ src/apps/HALOEXCHANGE_FUSED.hpp | 1 + src/apps/LTIMES.cpp | 4 +++ src/apps/LTIMES.hpp | 1 + src/apps/LTIMES_NOVIEW.cpp | 4 +++ src/apps/LTIMES_NOVIEW.hpp | 1 + src/apps/MASS3DPA.cpp | 2 ++ src/apps/MASS3DPA.hpp | 1 + src/apps/NODAL_ACCUMULATION_3D.hpp | 1 + src/apps/PRESSURE.cpp | 4 +++ src/apps/PRESSURE.hpp | 1 + src/apps/VOL3D.cpp | 4 +++ src/apps/VOL3D.hpp | 1 + src/apps/WIP-COUPLE.hpp | 1 + src/basic/CMakeLists.txt | 12 ++++++++ src/basic/DAXPY.cpp | 4 +++ src/basic/DAXPY.hpp | 1 + src/basic/DAXPY_ATOMIC.hpp | 1 + src/basic/IF_QUAD.cpp | 4 +++ src/basic/IF_QUAD.hpp | 1 + src/basic/INDEXLIST.hpp | 1 + src/basic/INDEXLIST_3LOOP.hpp | 1 + src/basic/INIT3.cpp | 4 +++ src/basic/INIT3.hpp | 1 + src/basic/INIT_VIEW1D.cpp | 4 +++ src/basic/INIT_VIEW1D.hpp | 1 + src/basic/INIT_VIEW1D_OFFSET.cpp | 4 +++ src/basic/INIT_VIEW1D_OFFSET.hpp | 1 + src/basic/MAT_MAT_SHARED.cpp | 4 +++ src/basic/MAT_MAT_SHARED.hpp | 1 + src/basic/MULADDSUB.cpp | 4 +++ src/basic/MULADDSUB.hpp | 1 + src/basic/NESTED_INIT-Seq.cpp | 1 + src/basic/NESTED_INIT.cpp | 4 +++ 
src/basic/NESTED_INIT.hpp | 1 + src/basic/PI_ATOMIC.cpp | 4 +++ src/basic/PI_ATOMIC.hpp | 1 + src/basic/PI_REDUCE.cpp | 4 +++ src/basic/PI_REDUCE.hpp | 1 + src/basic/REDUCE3_INT.cpp | 4 +++ src/basic/REDUCE3_INT.hpp | 1 + src/basic/REDUCE_STRUCT.hpp | 1 + src/basic/TRAP_INT.cpp | 4 +++ src/basic/TRAP_INT.hpp | 1 + src/common/KernelBase.cpp | 16 ++++++++++ src/common/KernelBase.hpp | 1 + src/common/RAJAPerfSuite.cpp | 24 +++++++++++++++ src/common/RAJAPerfSuite.hpp | 4 +++ src/lcals/CMakeLists.txt | 11 +++++++ src/lcals/DIFF_PREDICT.cpp | 4 +++ src/lcals/DIFF_PREDICT.hpp | 1 + src/lcals/EOS.cpp | 4 +++ src/lcals/EOS.hpp | 1 + src/lcals/FIRST_DIFF.cpp | 6 +++- src/lcals/FIRST_DIFF.hpp | 1 + src/lcals/FIRST_MIN.cpp | 4 +++ src/lcals/FIRST_MIN.hpp | 1 + src/lcals/FIRST_SUM.cpp | 4 +++ src/lcals/FIRST_SUM.hpp | 1 + src/lcals/GEN_LIN_RECUR.cpp | 4 +++ src/lcals/GEN_LIN_RECUR.hpp | 1 + src/lcals/HYDRO_1D.cpp | 4 +++ src/lcals/HYDRO_1D.hpp | 1 + src/lcals/HYDRO_2D.cpp | 4 +++ src/lcals/HYDRO_2D.hpp | 1 + src/lcals/INT_PREDICT.cpp | 4 +++ src/lcals/INT_PREDICT.hpp | 1 + src/lcals/PLANCKIAN.cpp | 4 +++ src/lcals/PLANCKIAN.hpp | 1 + src/lcals/TRIDIAG_ELIM.cpp | 4 +++ src/lcals/TRIDIAG_ELIM.hpp | 1 + src/polybench/CMakeLists.txt | 13 ++++++++ src/polybench/POLYBENCH_2MM.cpp | 4 +++ src/polybench/POLYBENCH_2MM.hpp | 1 + src/polybench/POLYBENCH_3MM-Seq.cpp | 1 - src/polybench/POLYBENCH_3MM.cpp | 4 +++ src/polybench/POLYBENCH_3MM.hpp | 1 + src/polybench/POLYBENCH_ADI.cpp | 4 +++ src/polybench/POLYBENCH_ADI.hpp | 1 + src/polybench/POLYBENCH_ATAX.cpp | 4 +++ src/polybench/POLYBENCH_ATAX.hpp | 1 + src/polybench/POLYBENCH_FDTD_2D.cpp | 4 +++ src/polybench/POLYBENCH_FDTD_2D.hpp | 1 + .../POLYBENCH_FLOYD_WARSHALL-Seq.cpp | 1 - src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp | 4 +++ src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp | 1 + src/polybench/POLYBENCH_GEMM.cpp | 4 +++ src/polybench/POLYBENCH_GEMM.hpp | 1 + src/polybench/POLYBENCH_GEMVER-Seq.cpp | 21 +++++++++---- 
src/polybench/POLYBENCH_GEMVER.cpp | 4 +++ src/polybench/POLYBENCH_GEMVER.hpp | 7 ++--- src/polybench/POLYBENCH_GESUMMV-Seq.cpp | 4 +-- src/polybench/POLYBENCH_GESUMMV.cpp | 4 +++ src/polybench/POLYBENCH_GESUMMV.hpp | 1 + src/polybench/POLYBENCH_HEAT_3D-Seq.cpp | 30 ++++++++++--------- src/polybench/POLYBENCH_HEAT_3D.cpp | 4 +++ src/polybench/POLYBENCH_HEAT_3D.hpp | 3 ++ src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp | 1 - src/polybench/POLYBENCH_JACOBI_1D.cpp | 4 +++ src/polybench/POLYBENCH_JACOBI_1D.hpp | 1 + src/polybench/POLYBENCH_JACOBI_2D.cpp | 4 +++ src/polybench/POLYBENCH_JACOBI_2D.hpp | 1 + src/polybench/POLYBENCH_MVT.cpp | 4 +++ src/polybench/POLYBENCH_MVT.hpp | 1 + src/stream/ADD-Seq.cpp | 1 - src/stream/ADD.cpp | 4 +++ src/stream/ADD.hpp | 1 + src/stream/CMakeLists.txt | 5 ++++ src/stream/COPY.cpp | 4 +++ src/stream/COPY.hpp | 1 + src/stream/DOT.cpp | 4 +++ src/stream/DOT.hpp | 1 + src/stream/MUL.cpp | 4 +++ src/stream/MUL.hpp | 1 + src/stream/TRIAD.cpp | 4 +++ src/stream/TRIAD.hpp | 1 + 138 files changed, 420 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 83a574af2..50670be17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,9 @@ endif() if(ENABLE_KOKKOS) set(CMAKE_CXX_STANDARD 17) set(BLT_CXX_STD c++17) +elseif (ENABLE_STDPAR) + set(CMAKE_CXX_STANDARD 20) + set(BLT_CXX_STD c++14) else() set(CMAKE_CXX_STANDARD 14) set(BLT_CXX_STD c++14) @@ -90,6 +93,9 @@ endif () if (ENABLE_OPENMP) add_definitions(-DRUN_OPENMP) endif () +if (ENABLE_STDPAR) + add_definitions(-DRUN_STDPAR) +endif () set(RAJA_PERFSUITE_VERSION_MAJOR 0) set(RAJA_PERFSUITE_VERSION_MINOR 11) diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index 73bea70d2..7c0fcd39f 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -15,11 +15,13 @@ blt_add_library( SCAN-OMP.cpp SORT.cpp SORT-Seq.cpp + SORT-StdPar.cpp SORT-Hip.cpp SORT-Cuda.cpp SORT-OMP.cpp SORTPAIRS.cpp SORTPAIRS-Seq.cpp + SORTPAIRS-StdPar.cpp 
SORTPAIRS-Hip.cpp SORTPAIRS-Cuda.cpp SORTPAIRS-OMP.cpp diff --git a/src/algorithm/MEMCPY.hpp b/src/algorithm/MEMCPY.hpp index 67fff5255..2477115ce 100644 --- a/src/algorithm/MEMCPY.hpp +++ b/src/algorithm/MEMCPY.hpp @@ -54,6 +54,7 @@ class MEMCPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/algorithm/MEMSET.hpp b/src/algorithm/MEMSET.hpp index 8edc5b611..0e9630fa7 100644 --- a/src/algorithm/MEMSET.hpp +++ b/src/algorithm/MEMSET.hpp @@ -54,6 +54,7 @@ class MEMSET : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/algorithm/REDUCE_SUM.hpp b/src/algorithm/REDUCE_SUM.hpp index f6dba52db..4d6f2f9b9 100644 --- a/src/algorithm/REDUCE_SUM.hpp +++ b/src/algorithm/REDUCE_SUM.hpp @@ -58,6 +58,7 @@ class REDUCE_SUM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/algorithm/SCAN.hpp b/src/algorithm/SCAN.hpp index 519789a55..51cc13325 100644 --- a/src/algorithm/SCAN.hpp +++ b/src/algorithm/SCAN.hpp @@ -61,6 +61,7 @@ class SCAN : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID 
vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); private: static const size_t default_gpu_block_size = 0; diff --git a/src/algorithm/SORT.cpp b/src/algorithm/SORT.cpp index b9722c4d7..15192f500 100644 --- a/src/algorithm/SORT.cpp +++ b/src/algorithm/SORT.cpp @@ -41,6 +41,9 @@ SORT::SORT(const RunParams& params) setVariantDefined( RAJA_CUDA ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( RAJA_StdPar ); } SORT::~SORT() diff --git a/src/algorithm/SORT.hpp b/src/algorithm/SORT.hpp index 0670c9dd0..3331dacf8 100644 --- a/src/algorithm/SORT.hpp +++ b/src/algorithm/SORT.hpp @@ -54,6 +54,7 @@ class SORT : public KernelBase { getCout() << "\n SORT : Unknown OMP Target variant id = " << vid << std::endl; } + void runStdParVariant(VariantID vid, size_t tune_idx); private: static const size_t default_gpu_block_size = 0; diff --git a/src/algorithm/SORTPAIRS.cpp b/src/algorithm/SORTPAIRS.cpp index df175844e..882527eb1 100644 --- a/src/algorithm/SORTPAIRS.cpp +++ b/src/algorithm/SORTPAIRS.cpp @@ -41,6 +41,9 @@ SORTPAIRS::SORTPAIRS(const RunParams& params) setVariantDefined( RAJA_CUDA ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( RAJA_StdPar ); } SORTPAIRS::~SORTPAIRS() diff --git a/src/algorithm/SORTPAIRS.hpp b/src/algorithm/SORTPAIRS.hpp index 658d3ad4b..9a2365957 100644 --- a/src/algorithm/SORTPAIRS.hpp +++ b/src/algorithm/SORTPAIRS.hpp @@ -53,6 +53,7 @@ class SORTPAIRS : public KernelBase { getCout() << "\n SORTPAIRS : Unknown OMP Target variant id = " << vid << std::endl; } + void runStdParVariant(VariantID vid, size_t tune_idx); private: static const size_t default_gpu_block_size = 0; diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index dbb0637fa..e79db7717 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -88,5 +88,16 @@ blt_add_library( VOL3D-OMP.cpp VOL3D-OMPTarget.cpp WIP-COUPLE.cpp + DEL_DOT_VEC_2D-StdPar.cpp + 
ENERGY-StdPar.cpp + FIR-StdPar.cpp + HALOEXCHANGE-StdPar.cpp + HALOEXCHANGE_FUSED-StdPar.cpp + LTIMES-StdPar.cpp + LTIMES_NOVIEW-StdPar.cpp + MASS3DPA-StdPar.cpp + PRESSURE-StdPar.cpp + VOL3D-StdPar.cpp + DIFFUSION3DPA-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/apps/CONVECTION3DPA.hpp b/src/apps/CONVECTION3DPA.hpp index 810aaefe3..47ffbe4e4 100644 --- a/src/apps/CONVECTION3DPA.hpp +++ b/src/apps/CONVECTION3DPA.hpp @@ -378,6 +378,7 @@ class CONVECTION3DPA : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/DEL_DOT_VEC_2D.cpp b/src/apps/DEL_DOT_VEC_2D.cpp index 24121e157..7e3bf7579 100644 --- a/src/apps/DEL_DOT_VEC_2D.cpp +++ b/src/apps/DEL_DOT_VEC_2D.cpp @@ -62,6 +62,10 @@ DEL_DOT_VEC_2D::DEL_DOT_VEC_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } DEL_DOT_VEC_2D::~DEL_DOT_VEC_2D() diff --git a/src/apps/DEL_DOT_VEC_2D.hpp b/src/apps/DEL_DOT_VEC_2D.hpp index 60d577a05..65b073de2 100644 --- a/src/apps/DEL_DOT_VEC_2D.hpp +++ b/src/apps/DEL_DOT_VEC_2D.hpp @@ -113,6 +113,7 @@ class DEL_DOT_VEC_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/DIFFUSION3DPA.cpp b/src/apps/DIFFUSION3DPA.cpp index 3844668c6..69ee1aa3a 100644 --- a/src/apps/DIFFUSION3DPA.cpp 
+++ b/src/apps/DIFFUSION3DPA.cpp @@ -65,6 +65,8 @@ DIFFUSION3DPA::DIFFUSION3DPA(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( RAJA_StdPar ); } DIFFUSION3DPA::~DIFFUSION3DPA() diff --git a/src/apps/DIFFUSION3DPA.hpp b/src/apps/DIFFUSION3DPA.hpp index b0ba7c977..a811769f0 100644 --- a/src/apps/DIFFUSION3DPA.hpp +++ b/src/apps/DIFFUSION3DPA.hpp @@ -481,6 +481,7 @@ class DIFFUSION3DPA : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/ENERGY.cpp b/src/apps/ENERGY.cpp index 9ed11381a..66f796db1 100644 --- a/src/apps/ENERGY.cpp +++ b/src/apps/ENERGY.cpp @@ -62,6 +62,10 @@ ENERGY::ENERGY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } ENERGY::~ENERGY() diff --git a/src/apps/ENERGY.hpp b/src/apps/ENERGY.hpp index 6461fdd5f..2848fd3b4 100644 --- a/src/apps/ENERGY.hpp +++ b/src/apps/ENERGY.hpp @@ -203,6 +203,7 @@ class ENERGY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/FIR.cpp b/src/apps/FIR.cpp index 8dd25358e..90871a160 100644 --- a/src/apps/FIR.cpp +++ b/src/apps/FIR.cpp @@ -56,6 +56,10 @@ FIR::FIR(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); 
+ setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } FIR::~FIR() diff --git a/src/apps/FIR.hpp b/src/apps/FIR.hpp index dd46d9934..6ce82907a 100644 --- a/src/apps/FIR.hpp +++ b/src/apps/FIR.hpp @@ -78,6 +78,7 @@ class FIR : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/HALOEXCHANGE.cpp b/src/apps/HALOEXCHANGE.cpp index 890fcf0a9..35c9839b1 100644 --- a/src/apps/HALOEXCHANGE.cpp +++ b/src/apps/HALOEXCHANGE.cpp @@ -98,6 +98,10 @@ HALOEXCHANGE::HALOEXCHANGE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } HALOEXCHANGE::~HALOEXCHANGE() diff --git a/src/apps/HALOEXCHANGE.hpp b/src/apps/HALOEXCHANGE.hpp index 5d653762a..de1398210 100644 --- a/src/apps/HALOEXCHANGE.hpp +++ b/src/apps/HALOEXCHANGE.hpp @@ -93,6 +93,7 @@ class HALOEXCHANGE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/HALOEXCHANGE_FUSED.cpp b/src/apps/HALOEXCHANGE_FUSED.cpp index 406cc654b..272d66de4 100644 --- a/src/apps/HALOEXCHANGE_FUSED.cpp +++ b/src/apps/HALOEXCHANGE_FUSED.cpp @@ -98,6 +98,10 @@ HALOEXCHANGE_FUSED::HALOEXCHANGE_FUSED(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar 
); } HALOEXCHANGE_FUSED::~HALOEXCHANGE_FUSED() diff --git a/src/apps/HALOEXCHANGE_FUSED.hpp b/src/apps/HALOEXCHANGE_FUSED.hpp index e47c1e14e..b83129571 100644 --- a/src/apps/HALOEXCHANGE_FUSED.hpp +++ b/src/apps/HALOEXCHANGE_FUSED.hpp @@ -137,6 +137,7 @@ class HALOEXCHANGE_FUSED : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp index ede451a0a..9d170071b 100644 --- a/src/apps/LTIMES.cpp +++ b/src/apps/LTIMES.cpp @@ -77,6 +77,10 @@ LTIMES::LTIMES(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } LTIMES::~LTIMES() diff --git a/src/apps/LTIMES.hpp b/src/apps/LTIMES.hpp index 31eae0f83..64e773c5c 100644 --- a/src/apps/LTIMES.hpp +++ b/src/apps/LTIMES.hpp @@ -116,6 +116,7 @@ class LTIMES : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/LTIMES_NOVIEW.cpp b/src/apps/LTIMES_NOVIEW.cpp index c0c0f7413..a4f53d360 100644 --- a/src/apps/LTIMES_NOVIEW.cpp +++ b/src/apps/LTIMES_NOVIEW.cpp @@ -76,6 +76,10 @@ LTIMES_NOVIEW::LTIMES_NOVIEW(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); 
} LTIMES_NOVIEW::~LTIMES_NOVIEW() diff --git a/src/apps/LTIMES_NOVIEW.hpp b/src/apps/LTIMES_NOVIEW.hpp index 1385864fb..09fa881cc 100644 --- a/src/apps/LTIMES_NOVIEW.hpp +++ b/src/apps/LTIMES_NOVIEW.hpp @@ -66,6 +66,7 @@ class LTIMES_NOVIEW : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/MASS3DPA.cpp b/src/apps/MASS3DPA.cpp index 288e7ff82..c951336ad 100644 --- a/src/apps/MASS3DPA.cpp +++ b/src/apps/MASS3DPA.cpp @@ -61,6 +61,8 @@ MASS3DPA::MASS3DPA(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( RAJA_StdPar ); } MASS3DPA::~MASS3DPA() diff --git a/src/apps/MASS3DPA.hpp b/src/apps/MASS3DPA.hpp index 0d1c3a42d..de29544d2 100644 --- a/src/apps/MASS3DPA.hpp +++ b/src/apps/MASS3DPA.hpp @@ -363,6 +363,7 @@ class MASS3DPA : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/NODAL_ACCUMULATION_3D.hpp b/src/apps/NODAL_ACCUMULATION_3D.hpp index a574f331a..52b8faa7a 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.hpp +++ b/src/apps/NODAL_ACCUMULATION_3D.hpp @@ -95,6 +95,7 @@ class NODAL_ACCUMULATION_3D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); 
void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/PRESSURE.cpp b/src/apps/PRESSURE.cpp index df2cb744f..29fc72adc 100644 --- a/src/apps/PRESSURE.cpp +++ b/src/apps/PRESSURE.cpp @@ -52,6 +52,10 @@ PRESSURE::PRESSURE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } PRESSURE::~PRESSURE() diff --git a/src/apps/PRESSURE.hpp b/src/apps/PRESSURE.hpp index 6421ce6b0..16bcb2b1f 100644 --- a/src/apps/PRESSURE.hpp +++ b/src/apps/PRESSURE.hpp @@ -72,6 +72,7 @@ class PRESSURE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/VOL3D.cpp b/src/apps/VOL3D.cpp index fd2ebb5aa..b05511f99 100644 --- a/src/apps/VOL3D.cpp +++ b/src/apps/VOL3D.cpp @@ -64,6 +64,10 @@ VOL3D::VOL3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } VOL3D::~VOL3D() diff --git a/src/apps/VOL3D.hpp b/src/apps/VOL3D.hpp index 9ddedbd19..289b07b83 100644 --- a/src/apps/VOL3D.hpp +++ b/src/apps/VOL3D.hpp @@ -169,6 +169,7 @@ class VOL3D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/WIP-COUPLE.hpp b/src/apps/WIP-COUPLE.hpp index cdafcd5eb..a37875418 100644 --- a/src/apps/WIP-COUPLE.hpp +++ 
b/src/apps/WIP-COUPLE.hpp @@ -171,6 +171,7 @@ class COUPLE : public KernelBase void runCudaVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} void runHipVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} void runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} + void runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} private: Complex_ptr m_t0; diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index ceeb1a502..cca6bf286 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -10,6 +10,7 @@ blt_add_library( NAME basic SOURCES DAXPY.cpp DAXPY-Seq.cpp + DAXPY-StdPar.cpp DAXPY-Hip.cpp DAXPY-Cuda.cpp DAXPY-OMP.cpp @@ -22,6 +23,7 @@ blt_add_library( DAXPY_ATOMIC-OMPTarget.cpp IF_QUAD.cpp IF_QUAD-Seq.cpp + IF_QUAD-StdPar.cpp IF_QUAD-Hip.cpp IF_QUAD-Cuda.cpp IF_QUAD-OMP.cpp @@ -46,48 +48,56 @@ blt_add_library( INIT3-OMPTarget.cpp INIT_VIEW1D.cpp INIT_VIEW1D-Seq.cpp + INIT_VIEW1D-StdPar.cpp INIT_VIEW1D-Hip.cpp INIT_VIEW1D-Cuda.cpp INIT_VIEW1D-OMP.cpp INIT_VIEW1D-OMPTarget.cpp INIT_VIEW1D_OFFSET.cpp INIT_VIEW1D_OFFSET-Seq.cpp + INIT_VIEW1D_OFFSET-StdPar.cpp INIT_VIEW1D_OFFSET-Hip.cpp INIT_VIEW1D_OFFSET-Cuda.cpp INIT_VIEW1D_OFFSET-OMP.cpp INIT_VIEW1D_OFFSET-OMPTarget.cpp MAT_MAT_SHARED.cpp MAT_MAT_SHARED-Seq.cpp + MAT_MAT_SHARED-StdPar.cpp MAT_MAT_SHARED-Hip.cpp MAT_MAT_SHARED-Cuda.cpp MAT_MAT_SHARED-OMP.cpp MAT_MAT_SHARED-OMPTarget.cpp MULADDSUB.cpp MULADDSUB-Seq.cpp + MULADDSUB-StdPar.cpp MULADDSUB-Hip.cpp MULADDSUB-Cuda.cpp MULADDSUB-OMP.cpp MULADDSUB-OMPTarget.cpp NESTED_INIT.cpp NESTED_INIT-Seq.cpp + NESTED_INIT-StdPar.cpp NESTED_INIT-Hip.cpp NESTED_INIT-Cuda.cpp NESTED_INIT-OMP.cpp NESTED_INIT-OMPTarget.cpp PI_ATOMIC.cpp PI_ATOMIC-Seq.cpp + PI_ATOMIC-StdPar.cpp PI_ATOMIC-Hip.cpp PI_ATOMIC-Cuda.cpp PI_ATOMIC-OMP.cpp PI_ATOMIC-OMPTarget.cpp PI_REDUCE.cpp PI_REDUCE-Seq.cpp + PI_REDUCE-StdPar.cpp PI_REDUCE-Hip.cpp 
PI_REDUCE-Cuda.cpp PI_REDUCE-OMP.cpp PI_REDUCE-OMPTarget.cpp REDUCE3_INT.cpp REDUCE3_INT-Seq.cpp + REDUCE3_INT-StdPar.cpp REDUCE3_INT-Hip.cpp REDUCE3_INT-Cuda.cpp REDUCE3_INT-OMP.cpp @@ -100,9 +110,11 @@ blt_add_library( REDUCE_STRUCT-OMPTarget.cpp TRAP_INT.cpp TRAP_INT-Seq.cpp + TRAP_INT-StdPar.cpp TRAP_INT-Hip.cpp TRAP_INT-Cuda.cpp TRAP_INT-OMPTarget.cpp TRAP_INT-OMP.cpp + INIT3-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index 69a5a152e..a0cd60977 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -52,6 +52,10 @@ DAXPY::DAXPY(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/DAXPY.hpp b/src/basic/DAXPY.hpp index 82a6fd9ff..840371fc1 100644 --- a/src/basic/DAXPY.hpp +++ b/src/basic/DAXPY.hpp @@ -52,6 +52,7 @@ class DAXPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/DAXPY_ATOMIC.hpp b/src/basic/DAXPY_ATOMIC.hpp index dd52d777c..0a702deec 100644 --- a/src/basic/DAXPY_ATOMIC.hpp +++ b/src/basic/DAXPY_ATOMIC.hpp @@ -55,6 +55,7 @@ class DAXPY_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index 
4a8d60035..799c02865 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -56,6 +56,10 @@ IF_QUAD::IF_QUAD(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/IF_QUAD.hpp b/src/basic/IF_QUAD.hpp index a03727a6c..4aac072fc 100644 --- a/src/basic/IF_QUAD.hpp +++ b/src/basic/IF_QUAD.hpp @@ -69,6 +69,7 @@ class IF_QUAD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST.hpp b/src/basic/INDEXLIST.hpp index 0836d8197..0bd51f947 100644 --- a/src/basic/INDEXLIST.hpp +++ b/src/basic/INDEXLIST.hpp @@ -60,6 +60,7 @@ class INDEXLIST : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST_3LOOP.hpp b/src/basic/INDEXLIST_3LOOP.hpp index e19ee5508..408c6483c 100644 --- a/src/basic/INDEXLIST_3LOOP.hpp +++ b/src/basic/INDEXLIST_3LOOP.hpp @@ -71,6 +71,7 @@ class INDEXLIST_3LOOP : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT3.cpp 
b/src/basic/INIT3.cpp index a504fa914..990278e36 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -52,6 +52,10 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp index 0f89b7c54..3ad27a2b2 100644 --- a/src/basic/INIT3.hpp +++ b/src/basic/INIT3.hpp @@ -55,6 +55,7 @@ class INIT3 : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index 2cb2b2376..ea68d0951 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -53,6 +53,10 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D.hpp b/src/basic/INIT_VIEW1D.hpp index b5dfbf097..54c0f54d6 100644 --- a/src/basic/INIT_VIEW1D.hpp +++ b/src/basic/INIT_VIEW1D.hpp @@ -66,6 +66,7 @@ class INIT_VIEW1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index f31395b07..1c482cec7 
100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -53,6 +53,10 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D_OFFSET.hpp b/src/basic/INIT_VIEW1D_OFFSET.hpp index 4cc3548c7..b9cd47ee8 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.hpp +++ b/src/basic/INIT_VIEW1D_OFFSET.hpp @@ -65,6 +65,7 @@ class INIT_VIEW1D_OFFSET : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 98cd878ce..747aa8413 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -60,6 +60,10 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams &params) setVariantDefined(Base_HIP); setVariantDefined(Lambda_HIP); setVariantDefined(RAJA_HIP); + + //setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); + //setVariantDefined( RAJA_StdPar ); } MAT_MAT_SHARED::~MAT_MAT_SHARED() {} diff --git a/src/basic/MAT_MAT_SHARED.hpp b/src/basic/MAT_MAT_SHARED.hpp index 095721c27..c18682960 100644 --- a/src/basic/MAT_MAT_SHARED.hpp +++ b/src/basic/MAT_MAT_SHARED.hpp @@ -139,6 +139,7 @@ class MAT_MAT_SHARED : public KernelBase { void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void 
setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index 1d4981ca2..8e6b76b5d 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -52,6 +52,10 @@ MULADDSUB::MULADDSUB(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/MULADDSUB.hpp b/src/basic/MULADDSUB.hpp index 63d637073..ccec343e2 100644 --- a/src/basic/MULADDSUB.hpp +++ b/src/basic/MULADDSUB.hpp @@ -58,6 +58,7 @@ class MULADDSUB : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/NESTED_INIT-Seq.cpp b/src/basic/NESTED_INIT-Seq.cpp index 48da1b37a..6f0e2265a 100644 --- a/src/basic/NESTED_INIT-Seq.cpp +++ b/src/basic/NESTED_INIT-Seq.cpp @@ -39,6 +39,7 @@ void NESTED_INIT::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i for (Index_type j = 0; j < nj; ++j ) { for (Index_type i = 0; i < ni; ++i ) { NESTED_INIT_BODY; + //std::cout << i << "," << j << "," << k << ";" << k*nj*ni+j*ni+i << " SEQ\n"; } } } diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index 30cbd0254..3c01fe350 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -63,6 +63,10 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/NESTED_INIT.hpp b/src/basic/NESTED_INIT.hpp index 
6849c9a73..54099a840 100644 --- a/src/basic/NESTED_INIT.hpp +++ b/src/basic/NESTED_INIT.hpp @@ -58,6 +58,7 @@ class NESTED_INIT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 607ad1312..0633887a0 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -54,6 +54,10 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/PI_ATOMIC.hpp b/src/basic/PI_ATOMIC.hpp index e69cbdb56..4b2353a38 100644 --- a/src/basic/PI_ATOMIC.hpp +++ b/src/basic/PI_ATOMIC.hpp @@ -54,6 +54,7 @@ class PI_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index 16d0770ba..5af375f56 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -51,6 +51,10 @@ PI_REDUCE::PI_REDUCE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } PI_REDUCE::~PI_REDUCE() diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index c7cc3258a..f3655d503 100644 --- a/src/basic/PI_REDUCE.hpp +++ 
b/src/basic/PI_REDUCE.hpp @@ -56,6 +56,7 @@ class PI_REDUCE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index 941d85ac1..e39f0c031 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -57,6 +57,10 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/REDUCE3_INT.hpp b/src/basic/REDUCE3_INT.hpp index c84fa84b2..7a0a1f2c8 100644 --- a/src/basic/REDUCE3_INT.hpp +++ b/src/basic/REDUCE3_INT.hpp @@ -70,6 +70,7 @@ class REDUCE3_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE_STRUCT.hpp b/src/basic/REDUCE_STRUCT.hpp index b1d188ca1..0bd168914 100644 --- a/src/basic/REDUCE_STRUCT.hpp +++ b/src/basic/REDUCE_STRUCT.hpp @@ -86,6 +86,7 @@ class REDUCE_STRUCT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index 
63da29799..7ddc1991b 100644 --- a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -52,6 +52,10 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/TRAP_INT.hpp b/src/basic/TRAP_INT.hpp index eff85b90e..d23e34164 100644 --- a/src/basic/TRAP_INT.hpp +++ b/src/basic/TRAP_INT.hpp @@ -67,6 +67,7 @@ class TRAP_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index 36efa5170..0b526afd0 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -246,6 +246,22 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx) #endif break; } + + case Base_StdPar : + case Lambda_StdPar : + { + runStdParVariant(vid, tune_idx); + break; + } + + case RAJA_StdPar : + { +#if defined(RUN_RAJA_STDPAR) + runStdParVariant(vid, tune_idx); +#endif + break; + } + case Kokkos_Lambda : { #if defined(RUN_KOKKOS) diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index ed3429643..8c72e854e 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -234,6 +234,7 @@ class KernelBase #if defined(RAJA_ENABLE_TARGET_OPENMP) virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0; #endif + virtual void runStdParVariant(VariantID vid, size_t tune_idx) = 0; #if defined(RUN_KOKKOS) virtual void runKokkosVariant(VariantID vid, size_t tune_idx) { diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 7578f5afd..5c1144ef3 100644 
--- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -278,6 +278,10 @@ static const std::string VariantNames [] = std::string("Lambda_HIP"), std::string("RAJA_HIP"), + std::string("Base_StdPar"), + std::string("Lambda_StdPar"), + std::string("RAJA_StdPar"), + std::string("Kokkos_Lambda"), std::string("Unknown Variant") // Keep this at the end and DO NOT remove.... @@ -424,6 +428,16 @@ bool isVariantAvailable(VariantID vid) } #endif + if ( vid == Base_StdPar || + vid == Lambda_StdPar) { + ret_val = true; + } +#if defined(RUN_RAJA_STDPAR) + if ( vid == RAJA_StdPar ) { + ret_val = true; + } +#endif + #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { ret_val = true; @@ -485,6 +499,16 @@ bool isVariantGPU(VariantID vid) } #endif + if ( vid == Base_StdPar || + vid == Lambda_StdPar) { + ret_val = true; + } +#if defined(RUN_RAJA_STDPAR) + if ( vid == RAJA_StdPar ) { + ret_val = true; + } +#endif + #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { ret_val = true; diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 07feeede9..ab93280f1 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -194,6 +194,10 @@ enum VariantID { Lambda_HIP, RAJA_HIP, + Base_StdPar, + Lambda_StdPar, + RAJA_StdPar, + Kokkos_Lambda, NumVariants // Keep this one last and NEVER comment out (!!) 
diff --git a/src/lcals/CMakeLists.txt b/src/lcals/CMakeLists.txt index 5f88c8c69..6e0325bcc 100644 --- a/src/lcals/CMakeLists.txt +++ b/src/lcals/CMakeLists.txt @@ -74,5 +74,16 @@ blt_add_library( TRIDIAG_ELIM-Cuda.cpp TRIDIAG_ELIM-OMP.cpp TRIDIAG_ELIM-OMPTarget.cpp + DIFF_PREDICT-StdPar.cpp + EOS-StdPar.cpp + FIRST_DIFF-StdPar.cpp + FIRST_MIN-StdPar.cpp + FIRST_SUM-StdPar.cpp + GEN_LIN_RECUR-StdPar.cpp + HYDRO_1D-StdPar.cpp + HYDRO_2D-StdPar.cpp + INT_PREDICT-StdPar.cpp + PLANCKIAN-StdPar.cpp + TRIDIAG_ELIM-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/lcals/DIFF_PREDICT.cpp b/src/lcals/DIFF_PREDICT.cpp index 338ba7d0d..e60d636e1 100644 --- a/src/lcals/DIFF_PREDICT.cpp +++ b/src/lcals/DIFF_PREDICT.cpp @@ -49,6 +49,10 @@ DIFF_PREDICT::DIFF_PREDICT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } DIFF_PREDICT::~DIFF_PREDICT() diff --git a/src/lcals/DIFF_PREDICT.hpp b/src/lcals/DIFF_PREDICT.hpp index 130071412..d7631953d 100644 --- a/src/lcals/DIFF_PREDICT.hpp +++ b/src/lcals/DIFF_PREDICT.hpp @@ -93,6 +93,7 @@ class DIFF_PREDICT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp index 27bc43d06..b7b3813b3 100644 --- a/src/lcals/EOS.cpp +++ b/src/lcals/EOS.cpp @@ -57,6 +57,10 @@ EOS::EOS(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } EOS::~EOS() diff --git a/src/lcals/EOS.hpp b/src/lcals/EOS.hpp index 
f2d38b5e9..6715ce857 100644 --- a/src/lcals/EOS.hpp +++ b/src/lcals/EOS.hpp @@ -62,6 +62,7 @@ class EOS : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_DIFF.cpp b/src/lcals/FIRST_DIFF.cpp index 9272b20d4..54a7c0326 100644 --- a/src/lcals/FIRST_DIFF.cpp +++ b/src/lcals/FIRST_DIFF.cpp @@ -29,7 +29,7 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) m_N = getActualProblemSize()+1; setItsPerRep( getActualProblemSize() ); - setItsPerRep( getActualProblemSize() ); + setItsPerRep( getActualProblemSize() ); // why twice? setKernelsPerRep(1); setBytesPerRep( (1*sizeof(Real_type) + 0*sizeof(Real_type)) * getActualProblemSize() + (0*sizeof(Real_type) + 1*sizeof(Real_type)) * m_N ); @@ -53,6 +53,10 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } FIRST_DIFF::~FIRST_DIFF() diff --git a/src/lcals/FIRST_DIFF.hpp b/src/lcals/FIRST_DIFF.hpp index 51de73049..655596c3b 100644 --- a/src/lcals/FIRST_DIFF.hpp +++ b/src/lcals/FIRST_DIFF.hpp @@ -52,6 +52,7 @@ class FIRST_DIFF : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp index 8fe9a8c93..a1cffc072 100644 --- a/src/lcals/FIRST_MIN.cpp +++ b/src/lcals/FIRST_MIN.cpp @@ -57,6 +57,10 @@ FIRST_MIN::FIRST_MIN(const 
RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } FIRST_MIN::~FIRST_MIN() diff --git a/src/lcals/FIRST_MIN.hpp b/src/lcals/FIRST_MIN.hpp index c10839ec7..0f6f172be 100644 --- a/src/lcals/FIRST_MIN.hpp +++ b/src/lcals/FIRST_MIN.hpp @@ -81,6 +81,7 @@ class FIRST_MIN : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp index a9d135446..109c6499a 100644 --- a/src/lcals/FIRST_SUM.cpp +++ b/src/lcals/FIRST_SUM.cpp @@ -52,6 +52,10 @@ FIRST_SUM::FIRST_SUM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } FIRST_SUM::~FIRST_SUM() diff --git a/src/lcals/FIRST_SUM.hpp b/src/lcals/FIRST_SUM.hpp index 5f019c08c..d97b9d264 100644 --- a/src/lcals/FIRST_SUM.hpp +++ b/src/lcals/FIRST_SUM.hpp @@ -55,6 +55,7 @@ class FIRST_SUM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp index b0598aa8e..eb21f7f5c 100644 --- a/src/lcals/GEN_LIN_RECUR.cpp +++ b/src/lcals/GEN_LIN_RECUR.cpp @@ -57,6 +57,10 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); 
+ + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } GEN_LIN_RECUR::~GEN_LIN_RECUR() diff --git a/src/lcals/GEN_LIN_RECUR.hpp b/src/lcals/GEN_LIN_RECUR.hpp index d6d20b43b..21516a0dc 100644 --- a/src/lcals/GEN_LIN_RECUR.hpp +++ b/src/lcals/GEN_LIN_RECUR.hpp @@ -76,6 +76,7 @@ class GEN_LIN_RECUR : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/HYDRO_1D.cpp b/src/lcals/HYDRO_1D.cpp index 5ce1d0700..32d40d978 100644 --- a/src/lcals/HYDRO_1D.cpp +++ b/src/lcals/HYDRO_1D.cpp @@ -56,6 +56,10 @@ HYDRO_1D::HYDRO_1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } HYDRO_1D::~HYDRO_1D() diff --git a/src/lcals/HYDRO_1D.hpp b/src/lcals/HYDRO_1D.hpp index 692e40a8e..01d039314 100644 --- a/src/lcals/HYDRO_1D.hpp +++ b/src/lcals/HYDRO_1D.hpp @@ -57,6 +57,7 @@ class HYDRO_1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp index 9b6c2a643..331e6e695 100644 --- a/src/lcals/HYDRO_2D.cpp +++ b/src/lcals/HYDRO_2D.cpp @@ -71,6 +71,10 @@ HYDRO_2D::HYDRO_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( 
RAJA_StdPar ); } HYDRO_2D::~HYDRO_2D() diff --git a/src/lcals/HYDRO_2D.hpp b/src/lcals/HYDRO_2D.hpp index 4363ea633..93cce3305 100644 --- a/src/lcals/HYDRO_2D.hpp +++ b/src/lcals/HYDRO_2D.hpp @@ -153,6 +153,7 @@ class HYDRO_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/INT_PREDICT.cpp b/src/lcals/INT_PREDICT.cpp index c2062fffa..dd4ff83d8 100644 --- a/src/lcals/INT_PREDICT.cpp +++ b/src/lcals/INT_PREDICT.cpp @@ -49,6 +49,10 @@ INT_PREDICT::INT_PREDICT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } INT_PREDICT::~INT_PREDICT() diff --git a/src/lcals/INT_PREDICT.hpp b/src/lcals/INT_PREDICT.hpp index 7a3c6fda6..92d87ab3a 100644 --- a/src/lcals/INT_PREDICT.hpp +++ b/src/lcals/INT_PREDICT.hpp @@ -72,6 +72,7 @@ class INT_PREDICT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/PLANCKIAN.cpp b/src/lcals/PLANCKIAN.cpp index 59de57231..74c65e31b 100644 --- a/src/lcals/PLANCKIAN.cpp +++ b/src/lcals/PLANCKIAN.cpp @@ -49,6 +49,10 @@ PLANCKIAN::PLANCKIAN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } PLANCKIAN::~PLANCKIAN() diff --git a/src/lcals/PLANCKIAN.hpp 
b/src/lcals/PLANCKIAN.hpp index 46fba63db..0d85614fa 100644 --- a/src/lcals/PLANCKIAN.hpp +++ b/src/lcals/PLANCKIAN.hpp @@ -57,6 +57,7 @@ class PLANCKIAN : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp index 05d0100a8..d606e39be 100644 --- a/src/lcals/TRIDIAG_ELIM.cpp +++ b/src/lcals/TRIDIAG_ELIM.cpp @@ -51,6 +51,10 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } TRIDIAG_ELIM::~TRIDIAG_ELIM() diff --git a/src/lcals/TRIDIAG_ELIM.hpp b/src/lcals/TRIDIAG_ELIM.hpp index f593985a5..336fb2219 100644 --- a/src/lcals/TRIDIAG_ELIM.hpp +++ b/src/lcals/TRIDIAG_ELIM.hpp @@ -57,6 +57,7 @@ class TRIDIAG_ELIM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/CMakeLists.txt b/src/polybench/CMakeLists.txt index 5805926f3..67343059e 100644 --- a/src/polybench/CMakeLists.txt +++ b/src/polybench/CMakeLists.txt @@ -86,5 +86,18 @@ blt_add_library( POLYBENCH_MVT-Cuda.cpp POLYBENCH_MVT-OMP.cpp POLYBENCH_MVT-OMPTarget.cpp + POLYBENCH_2MM-StdPar.cpp + POLYBENCH_3MM-StdPar.cpp + POLYBENCH_ADI-StdPar.cpp + POLYBENCH_ATAX-StdPar.cpp + POLYBENCH_FDTD_2D-StdPar.cpp + POLYBENCH_FLOYD_WARSHALL-StdPar.cpp + POLYBENCH_GEMM-StdPar.cpp + POLYBENCH_GEMVER-StdPar.cpp + 
POLYBENCH_GESUMMV-StdPar.cpp + POLYBENCH_HEAT_3D-StdPar.cpp + POLYBENCH_JACOBI_1D-StdPar.cpp + POLYBENCH_JACOBI_2D-StdPar.cpp + POLYBENCH_MVT-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/polybench/POLYBENCH_2MM.cpp b/src/polybench/POLYBENCH_2MM.cpp index 03119a863..c1284791f 100644 --- a/src/polybench/POLYBENCH_2MM.cpp +++ b/src/polybench/POLYBENCH_2MM.cpp @@ -78,6 +78,10 @@ POLYBENCH_2MM::POLYBENCH_2MM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_2MM::~POLYBENCH_2MM() diff --git a/src/polybench/POLYBENCH_2MM.hpp b/src/polybench/POLYBENCH_2MM.hpp index 0624257f7..00c2de6d5 100644 --- a/src/polybench/POLYBENCH_2MM.hpp +++ b/src/polybench/POLYBENCH_2MM.hpp @@ -127,6 +127,7 @@ class POLYBENCH_2MM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_3MM-Seq.cpp b/src/polybench/POLYBENCH_3MM-Seq.cpp index c1ca8c56d..9b99fb889 100644 --- a/src/polybench/POLYBENCH_3MM-Seq.cpp +++ b/src/polybench/POLYBENCH_3MM-Seq.cpp @@ -19,7 +19,6 @@ namespace rajaperf namespace polybench { - void POLYBENCH_3MM::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); diff --git a/src/polybench/POLYBENCH_3MM.cpp b/src/polybench/POLYBENCH_3MM.cpp index 75990394c..3cc6fce66 100644 --- a/src/polybench/POLYBENCH_3MM.cpp +++ b/src/polybench/POLYBENCH_3MM.cpp @@ -86,6 +86,10 @@ POLYBENCH_3MM::POLYBENCH_3MM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); 
setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_3MM::~POLYBENCH_3MM() diff --git a/src/polybench/POLYBENCH_3MM.hpp b/src/polybench/POLYBENCH_3MM.hpp index 0cf9aabff..35c6407ba 100644 --- a/src/polybench/POLYBENCH_3MM.hpp +++ b/src/polybench/POLYBENCH_3MM.hpp @@ -153,6 +153,7 @@ class POLYBENCH_3MM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp index 7d0844e69..5ad7544dd 100644 --- a/src/polybench/POLYBENCH_ADI.cpp +++ b/src/polybench/POLYBENCH_ADI.cpp @@ -63,6 +63,10 @@ POLYBENCH_ADI::POLYBENCH_ADI(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_ADI::~POLYBENCH_ADI() diff --git a/src/polybench/POLYBENCH_ADI.hpp b/src/polybench/POLYBENCH_ADI.hpp index 7cd579964..0941506ec 100644 --- a/src/polybench/POLYBENCH_ADI.hpp +++ b/src/polybench/POLYBENCH_ADI.hpp @@ -195,6 +195,7 @@ class POLYBENCH_ADI : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp index 44a805518..440586561 100644 --- a/src/polybench/POLYBENCH_ATAX.cpp +++ b/src/polybench/POLYBENCH_ATAX.cpp 
@@ -65,6 +65,10 @@ POLYBENCH_ATAX::POLYBENCH_ATAX(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_ATAX::~POLYBENCH_ATAX() diff --git a/src/polybench/POLYBENCH_ATAX.hpp b/src/polybench/POLYBENCH_ATAX.hpp index 8f28a1470..baf2d24f9 100644 --- a/src/polybench/POLYBENCH_ATAX.hpp +++ b/src/polybench/POLYBENCH_ATAX.hpp @@ -115,6 +115,7 @@ class POLYBENCH_ATAX : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_FDTD_2D.cpp b/src/polybench/POLYBENCH_FDTD_2D.cpp index dce05e76a..47bb79ce2 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D.cpp @@ -84,6 +84,10 @@ POLYBENCH_FDTD_2D::POLYBENCH_FDTD_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_FDTD_2D::~POLYBENCH_FDTD_2D() diff --git a/src/polybench/POLYBENCH_FDTD_2D.hpp b/src/polybench/POLYBENCH_FDTD_2D.hpp index 7d3696293..1e8a0f68b 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.hpp +++ b/src/polybench/POLYBENCH_FDTD_2D.hpp @@ -113,6 +113,7 @@ class POLYBENCH_FDTD_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID 
vid); diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp index b9f42b0ed..749da2c3b 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp @@ -17,7 +17,6 @@ namespace rajaperf namespace polybench { - void POLYBENCH_FLOYD_WARSHALL::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps= getRunReps(); diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp index 1022ffe4f..d48f141f0 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp @@ -60,6 +60,10 @@ POLYBENCH_FLOYD_WARSHALL::POLYBENCH_FLOYD_WARSHALL(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_FLOYD_WARSHALL::~POLYBENCH_FLOYD_WARSHALL() diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp index 283231d29..e543a188f 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp @@ -76,6 +76,7 @@ class POLYBENCH_FLOYD_WARSHALL : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp index 0ee1f41be..a7dec71b4 100644 --- a/src/polybench/POLYBENCH_GEMM.cpp +++ b/src/polybench/POLYBENCH_GEMM.cpp @@ -70,6 +70,10 @@ POLYBENCH_GEMM::POLYBENCH_GEMM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); 
setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_GEMM::~POLYBENCH_GEMM() diff --git a/src/polybench/POLYBENCH_GEMM.hpp b/src/polybench/POLYBENCH_GEMM.hpp index ae218397d..72d653eb0 100644 --- a/src/polybench/POLYBENCH_GEMM.hpp +++ b/src/polybench/POLYBENCH_GEMM.hpp @@ -99,6 +99,7 @@ class POLYBENCH_GEMM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_GEMVER-Seq.cpp b/src/polybench/POLYBENCH_GEMVER-Seq.cpp index eeee6f0ec..7ea35e871 100644 --- a/src/polybench/POLYBENCH_GEMVER-Seq.cpp +++ b/src/polybench/POLYBENCH_GEMVER-Seq.cpp @@ -131,7 +131,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t auto poly_gemver_lam1 = [=] (Index_type i, Index_type j) { POLYBENCH_GEMVER_BODY1_RAJA; }; - auto poly_gemver_lam2 = [=] (Index_type /* i */, Real_type &dot) { + auto poly_gemver_lam2 = [=] (Real_type &dot) { POLYBENCH_GEMVER_BODY2_RAJA; }; auto poly_gemver_lam3 = [=] (Index_type i, Index_type j, Real_type &dot) { @@ -162,10 +162,10 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t > >; - using EXEC_POL24 = + using EXEC_POL2 = RAJA::KernelPolicy< RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::Lambda<0, RAJA::Params<0>>, RAJA::statement::For<1, RAJA::loop_exec, RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> >, @@ -175,6 +175,17 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t using EXEC_POL3 = RAJA::loop_exec; + using EXEC_POL4 = + RAJA::KernelPolicy< + 
RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -183,7 +194,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t poly_gemver_lam1 ); - RAJA::kernel_param<EXEC_POL24>( + RAJA::kernel_param<EXEC_POL2>( RAJA::make_tuple(RAJA::RangeSegment{0, n}, RAJA::RangeSegment{0, n}), RAJA::tuple<Real_type>{0.0}, @@ -197,7 +208,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t poly_gemver_lam5 ); - RAJA::kernel_param<EXEC_POL24>( + RAJA::kernel_param<EXEC_POL4>( RAJA::make_tuple(RAJA::RangeSegment{0, n}, RAJA::RangeSegment{0, n}), RAJA::tuple<Real_type>{0.0}, diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp index 24a3f3d1b..22a4837af 100644 --- a/src/polybench/POLYBENCH_GEMVER.cpp +++ b/src/polybench/POLYBENCH_GEMVER.cpp @@ -79,6 +79,10 @@ POLYBENCH_GEMVER::POLYBENCH_GEMVER(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_GEMVER::~POLYBENCH_GEMVER() diff --git a/src/polybench/POLYBENCH_GEMVER.hpp b/src/polybench/POLYBENCH_GEMVER.hpp index 80c96fa94..dd308fccf 100644 --- a/src/polybench/POLYBENCH_GEMVER.hpp +++ b/src/polybench/POLYBENCH_GEMVER.hpp @@ -18,11 +18,9 @@ /// Note: this part of the kernel is modified to avoid /// excessively large checksums /// for (Index_type i = 0; i < N; i++) { -/// Real_type dot = 0.0; /// for (Index_type j = 0; j < N; j++) { -/// dot += beta * A[j][i] * y[j]; +/// x[i] = x[i] + beta * A[j][i] * y[j]; /// } -/// x[i] = dot; /// } /// /// for (Index_type i = 0; i < N; i++) { @@ -98,7 +96,7 @@ xview(i) += zview(i);
#define POLYBENCH_GEMVER_BODY6_RAJA \ - dot = wview(i); + dot = w[i]; #define POLYBENCH_GEMVER_BODY7_RAJA \ dot += alpha * Aview(i,j) * xview(j); @@ -152,6 +150,7 @@ class POLYBENCH_GEMVER : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp index c65897e5d..14a86a0fe 100644 --- a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp @@ -93,9 +93,9 @@ void POLYBENCH_GESUMMV::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( using EXEC_POL = RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, // i + RAJA::statement::For<0, RAJA::loop_exec, RAJA::statement::Lambda<0, RAJA::Params<0,1>>, - RAJA::statement::For<1, RAJA::loop_exec, // j + RAJA::statement::For<1, RAJA::loop_exec, RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Params<0,1>> >, RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0,1>> diff --git a/src/polybench/POLYBENCH_GESUMMV.cpp b/src/polybench/POLYBENCH_GESUMMV.cpp index eb527af27..f1f10c645 100644 --- a/src/polybench/POLYBENCH_GESUMMV.cpp +++ b/src/polybench/POLYBENCH_GESUMMV.cpp @@ -59,6 +59,10 @@ POLYBENCH_GESUMMV::POLYBENCH_GESUMMV(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_GESUMMV::~POLYBENCH_GESUMMV() diff --git a/src/polybench/POLYBENCH_GESUMMV.hpp b/src/polybench/POLYBENCH_GESUMMV.hpp index c8f71ee84..00361d757 100644 --- a/src/polybench/POLYBENCH_GESUMMV.hpp +++ b/src/polybench/POLYBENCH_GESUMMV.hpp @@ -98,6 +98,7 @@ class POLYBENCH_GESUMMV : public 
KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp index 4afb06d21..be1feed9c 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp @@ -107,6 +107,13 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( POLYBENCH_HEAT_3D_VIEWS_RAJA; + auto poly_heat3d_lam1 = [=](Index_type i, Index_type j, Index_type k) { + POLYBENCH_HEAT_3D_BODY1_RAJA; + }; + auto poly_heat3d_lam2 = [=](Index_type i, Index_type j, Index_type k) { + POLYBENCH_HEAT_3D_BODY2_RAJA; + }; + using EXEC_POL = RAJA::KernelPolicy< RAJA::statement::For<0, RAJA::loop_exec, @@ -115,6 +122,13 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( RAJA::statement::Lambda<0> > > + >, + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1> + > + > > >; @@ -127,20 +141,8 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( RAJA::RangeSegment{1, N-1}, RAJA::RangeSegment{1, N-1}), - [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY1_RAJA; - } - - ); - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}), - - [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY2_RAJA; - } - + poly_heat3d_lam1, + poly_heat3d_lam2 ); } diff --git a/src/polybench/POLYBENCH_HEAT_3D.cpp b/src/polybench/POLYBENCH_HEAT_3D.cpp index 567192b9a..ec86de900 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.cpp +++ 
b/src/polybench/POLYBENCH_HEAT_3D.cpp @@ -70,6 +70,10 @@ POLYBENCH_HEAT_3D::POLYBENCH_HEAT_3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_HEAT_3D::~POLYBENCH_HEAT_3D() diff --git a/src/polybench/POLYBENCH_HEAT_3D.hpp b/src/polybench/POLYBENCH_HEAT_3D.hpp index 81ab06e0e..850efee83 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.hpp +++ b/src/polybench/POLYBENCH_HEAT_3D.hpp @@ -124,6 +124,7 @@ class POLYBENCH_HEAT_3D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); @@ -140,6 +141,8 @@ class POLYBENCH_HEAT_3D : public KernelBase Index_type m_N; Index_type m_tsteps; + Real_type m_factor; + Real_ptr m_A; Real_ptr m_B; Real_ptr m_Ainit; diff --git a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp index f23ccdf06..0dec5ba7b 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp @@ -18,7 +18,6 @@ namespace rajaperf namespace polybench { - void POLYBENCH_JACOBI_1D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps= getRunReps(); diff --git a/src/polybench/POLYBENCH_JACOBI_1D.cpp b/src/polybench/POLYBENCH_JACOBI_1D.cpp index f86bb5956..a8aa3e089 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.cpp @@ -67,6 +67,10 @@ POLYBENCH_JACOBI_1D::POLYBENCH_JACOBI_1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + 
setVariantDefined( RAJA_StdPar ); } POLYBENCH_JACOBI_1D::~POLYBENCH_JACOBI_1D() diff --git a/src/polybench/POLYBENCH_JACOBI_1D.hpp b/src/polybench/POLYBENCH_JACOBI_1D.hpp index cb3131490..7974d9b47 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.hpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.hpp @@ -70,6 +70,7 @@ class POLYBENCH_JACOBI_1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_JACOBI_2D.cpp b/src/polybench/POLYBENCH_JACOBI_2D.cpp index 1b4f9378a..a8d54e751 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.cpp @@ -69,6 +69,10 @@ POLYBENCH_JACOBI_2D::POLYBENCH_JACOBI_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_JACOBI_2D::~POLYBENCH_JACOBI_2D() diff --git a/src/polybench/POLYBENCH_JACOBI_2D.hpp b/src/polybench/POLYBENCH_JACOBI_2D.hpp index a2ba63181..8adb8bef5 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.hpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.hpp @@ -90,6 +90,7 @@ class POLYBENCH_JACOBI_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp index 3354ca97d..3ac9d680f 100644 --- a/src/polybench/POLYBENCH_MVT.cpp +++ 
b/src/polybench/POLYBENCH_MVT.cpp @@ -62,6 +62,10 @@ POLYBENCH_MVT::POLYBENCH_MVT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_MVT::~POLYBENCH_MVT() diff --git a/src/polybench/POLYBENCH_MVT.hpp b/src/polybench/POLYBENCH_MVT.hpp index dce40baf2..63c8b4287 100644 --- a/src/polybench/POLYBENCH_MVT.hpp +++ b/src/polybench/POLYBENCH_MVT.hpp @@ -112,6 +112,7 @@ class POLYBENCH_MVT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/ADD-Seq.cpp b/src/stream/ADD-Seq.cpp index f421d44c2..224c92e7a 100644 --- a/src/stream/ADD-Seq.cpp +++ b/src/stream/ADD-Seq.cpp @@ -17,7 +17,6 @@ namespace rajaperf namespace stream { - void ADD::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); diff --git a/src/stream/ADD.cpp b/src/stream/ADD.cpp index 904c0804b..534deee28 100644 --- a/src/stream/ADD.cpp +++ b/src/stream/ADD.cpp @@ -52,6 +52,10 @@ ADD::ADD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } ADD::~ADD() diff --git a/src/stream/ADD.hpp b/src/stream/ADD.hpp index 07d0dea79..5b144ff1b 100644 --- a/src/stream/ADD.hpp +++ b/src/stream/ADD.hpp @@ -52,6 +52,7 @@ class ADD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void 
runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/CMakeLists.txt b/src/stream/CMakeLists.txt index 2122b7867..5b475dc91 100644 --- a/src/stream/CMakeLists.txt +++ b/src/stream/CMakeLists.txt @@ -10,30 +10,35 @@ blt_add_library( NAME stream SOURCES ADD.cpp ADD-Seq.cpp + ADD-StdPar.cpp ADD-Hip.cpp ADD-Cuda.cpp ADD-OMP.cpp ADD-OMPTarget.cpp COPY.cpp COPY-Seq.cpp + COPY-StdPar.cpp COPY-Hip.cpp COPY-Cuda.cpp COPY-OMP.cpp COPY-OMPTarget.cpp DOT.cpp DOT-Seq.cpp + DOT-StdPar.cpp DOT-Hip.cpp DOT-Cuda.cpp DOT-OMP.cpp DOT-OMPTarget.cpp MUL.cpp MUL-Seq.cpp + MUL-StdPar.cpp MUL-Hip.cpp MUL-Cuda.cpp MUL-OMP.cpp MUL-OMPTarget.cpp TRIAD.cpp TRIAD-Seq.cpp + TRIAD-StdPar.cpp TRIAD-Hip.cpp TRIAD-Cuda.cpp TRIAD-OMPTarget.cpp diff --git a/src/stream/COPY.cpp b/src/stream/COPY.cpp index 251208a4d..40fae2467 100644 --- a/src/stream/COPY.cpp +++ b/src/stream/COPY.cpp @@ -52,6 +52,10 @@ COPY::COPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } COPY::~COPY() diff --git a/src/stream/COPY.hpp b/src/stream/COPY.hpp index 0f23bfa68..ced0ad20a 100644 --- a/src/stream/COPY.hpp +++ b/src/stream/COPY.hpp @@ -51,6 +51,7 @@ class COPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/DOT.cpp b/src/stream/DOT.cpp index 0d9657a8a..d1f701431 100644 --- a/src/stream/DOT.cpp +++ b/src/stream/DOT.cpp @@ -52,6 +52,10 @@ DOT::DOT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( 
RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } DOT::~DOT() diff --git a/src/stream/DOT.hpp b/src/stream/DOT.hpp index 64d70c630..1a846e747 100644 --- a/src/stream/DOT.hpp +++ b/src/stream/DOT.hpp @@ -51,6 +51,7 @@ class DOT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/MUL.cpp b/src/stream/MUL.cpp index 55eced2b0..38bfe4aca 100644 --- a/src/stream/MUL.cpp +++ b/src/stream/MUL.cpp @@ -52,6 +52,10 @@ MUL::MUL(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } MUL::~MUL() diff --git a/src/stream/MUL.hpp b/src/stream/MUL.hpp index 1e79e17f9..c8fbbda30 100644 --- a/src/stream/MUL.hpp +++ b/src/stream/MUL.hpp @@ -52,6 +52,7 @@ class MUL : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/TRIAD.cpp b/src/stream/TRIAD.cpp index 543b19642..e4064b9fd 100644 --- a/src/stream/TRIAD.cpp +++ b/src/stream/TRIAD.cpp @@ -56,6 +56,10 @@ TRIAD::TRIAD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } TRIAD::~TRIAD() diff --git 
a/src/stream/TRIAD.hpp b/src/stream/TRIAD.hpp index 80685ce3c..4426560a1 100644 --- a/src/stream/TRIAD.hpp +++ b/src/stream/TRIAD.hpp @@ -53,6 +53,7 @@ class TRIAD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); From a435877a05d4dcc77692624b30071d78d466d42c Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 7 Jul 2022 17:26:10 +0300 Subject: [PATCH 002/174] starting over with StdPar because git submodules are trash --- src/algorithm/SORT-StdPar.cpp | 75 ++++ src/algorithm/SORTPAIRS-StdPar.cpp | 101 ++++++ src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 123 +++++++ src/apps/DIFFUSION3DPA-StdPar.cpp | 334 ++++++++++++++++++ src/apps/ENERGY-StdPar.cpp | 198 +++++++++++ src/apps/FIR-StdPar.cpp | 109 ++++++ src/apps/HALOEXCHANGE-StdPar.cpp | 182 ++++++++++ src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 265 ++++++++++++++ src/apps/LTIMES-StdPar.cpp | 137 +++++++ src/apps/LTIMES_NOVIEW-StdPar.cpp | 131 +++++++ src/apps/MASS3DPA-StdPar.cpp | 231 ++++++++++++ src/apps/PRESSURE-StdPar.cpp | 126 +++++++ src/apps/VOL3D-StdPar.cpp | 111 ++++++ src/basic/DAXPY-StdPar.cpp | 103 ++++++ src/basic/IF_QUAD-StdPar.cpp | 104 ++++++ src/basic/INIT3-StdPar.cpp | 104 ++++++ src/basic/INIT_VIEW1D-StdPar.cpp | 110 ++++++ src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 110 ++++++ src/basic/MAT_MAT_SHARED-StdPar.cpp | 254 +++++++++++++ src/basic/MULADDSUB-StdPar.cpp | 103 ++++++ src/basic/NESTED_INIT-StdPar.cpp | 150 ++++++++ src/basic/PI_ATOMIC-StdPar.cpp | 121 +++++++ src/basic/PI_REDUCE-StdPar.cpp | 118 +++++++ src/basic/REDUCE3_INT-StdPar.cpp | 141 ++++++++ src/basic/TRAP_INT-StdPar.cpp | 132 +++++++ src/lcals/DIFF_PREDICT-StdPar.cpp | 103 ++++++ src/lcals/EOS-StdPar.cpp | 103 ++++++ src/lcals/FIRST_DIFF-StdPar.cpp | 103 
++++++ src/lcals/FIRST_MIN-StdPar.cpp | 114 ++++++ src/lcals/FIRST_SUM-StdPar.cpp | 103 ++++++ src/lcals/GEN_LIN_RECUR-StdPar.cpp | 126 +++++++ src/lcals/HYDRO_1D-StdPar.cpp | 104 ++++++ src/lcals/HYDRO_2D-StdPar.cpp | 195 ++++++++++ src/lcals/INT_PREDICT-StdPar.cpp | 104 ++++++ src/lcals/PLANCKIAN-StdPar.cpp | 105 ++++++ src/lcals/TRIDIAG_ELIM-StdPar.cpp | 104 ++++++ src/polybench/POLYBENCH_2MM-StdPar.cpp | 255 +++++++++++++ src/polybench/POLYBENCH_3MM-StdPar.cpp | 331 +++++++++++++++++ src/polybench/POLYBENCH_ADI-StdPar.cpp | 236 +++++++++++++ src/polybench/POLYBENCH_ATAX-StdPar.cpp | 213 +++++++++++ src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 226 ++++++++++++ .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 158 +++++++++ src/polybench/POLYBENCH_GEMM-StdPar.cpp | 192 ++++++++++ src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 255 +++++++++++++ src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 145 ++++++++ src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 188 ++++++++++ src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 137 +++++++ src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 176 +++++++++ src/polybench/POLYBENCH_MVT-StdPar.cpp | 204 +++++++++++ src/stream/ADD-StdPar.cpp | 103 ++++++ src/stream/COPY-StdPar.cpp | 88 +++++ src/stream/DOT-StdPar.cpp | 116 ++++++ src/stream/MUL-StdPar.cpp | 103 ++++++ src/stream/TRIAD-StdPar.cpp | 103 ++++++ 54 files changed, 8166 insertions(+) create mode 100644 src/algorithm/SORT-StdPar.cpp create mode 100644 src/algorithm/SORTPAIRS-StdPar.cpp create mode 100644 src/apps/DEL_DOT_VEC_2D-StdPar.cpp create mode 100644 src/apps/DIFFUSION3DPA-StdPar.cpp create mode 100644 src/apps/ENERGY-StdPar.cpp create mode 100644 src/apps/FIR-StdPar.cpp create mode 100644 src/apps/HALOEXCHANGE-StdPar.cpp create mode 100644 src/apps/HALOEXCHANGE_FUSED-StdPar.cpp create mode 100644 src/apps/LTIMES-StdPar.cpp create mode 100644 src/apps/LTIMES_NOVIEW-StdPar.cpp create mode 100644 src/apps/MASS3DPA-StdPar.cpp create mode 100644 src/apps/PRESSURE-StdPar.cpp create mode 100644 
src/apps/VOL3D-StdPar.cpp create mode 100644 src/basic/DAXPY-StdPar.cpp create mode 100644 src/basic/IF_QUAD-StdPar.cpp create mode 100644 src/basic/INIT3-StdPar.cpp create mode 100644 src/basic/INIT_VIEW1D-StdPar.cpp create mode 100644 src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp create mode 100644 src/basic/MAT_MAT_SHARED-StdPar.cpp create mode 100644 src/basic/MULADDSUB-StdPar.cpp create mode 100644 src/basic/NESTED_INIT-StdPar.cpp create mode 100644 src/basic/PI_ATOMIC-StdPar.cpp create mode 100644 src/basic/PI_REDUCE-StdPar.cpp create mode 100644 src/basic/REDUCE3_INT-StdPar.cpp create mode 100644 src/basic/TRAP_INT-StdPar.cpp create mode 100644 src/lcals/DIFF_PREDICT-StdPar.cpp create mode 100644 src/lcals/EOS-StdPar.cpp create mode 100644 src/lcals/FIRST_DIFF-StdPar.cpp create mode 100644 src/lcals/FIRST_MIN-StdPar.cpp create mode 100644 src/lcals/FIRST_SUM-StdPar.cpp create mode 100644 src/lcals/GEN_LIN_RECUR-StdPar.cpp create mode 100644 src/lcals/HYDRO_1D-StdPar.cpp create mode 100644 src/lcals/HYDRO_2D-StdPar.cpp create mode 100644 src/lcals/INT_PREDICT-StdPar.cpp create mode 100644 src/lcals/PLANCKIAN-StdPar.cpp create mode 100644 src/lcals/TRIDIAG_ELIM-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_2MM-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_3MM-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_ADI-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_ATAX-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_GEMM-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_GEMVER-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_GESUMMV-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_MVT-StdPar.cpp create mode 
100644 src/stream/ADD-StdPar.cpp create mode 100644 src/stream/COPY-StdPar.cpp create mode 100644 src/stream/DOT-StdPar.cpp create mode 100644 src/stream/MUL-StdPar.cpp create mode 100644 src/stream/TRIAD-StdPar.cpp diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp new file mode 100644 index 000000000..ba808313e --- /dev/null +++ b/src/algorithm/SORT-StdPar.cpp @@ -0,0 +1,75 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SORT.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SORT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + SORT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::sort( std::execution::par_unseq, + STD_SORT_ARGS); + + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::sort(RAJA_SORT_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { + std::cout << "\n SORT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp new file mode 100644 index 000000000..f82b260e5 --- /dev/null +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -0,0 +1,101 @@ 
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SORTPAIRS.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include +#include +#include +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + SORTPAIRS_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + using pair_type = std::pair; + + std::vector vector_of_pairs; + vector_of_pairs.reserve(iend-ibegin); + + for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]); + } + + std::sort( std::execution::par_unseq, + vector_of_pairs.begin(), vector_of_pairs.end(), + [](pair_type const& lhs, pair_type const& rhs) { + return lhs.first < rhs.first; + }); + + //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + std::for_each( //std::execution::par_unseq, + begin, end, + [=](Index_type iemp) { + const pair_type& pair = vector_of_pairs[iemp - ibegin]; + x[iend*irep + iemp] = pair.first; + i[iend*irep + iemp] = pair.second; + }); + + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::sort_pairs(RAJA_SORTPAIRS_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { 
+ std::cout << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp new file mode 100644 index 000000000..93fde5151 --- /dev/null +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -0,0 +1,123 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DEL_DOT_VEC_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include "AppsData.hpp" + +#include "camp/resource.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = m_domain->n_real_zones; + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + DEL_DOT_VEC_2D_DATA_SETUP; + + NDSET2D(m_domain->jp, x,x1,x2,x3,x4) ; + NDSET2D(m_domain->jp, y,y1,y2,y3,y4) ; + NDSET2D(m_domain->jp, xdot,fx1,fx2,fx3,fx4) ; + NDSET2D(m_domain->jp, ydot,fy1,fy2,fy3,fy4) ; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ii) { + DEL_DOT_VEC_2D_BODY_INDEX; + DEL_DOT_VEC_2D_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto deldotvec2d_base_lam = [=](Index_type ii) { + DEL_DOT_VEC_2D_BODY_INDEX; + DEL_DOT_VEC_2D_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < 
run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ii) { + deldotvec2d_base_lam(ii); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + camp::resources::Resource working_res{camp::resources::Host()}; + RAJA::TypedListSegment zones(m_domain->real_zones, + m_domain->n_real_zones, + working_res); + + auto deldotvec2d_lam = [=](Index_type i) { + DEL_DOT_VEC_2D_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall(zones, deldotvec2d_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp new file mode 100644 index 000000000..38ee4da02 --- /dev/null +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -0,0 +1,334 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +// Uncomment to add compiler directives for loop unrolling +//#define USE_RAJAPERF_UNROLL + +#include "DIFFUSION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf { +namespace apps { + +void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { + +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + DIFFUSION3DPA_DATA_SETUP; + + auto begin = counting_iterator(0); + auto end = counting_iterator(NE); + + switch (vid) { + + case Base_StdPar: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](int e) { + + DIFFUSION3DPA_0_CPU; + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_1; + } + } + } + + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_2; + } + } + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_3; + } + } + } + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(qy, y, DPA_Q1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_4; + } + } + } + + CPU_FOREACH(qz, z, DPA_Q1D) { + CPU_FOREACH(qy, y, DPA_Q1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_5; + } + } + } + + CPU_FOREACH(d, y, DPA_D1D) { + CPU_FOREACH(q, x, DPA_Q1D) { + DIFFUSION3DPA_6; + } + } + + CPU_FOREACH(qz, z, DPA_Q1D) { + CPU_FOREACH(qy, y, DPA_Q1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_7; + } + } + } + + CPU_FOREACH(qz, z, DPA_Q1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_8; + } + } + } + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_9; + } + } + } + + }); // element loop + } + stopTimer(); + + 
break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_Seq: { + + // Currently Teams requires two policies if compiled with a device + using launch_policy = RAJA::expt::LaunchPolicy; + + using outer_x = RAJA::expt::LoopPolicy; + + using inner_x = RAJA::expt::LoopPolicy; + + using inner_y = RAJA::expt::LoopPolicy; + + using inner_z = RAJA::expt::LoopPolicy; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Grid is empty as the host does not need a compute grid to be specified + RAJA::expt::launch( + RAJA::expt::Grid(), + [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + [&](int e) { + + DIFFUSION3DPA_0_CPU; + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dx) { + + DIFFUSION3DPA_1; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + [&](int RAJA_UNUSED_ARG(dz)) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qx) { + + DIFFUSION3DPA_2; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qx) { + + DIFFUSION3DPA_3; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, 
RAJA::RangeSegment(0, DPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qx) { + + DIFFUSION3DPA_4; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qx) { + + DIFFUSION3DPA_5; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + [&](int RAJA_UNUSED_ARG(dz)) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int d) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int q) { + + DIFFUSION3DPA_6; + + } // lambda (q) + ); // RAJA::expt::loop + } // lambda (d) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dx) { + + DIFFUSION3DPA_7; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dx) { + + DIFFUSION3DPA_8; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (qz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + 
[&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dx) { + + DIFFUSION3DPA_9; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + } // lambda (e) + ); // RAJA::expt::loop + + } // outer lambda (ctx) + ); // RAJA::expt::launch + } // loop over kernel reps + stopTimer(); + + return; + } +#endif // RUN_RAJA_STDPAR + + default: + std::cout << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid + << std::endl; + } + +#else + RAJA_UNUSED_VAR(vid); +#endif +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp new file mode 100644 index 000000000..fceadd05e --- /dev/null +++ b/src/apps/ENERGY-StdPar.cpp @@ -0,0 +1,198 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "ENERGY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+// Runs the ENERGY kernel for the C++ standard-parallelism variants.
+//
+// vid       selects Base_StdPar, Lambda_StdPar or (only when RUN_RAJA_STDPAR
+//           is defined) RAJA_StdPar; any other value prints a diagnostic to
+//           std::cout.
+// tune_idx  is accepted for interface compatibility and is not read here.
+//
+// Every timed repetition runs the six ENERGY loop bodies one after another
+// over [0, getActualProblemSize()). Nothing is executed unless RUN_STDPAR is
+// defined.
+void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  RAJA_UNUSED_VAR(tune_idx);
+
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  // Index range handed to std::for_each; the element type matches the
+  // Index_type parameter of the loop-body lambdas.
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  ENERGY_DATA_SETUP;
+
+  // One named lambda per loop body, shared by the Lambda_StdPar and
+  // RAJA_StdPar variants.
+  auto energy_lam1 = [=](Index_type i) {
+                       ENERGY_BODY1;
+                     };
+  auto energy_lam2 = [=](Index_type i) {
+                       ENERGY_BODY2;
+                     };
+  auto energy_lam3 = [=](Index_type i) {
+                       ENERGY_BODY3;
+                     };
+  auto energy_lam4 = [=](Index_type i) {
+                       ENERGY_BODY4;
+                     };
+  auto energy_lam5 = [=](Index_type i) {
+                       ENERGY_BODY5;
+                     };
+  auto energy_lam6 = [=](Index_type i) {
+                       ENERGY_BODY6;
+                     };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY1;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY2;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY3;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY4;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY5;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY6;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam1(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam2(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam3(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam4(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam5(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam6(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): RAJA::seq_region is assumed from the "sequential
+        // region" remark on the closing line, and RAJA::loop_exec because it
+        // is the policy the HALOEXCHANGE and LTIMES RAJA_StdPar variants
+        // name -- confirm both against upstream.
+        RAJA::region<RAJA::seq_region>( [=]() {
+
+          RAJA::forall<RAJA::loop_exec>(
+            RAJA::RangeSegment(ibegin, iend), energy_lam1);
+
+          RAJA::forall<RAJA::loop_exec>(
+            RAJA::RangeSegment(ibegin, iend), energy_lam2);
+
+          RAJA::forall<RAJA::loop_exec>(
+            RAJA::RangeSegment(ibegin, iend), energy_lam3);
+
+          RAJA::forall<RAJA::loop_exec>(
+            RAJA::RangeSegment(ibegin, iend), energy_lam4);
+
+          RAJA::forall<RAJA::loop_exec>(
+            RAJA::RangeSegment(ibegin, iend), energy_lam5);
+
+          RAJA::forall<RAJA::loop_exec>(
+            RAJA::RangeSegment(ibegin, iend), energy_lam6);
+
+        }); // end sequential region (for single-source code)
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n ENERGY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp new file mode 100644 index 000000000..782a36321 --- /dev/null +++ b/src/apps/FIR-StdPar.cpp @@ -0,0 +1,109 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIR.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+// Runs the FIR kernel for the C++ standard-parallelism variants.
+//
+// vid       selects Base_StdPar, Lambda_StdPar or (only when RUN_RAJA_STDPAR
+//           is defined) RAJA_StdPar; any other value prints a diagnostic to
+//           std::cout.
+// tune_idx  is accepted for interface compatibility and is not read here.
+//
+// The loop covers [0, getActualProblemSize() - m_coefflen). Nothing is
+// executed unless RUN_STDPAR is defined.
+void FIR::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  RAJA_UNUSED_VAR(tune_idx);
+
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize() - m_coefflen;
+
+  // Index range handed to std::for_each; the element type matches the
+  // Index_type parameter of the loop-body lambdas.
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  FIR_COEFF;
+
+  FIR_DATA_SETUP;
+
+  // Local copy of coeff_array, available to the [=] lambdas below
+  // (presumably what FIR_BODY reads -- confirm against FIR.hpp).
+  Real_type coeff[FIR_COEFFLEN];
+  std::copy(std::begin(coeff_array), std::end(coeff_array), std::begin(coeff));
+
+  auto fir_lam = [=](Index_type i) {
+                   FIR_BODY;
+                 };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          FIR_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          fir_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): RAJA::loop_exec is assumed (it is the policy the
+        // HALOEXCHANGE and LTIMES RAJA_StdPar variants name) -- confirm
+        // against upstream.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), fir_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n FIR : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp new file mode 100644 index 000000000..11e551fda --- /dev/null +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -0,0 +1,182 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HALOEXCHANGE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+// Runs the HALOEXCHANGE kernel for the C++ standard-parallelism variants.
+//
+// vid       selects Base_StdPar, Lambda_StdPar or (only when RUN_RAJA_STDPAR
+//           is defined) RAJA_StdPar; any other value prints a diagnostic to
+//           std::cout.
+// tune_idx  is accepted for interface compatibility and is not read here.
+//
+// Every timed repetition performs a pack pass followed by an unpack pass.
+// The StdPar variants parallelise over the neighbor index l; inside one task
+// the variables and list entries of that neighbor are walked sequentially,
+// with the buffer pointer advanced by len after each variable. Nothing is
+// executed unless RUN_STDPAR is defined.
+void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  RAJA_UNUSED_VAR(tune_idx);
+
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+
+  HALOEXCHANGE_DATA_SETUP;
+
+  // Neighbor index range handed to std::for_each; the element type matches
+  // the Index_type parameter of the per-neighbor lambdas.
+  auto begin = counting_iterator<Index_type>(0);
+  auto end = counting_iterator<Index_type>(num_neighbors);
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type len = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            for (Index_type i = 0; i < len; i++) {
+              HALOEXCHANGE_PACK_BODY;
+            }
+            buffer += len;
+          }
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type len = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            for (Index_type i = 0; i < len; i++) {
+              HALOEXCHANGE_UNPACK_BODY;
+            }
+            buffer += len;
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type len = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_pack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_PACK_BODY;
+                };
+            for (Index_type i = 0; i < len; i++) {
+              haloexchange_pack_base_lam(i);
+            }
+            buffer += len;
+          }
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type len = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_unpack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_UNPACK_BODY;
+                };
+            for (Index_type i = 0; i < len; i++) {
+              haloexchange_unpack_base_lam(i);
+            }
+            buffer += len;
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      using EXEC_POL = RAJA::loop_exec;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // Neighbors and variables are visited sequentially here; only the
+        // innermost list traversal goes through RAJA::forall.
+        // NOTE(review): the Index_type segment type is assumed from the
+        // lambda's index parameter -- confirm against upstream.
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type len = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_pack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_PACK_BODY;
+                };
+            RAJA::forall<EXEC_POL>(
+                RAJA::TypedRangeSegment<Index_type>(0, len),
+                haloexchange_pack_base_lam );
+            buffer += len;
+          }
+        }
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type len = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_unpack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_UNPACK_BODY;
+                };
+            RAJA::forall<EXEC_POL>(
+                RAJA::TypedRangeSegment<Index_type>(0, len),
+                haloexchange_unpack_base_lam );
+            buffer += len;
+          }
+        }
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp new file mode 100644 index 000000000..86967eac6 --- /dev/null +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -0,0 +1,265 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "HALOEXCHANGE_FUSED.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + HALOEXCHANGE_FUSED_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + HALOEXCHANGE_FUSED_MANUAL_FUSER_SETUP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Index_type pack_index = 0; + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr list = pack_index_lists[l]; + Index_type len = pack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + pack_ptr_holders[pack_index] = ptr_holder{buffer, list, var}; + pack_lens[pack_index] = len; + pack_index += 1; + buffer += len; + } + } + + auto begin = counting_iterator(0); + auto end = counting_iterator(pack_index); + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type j) { + Real_ptr buffer = pack_ptr_holders[j].buffer; + Int_ptr list = pack_ptr_holders[j].list; + Real_ptr var = pack_ptr_holders[j].var; + Index_type len = pack_lens[j]; + for (Index_type i = 0; i < len; i++) {
+ HALOEXCHANGE_FUSED_PACK_BODY; + } + }); + + Index_type unpack_index = 0; + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr list = unpack_index_lists[l]; + Index_type len = unpack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + unpack_ptr_holders[unpack_index] = ptr_holder{buffer, list, var}; + unpack_lens[unpack_index] = len; + unpack_index += 1; + buffer += len; + } + } + + auto begin2 = counting_iterator(0); + auto end2 = counting_iterator(unpack_index); + std::for_each( std::execution::par_unseq, + begin2, end2, + [=](Index_type j) { + Real_ptr buffer = unpack_ptr_holders[j].buffer; + Int_ptr list = unpack_ptr_holders[j].list; + Real_ptr var = unpack_ptr_holders[j].var; + Index_type len = unpack_lens[j]; + for (Index_type i = 0; i < len; i++) { + HALOEXCHANGE_FUSED_UNPACK_BODY; + } + }); + + } + stopTimer(); + + HALOEXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN; + + break; + } + + case Lambda_StdPar : { + + HALOEXCHANGE_FUSED_MANUAL_LAMBDA_FUSER_SETUP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Index_type pack_index = 0; + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr list = pack_index_lists[l]; + Index_type len = pack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + new(&pack_lambdas[pack_index]) pack_lambda_type(make_pack_lambda(buffer, list, var)); + pack_lens[pack_index] = len; + pack_index += 1; + buffer += len; + } + } + auto begin = counting_iterator(0); + auto end = counting_iterator(pack_index); + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type j) { + auto pack_lambda = pack_lambdas[j]; + Index_type len = pack_lens[j]; + for (Index_type i = 0; i < len; i++) { + pack_lambda(i); + } + }); + + Index_type unpack_index = 0; + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr 
list = unpack_index_lists[l]; + Index_type len = unpack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + new(&unpack_lambdas[unpack_index]) unpack_lambda_type(make_unpack_lambda(buffer, list, var)); + unpack_lens[unpack_index] = len; + unpack_index += 1; + buffer += len; + } + } + auto begin2 = counting_iterator(0); + auto end2 = counting_iterator(unpack_index); + std::for_each( std::execution::par_unseq, + begin2, end2, + [=](Index_type j) { + //for (Index_type j = 0; j < unpack_index; j++) { + auto unpack_lambda = unpack_lambdas[j]; + Index_type len = unpack_lens[j]; + for (Index_type i = 0; i < len; i++) { + unpack_lambda(i); + } + }); + + } + stopTimer(); + + HALOEXCHANGE_FUSED_MANUAL_LAMBDA_FUSER_TEARDOWN; + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + using AllocatorHolder = RAJAPoolAllocatorHolder< + RAJA::basic_mempool::MemPool>; + using Allocator = AllocatorHolder::Allocator; + + AllocatorHolder allocatorHolder; + + using workgroup_policy = RAJA::WorkGroupPolicy < + RAJA::loop_work, + RAJA::ordered, + RAJA::constant_stride_array_of_objects >; + + using workpool = RAJA::WorkPool< workgroup_policy, + Index_type, + RAJA::xargs<>, + Allocator >; + + using workgroup = RAJA::WorkGroup< workgroup_policy, + Index_type, + RAJA::xargs<>, + Allocator >; + + using worksite = RAJA::WorkSite< workgroup_policy, + Index_type, + RAJA::xargs<>, + Allocator >; + + workpool pool_pack (allocatorHolder.template getAllocator()); + workpool pool_unpack(allocatorHolder.template getAllocator()); + pool_pack.reserve(num_neighbors * num_vars, 1024ull*1024ull); + pool_unpack.reserve(num_neighbors * num_vars, 1024ull*1024ull); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr list = pack_index_lists[l]; + Index_type len = pack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) 
{ + Real_ptr var = vars[v]; + auto haloexchange_fused_pack_base_lam = [=](Index_type i) { + HALOEXCHANGE_FUSED_PACK_BODY; + }; + pool_pack.enqueue( + RAJA::TypedRangeSegment(0, len), + haloexchange_fused_pack_base_lam ); + buffer += len; + } + } + workgroup group_pack = pool_pack.instantiate(); + worksite site_pack = group_pack.run(); + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr list = unpack_index_lists[l]; + Index_type len = unpack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + auto haloexchange_fused_unpack_base_lam = [=](Index_type i) { + HALOEXCHANGE_FUSED_UNPACK_BODY; + }; + pool_unpack.enqueue( + RAJA::TypedRangeSegment(0, len), + haloexchange_fused_unpack_base_lam ); + buffer += len; + } + } + workgroup group_unpack = pool_unpack.instantiate(); + worksite site_unpack = group_unpack.run(); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n HALOEXCHANGE_FUSED : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp new file mode 100644 index 000000000..59422d859 --- /dev/null +++ b/src/apps/LTIMES-StdPar.cpp @@ -0,0 +1,137 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "LTIMES.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + LTIMES_DATA_SETUP; + + auto begin = counting_iterator(0); + auto end = counting_iterator(num_z); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type z) { + for (Index_type g = 0; g < num_g; ++g ) { + for (Index_type m = 0; m < num_m; ++m ) { + for (Index_type d = 0; d < num_d; ++d ) { + LTIMES_BODY; + } + } + } + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto ltimes_base_lam = [=](Index_type d, Index_type z, + Index_type g, Index_type m) { + LTIMES_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type z) { + for (Index_type g = 0; g < num_g; ++g ) { + for (Index_type m = 0; m < num_m; ++m ) { + for (Index_type d = 0; d < num_d; ++d ) { + ltimes_base_lam(d, z, g, m); + } + } + } + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + LTIMES_VIEWS_RANGES_RAJA; + + auto ltimes_lam = [=](ID d, IZ z, IG g, IM m) { + LTIMES_BODY_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<1, RAJA::loop_exec, // z + RAJA::statement::For<2, RAJA::loop_exec, // g + RAJA::statement::For<3, RAJA::loop_exec, // m + RAJA::statement::For<0, RAJA::loop_exec, // d + RAJA::statement::Lambda<0> + > + > + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel( 
RAJA::make_tuple(IDRange(0, num_d), + IZRange(0, num_z), + IGRange(0, num_g), + IMRange(0, num_m)), + ltimes_lam + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n LTIMES : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +} // end namespace apps +} // end namespace rajaperf
diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp
new file mode 100644
index 000000000..4039f4ffc
--- /dev/null
+++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp
@@ -0,0 +1,131 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "LTIMES_NOVIEW.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+// Runs the LTIMES_NOVIEW kernel for the C++ standard-parallelism variants.
+//
+// vid       selects Base_StdPar, Lambda_StdPar or (only when RUN_RAJA_STDPAR
+//           is defined) RAJA_StdPar; any other value prints a diagnostic to
+//           std::cout.
+// tune_idx  is accepted for interface compatibility and is not read here.
+//
+// The StdPar variants parallelise over z in [0, num_z); the g, m and d loops
+// run sequentially inside each task, in that nesting order. Nothing is
+// executed unless RUN_STDPAR is defined.
+void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  RAJA_UNUSED_VAR(tune_idx);
+
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+
+  LTIMES_NOVIEW_DATA_SETUP;
+
+  // z index range handed to std::for_each; the element type matches the
+  // Index_type parameter of the per-z lambdas.
+  auto begin = counting_iterator<Index_type>(0);
+  auto end = counting_iterator<Index_type>(num_z);
+
+  // Loop body shared by the Lambda_StdPar and RAJA_StdPar variants. The
+  // argument order (d, z, g, m) matches the tuple positions 0..3 used by the
+  // RAJA kernel policy below.
+  auto ltimesnoview_lam = [=](Index_type d, Index_type z,
+                              Index_type g, Index_type m) {
+                                LTIMES_NOVIEW_BODY;
+                          };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g ) {
+            for (Index_type m = 0; m < num_m; ++m ) {
+              for (Index_type d = 0; d < num_d; ++d ) {
+                LTIMES_NOVIEW_BODY;
+              }
+            }
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g ) {
+            for (Index_type m = 0; m < num_m; ++m ) {
+              for (Index_type d = 0; d < num_d; ++d ) {
+                ltimesnoview_lam(d, z, g, m);
+              }
+            }
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<1, RAJA::loop_exec,       // z
+            RAJA::statement::For<2, RAJA::loop_exec,     // g
+              RAJA::statement::For<3, RAJA::loop_exec,   // m
+                RAJA::statement::For<0, RAJA::loop_exec, // d
+                  RAJA::statement::Lambda<0>
+                >
+              >
+            >
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::kernel<EXEC_POL>( RAJA::make_tuple(RAJA::RangeSegment(0, num_d),
+                                                 RAJA::RangeSegment(0, num_z),
+                                                 RAJA::RangeSegment(0, num_g),
+                                                 RAJA::RangeSegment(0, num_m)),
+                                ltimesnoview_lam
+                              );
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp new file mode 100644 index 000000000..8c8a6a328 --- /dev/null +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -0,0 +1,231 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MASS3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf { +namespace apps { + +//#define USE_RAJA_UNROLL +#define RAJA_DIRECT_PRAGMA(X) _Pragma(#X) +#if defined(USE_RAJA_UNROLL) +#define RAJA_UNROLL(N) RAJA_DIRECT_PRAGMA(unroll(N)) +#else +#define RAJA_UNROLL(N) +#endif +#define CPU_FOREACH(i, k, N) for (int i = 0; i < N; i++) + +void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + MASS3DPA_DATA_SETUP; + + switch (vid) { + + case Base_StdPar: { + + auto begin = counting_iterator(0); + auto end = counting_iterator((int)NE); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](int e) { + + MASS3DPA_0_CPU + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(dx, x, MPA_D1D){ + MASS3DPA_1 + } + CPU_FOREACH(dx, x, MPA_Q1D) { + MASS3DPA_2 + } + } + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(qx, x, MPA_Q1D) { + MASS3DPA_3 + } + } + + CPU_FOREACH(qy, y, MPA_Q1D) { + CPU_FOREACH(qx, x, MPA_Q1D) { + MASS3DPA_4 + } + } + + CPU_FOREACH(qy, y, MPA_Q1D) { + CPU_FOREACH(qx, x, MPA_Q1D) { + MASS3DPA_5 + } + } + + CPU_FOREACH(d, y, MPA_D1D) { + CPU_FOREACH(q, x, MPA_Q1D) { + MASS3DPA_6 + } + } + + CPU_FOREACH(qy, y, MPA_Q1D) { + CPU_FOREACH(dx, x, MPA_D1D) { + MASS3DPA_7 + } + } + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(dx, x, MPA_D1D) { + MASS3DPA_8 + } + } + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(dx, x, MPA_D1D) { + MASS3DPA_9 + } + } + + }); // element loop + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar: { + + //Currently Teams requires two policies if compiled with a device + using launch_policy = RAJA::expt::LaunchPolicy; + + using outer_x = 
RAJA::expt::LoopPolicy; + + using inner_x = RAJA::expt::LoopPolicy; + + using inner_y = RAJA::expt::LoopPolicy; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::expt::launch( + RAJA::expt::HOST, RAJA::expt::Resources(), + [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { + + MASS3DPA_0_CPU + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { + MASS3DPA_1 + }); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int dx) { + MASS3DPA_2 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { + MASS3DPA_3 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { + MASS3DPA_4 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { + MASS3DPA_5 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int d) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int q) { + MASS3DPA_6 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { + MASS3DPA_7 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { + MASS3DPA_8 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { + MASS3DPA_9 + }); + }); + }); + }); + } + stopTimer(); + + 
return; + } +#endif // RUN_RAJA_STDPAR + + default: + std::cout << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; + } +#endif +} + +} // end namespace apps +} // end namespace rajaperf
diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp
new file mode 100644
index 000000000..cc1eb2c0a
--- /dev/null
+++ b/src/apps/PRESSURE-StdPar.cpp
@@ -0,0 +1,126 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PRESSURE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+// Runs the PRESSURE kernel for the C++ standard-parallelism variants.
+//
+// vid       selects Base_StdPar, Lambda_StdPar or (only when RUN_RAJA_STDPAR
+//           is defined) RAJA_StdPar; any other value prints a diagnostic to
+//           std::cout.
+// tune_idx  is accepted for interface compatibility and is not read here.
+//
+// Every timed repetition runs the two PRESSURE loop bodies one after the
+// other over [0, getActualProblemSize()). Nothing is executed unless
+// RUN_STDPAR is defined.
+void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  RAJA_UNUSED_VAR(tune_idx);
+
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  // Index range handed to std::for_each; the element type matches the
+  // Index_type parameter of the loop-body lambdas.
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  PRESSURE_DATA_SETUP;
+
+  // Named loop bodies shared by the Lambda_StdPar and RAJA_StdPar variants.
+  auto pressure_lam1 = [=](Index_type i) {
+                         PRESSURE_BODY1;
+                       };
+  auto pressure_lam2 = [=](Index_type i) {
+                         PRESSURE_BODY2;
+                       };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          PRESSURE_BODY1;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          PRESSURE_BODY2;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          pressure_lam1(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          pressure_lam2(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): RAJA::seq_region is assumed from the "sequential
+        // region" remark on the closing line, and RAJA::loop_exec because it
+        // is the policy the HALOEXCHANGE and LTIMES RAJA_StdPar variants
+        // name -- confirm both against upstream.
+        RAJA::region<RAJA::seq_region>( [=]() {
+
+          RAJA::forall<RAJA::loop_exec>(
+            RAJA::RangeSegment(ibegin, iend), pressure_lam1);
+
+          RAJA::forall<RAJA::loop_exec>(
+            RAJA::RangeSegment(ibegin, iend), pressure_lam2);
+
+        }); // end sequential region (for single-source code)
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n PRESSURE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp new file mode 100644 index 000000000..1997e95cf --- /dev/null +++ b/src/apps/VOL3D-StdPar.cpp @@ -0,0 +1,111 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "VOL3D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "AppsData.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+// Runs the VOL3D kernel for the C++ standard-parallelism variants.
+//
+// vid       selects Base_StdPar, Lambda_StdPar or (only when RUN_RAJA_STDPAR
+//           is defined) RAJA_StdPar; any other value prints a diagnostic to
+//           std::cout.
+// tune_idx  is accepted for interface compatibility and is not read here.
+//
+// The loop covers [m_domain->fpz, m_domain->lpz + 1). Nothing is executed
+// unless RUN_STDPAR is defined.
+void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  RAJA_UNUSED_VAR(tune_idx);
+
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = m_domain->fpz;
+  const Index_type iend = m_domain->lpz+1;
+
+  // Index range handed to std::for_each; the element type matches the
+  // Index_type parameter of the loop-body lambdas.
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  VOL3D_DATA_SETUP;
+
+  // Derive the eight per-axis pointers (x0..x7, y0..y7, z0..z7) from the
+  // base coordinate arrays using the domain's jp/kp offsets.
+  NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, y,y0,y1,y2,y3,y4,y5,y6,y7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, z,z0,z1,z2,z3,z4,z5,z6,z7) ;
+
+  auto vol3d_lam = [=](Index_type i) {
+                     VOL3D_BODY;
+                   };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          VOL3D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          vol3d_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): RAJA::loop_exec is assumed (it is the policy the
+        // HALOEXCHANGE and LTIMES RAJA_StdPar variants name) -- confirm
+        // against upstream.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), vol3d_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n VOL3D : Unknown variant id = " << vid << std::endl;
+    }
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp new file mode 100644 index 000000000..61ed338ec ---
/dev/null +++ b/src/basic/DAXPY-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DAXPY.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + DAXPY_DATA_SETUP; + + auto daxpy_lam = [=](Index_type i) { + DAXPY_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + DAXPY_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + daxpy_lam(i); + }); + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), daxpy_lam); + + } + stopTimer(); + + break; + } +#endif + + default : { + std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp new file mode 100644 index 
000000000..3c86353ef --- /dev/null +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "IF_QUAD.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + IF_QUAD_DATA_SETUP; + + auto ifquad_lam = [=](Index_type i) { + IF_QUAD_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + IF_QUAD_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + ifquad_lam(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), ifquad_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git 
a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp new file mode 100644 index 000000000..7105fc9d3 --- /dev/null +++ b/src/basic/INIT3-StdPar.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT3.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + INIT3_DATA_SETUP; + + auto init3_lam = [=](Index_type i) { + INIT3_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + INIT3_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + init3_lam(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), init3_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace 
basic +} // end namespace rajaperf diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp new file mode 100644 index 000000000..c79d29b97 --- /dev/null +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -0,0 +1,110 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT_VIEW1D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + INIT_VIEW1D_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + INIT_VIEW1D_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto initview1d_base_lam = [=](Index_type i) { + INIT_VIEW1D_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + initview1d_base_lam(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + INIT_VIEW1D_VIEW_RAJA; + + auto initview1d_lam = [=](Index_type i) { + INIT_VIEW1D_BODY_RAJA; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + 
RAJA::RangeSegment(ibegin, iend), initview1d_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp new file mode 100644 index 000000000..4014ccacd --- /dev/null +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -0,0 +1,110 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT_VIEW1D_OFFSET.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = getActualProblemSize()+1; + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + INIT_VIEW1D_OFFSET_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + INIT_VIEW1D_OFFSET_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto initview1doffset_base_lam = [=](Index_type i) { + INIT_VIEW1D_OFFSET_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + initview1doffset_base_lam(i); 
+ }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + INIT_VIEW1D_OFFSET_VIEW_RAJA; + + auto initview1doffset_lam = [=](Index_type i) { + INIT_VIEW1D_OFFSET_BODY_RAJA; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), initview1doffset_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp new file mode 100644 index 000000000..02cb8622d --- /dev/null +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -0,0 +1,254 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MAT_MAT_SHARED.hpp" + +#include + +namespace rajaperf { +namespace basic { + +void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { + + const Index_type run_reps = getRunReps(); + const Index_type N = m_N; + + MAT_MAT_SHARED_DATA_SETUP; + const Index_type Nx = RAJA_DIVIDE_CEILING_INT(N, TL_SZ); + const Index_type Ny = RAJA_DIVIDE_CEILING_INT(N, TL_SZ); + + switch (vid) { + + case Base_StdPar: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type by = 0; by < Ny; ++by) { + for (Index_type bx = 0; bx < Nx; ++bx) { + + //Work around for when compiling with CLANG and HIP + //See notes in MAT_MAT_SHARED.hpp + MAT_MAT_SHARED_BODY_0_CLANG_HIP_CPU(TL_SZ) + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + MAT_MAT_SHARED_BODY_1(TL_SZ) + } + } + + for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; ++k) { + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + MAT_MAT_SHARED_BODY_2(TL_SZ) + } + } + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + MAT_MAT_SHARED_BODY_3(TL_SZ) + } + } + + } // Sequential loop + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + MAT_MAT_SHARED_BODY_4(TL_SZ) + } + } + } + } + + } // number of iterations + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar: { + + + startTimer(); + for (Index_type irep = 0; irep < run_reps; ++irep) { + + auto outer_y = [&](Index_type by) { + auto outer_x = [&](Index_type bx) { + + MAT_MAT_SHARED_BODY_0_CLANG_HIP_CPU(TL_SZ) + + auto inner_y_1 = [&](Index_type ty) { + auto inner_x_1 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_1(TL_SZ) }; + + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + if (tx < TL_SZ) + inner_x_1(tx); 
+ } + }; + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + if (ty < TL_SZ) + inner_y_1(ty); + } + + for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; ++k) { + + auto inner_y_2 = [&](Index_type ty) { + auto inner_x_2 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_2(TL_SZ) }; + + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + inner_x_2(tx); + } + }; + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + inner_y_2(ty); + } + + auto inner_y_3 = [&](Index_type ty) { + auto inner_x_3 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_3(TL_SZ) }; + + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + inner_x_3(tx); + } + }; + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + inner_y_3(ty); + } + } + + auto inner_y_4 = [&](Index_type ty) { + auto inner_x_4 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_4(TL_SZ) }; + + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + inner_x_4(tx); + } + }; + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + inner_y_4(ty); + } + }; // outer_x + + for (Index_type bx = 0; bx < Nx; ++bx) { + outer_x(bx); + } + }; + + for (Index_type by = 0; by < Ny; ++by) { + outer_y(by); + } + + } // irep + stopTimer(); + + break; + } + + case RAJA_Sq: { + + using launch_policy = RAJA::expt::LaunchPolicy; + + using outer_x = RAJA::expt::LoopPolicy; + + using outer_y = RAJA::expt::LoopPolicy; + + using inner_x = RAJA::expt::LoopPolicy; + + using inner_y = RAJA::expt::LoopPolicy; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + //Grid is empty as the host does not need a compute grid to be specified + RAJA::expt::launch(RAJA::expt::Grid(), + [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Ny), + [&](Index_type by) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Nx), + [&](Index_type bx) { + + MAT_MAT_SHARED_BODY_0(TL_SZ) + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + + [&](Index_type ty) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type tx) { + MAT_MAT_SHARED_BODY_1(TL_SZ) + 
} + ); // RAJA::expt::loop + } + ); // RAJA::expt::loop + + for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; k++) { + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type ty) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type tx) { + MAT_MAT_SHARED_BODY_2(TL_SZ) + } + ); // RAJA::expt::loop + } + ); // RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type ty) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type tx) { + MAT_MAT_SHARED_BODY_3(TL_SZ) + } + ); // RAJA::expt::loop + } + ); // RAJA::expt::loop + + ctx.teamSync(); + + } // for (k) + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type ty) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type tx) { + MAT_MAT_SHARED_BODY_4(TL_SZ) + } + ); // RAJA::expt::loop + } + ); // RAJA::expt::loop + + } // lambda (bx) + ); // RAJA::expt::loop + } // lambda (by) + ); // RAJA::expt::loop + + } // outer lambda (ctx) + ); // RAJA::expt::launch + + } // loop over kernel reps + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default: { + std::cout << "\n MAT_MAT_SHARED : Unknown variant id = " << vid + << std::endl; + } + } +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp new file mode 100644 index 000000000..e86287d75 --- /dev/null +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MULADDSUB.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + MULADDSUB_DATA_SETUP; + + auto mas_lam = [=](Index_type i) { + MULADDSUB_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + MULADDSUB_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + mas_lam(i); + }); + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), mas_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp new file mode 100644 index 000000000..a37a88dda --- /dev/null +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -0,0 +1,150 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance 
Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "NESTED_INIT.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + NESTED_INIT_DATA_SETUP; + +#ifdef USE_STDPAR_COLLAPSE + auto begin = counting_iterator(0); + auto end = counting_iterator(ni*nj*nk); +#else + auto begin = counting_iterator(0); + auto end = counting_iterator(nk); +#endif + + auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) { + NESTED_INIT_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type idx) { + const auto k = idx / (nj*ni); + const auto ij = idx % (nj*ni); + const auto j = ij / ni; + const auto i = ij % ni; +#else + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type k) { + for (Index_type j = 0; j < nj; ++j ) + for (Index_type i = 0; i < ni; ++i ) +#endif + { + NESTED_INIT_BODY; + //std::cout << i << "," << j << "," << k << ";" << idx << " PAR\n"; + } + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type idx) { + const auto k = idx / (nj*ni); + const auto ij = idx % (nj*ni); + const auto j = ij / ni; + const auto i = ij % ni; +#else + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type k) { + for (Index_type j = 0; j < nj; ++j ) + for (Index_type i = 0; i < 
ni; ++i ) +#endif + { + nestedinit_lam(i, j, k); + } + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<2, RAJA::loop_exec, // k + RAJA::statement::For<1, RAJA::loop_exec, // j + RAJA::statement::For<0, RAJA::loop_exec,// i + RAJA::statement::Lambda<0> + > + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment(0, ni), + RAJA::RangeSegment(0, nj), + RAJA::RangeSegment(0, nk)), + nestedinit_lam + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp new file mode 100644 index 000000000..27b7557bf --- /dev/null +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -0,0 +1,121 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PI_ATOMIC.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) +#include +typedef cuda::std::atomic myAtomic; +#else +#include +typedef std::atomic myAtomic; +#endif + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + PI_ATOMIC_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + myAtomic a_pi{m_pi_init}; + std::for_each( std::execution::par_unseq, + begin, end, + [=,&a_pi](Index_type i) { + double x = (double(i) + 0.5) * dx; + a_pi = a_pi + dx / (1.0 + x * x); + }); + *pi = a_pi * 4.0; + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto piatomic_base_lam = [=](Index_type i, myAtomic &a_pi) { + double x = (double(i) + 0.5) * dx; + a_pi = a_pi + dx / (1.0 + x * x); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + myAtomic a_pi{m_pi_init}; + for (Index_type i = ibegin; i < iend; ++i ) { + piatomic_base_lam(i,a_pi); + } + *pi = a_pi * 4.0; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + *pi = m_pi_init; + RAJA::forall( RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + double x = (double(i) + 0.5) * dx; + RAJA::atomicAdd(pi, dx / (1.0 + x * x)); + }); + *pi *= 4.0; + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + 
std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp new file mode 100644 index 000000000..b2c075278 --- /dev/null +++ b/src/basic/PI_REDUCE-StdPar.cpp @@ -0,0 +1,118 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PI_REDUCE.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + PI_REDUCE_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type pi = m_pi_init; + pi += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), + [=](Index_type i) { + Real_type x = (Real_type(i) + 0.5) * dx; + return dx / (1.0 + x * x); + }); + m_pi = 4.0 * pi; + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto pireduce_base_lam = [=](Index_type i) -> Real_type { + Real_type x = (Real_type(i) + 0.5) * dx; + return dx / (1.0 + x * x); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type pi = m_pi_init; + + pi += std::transform_reduce( std::execution::par_unseq, + begin, end, + 
Real_type(0), std::plus(), pireduce_base_lam); + + m_pi = 4.0 * pi; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum pi(m_pi_init); + + RAJA::forall( RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + PI_REDUCE_BODY; + }); + + m_pi = 4.0 * pi.get(); + + } + stopTimer(); + + break; + } +#endif + + default : { + std::cout << "\n PI_REDUCE : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp new file mode 100644 index 000000000..b40129c17 --- /dev/null +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -0,0 +1,141 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE3_INT.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include "common/StdParUtils.hpp" +#include +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + REDUCE3_INT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + typedef std::array Reduce_type; + Reduce_type result = + std::transform_reduce( std::execution::par_unseq, + begin, end, + Reduce_type{m_vsum_init,m_vmin_init,m_vmax_init}, + [=](Reduce_type a, Reduce_type b) -> Reduce_type { + auto plus = a[0] + b[0]; + auto min = std::min(a[1],b[1]); + auto max = std::max(a[2],b[2]); + Reduce_type red{ plus, min, max }; + return red; + }, + [=](Index_type i) -> std::array{ + Reduce_type val{ vec[i], vec[i], vec[i] }; + return val; + + } + ); + + m_vsum += result[0]; + m_vmin = RAJA_MIN(m_vmin, result[1]); + m_vmax = RAJA_MAX(m_vmax, result[2]); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto init3_base_lam = [=](Index_type i) -> Int_type { + return vec[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Int_type vsum = m_vsum_init; + Int_type vmin = m_vmin_init; + Int_type vmax = m_vmax_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + vsum += init3_base_lam(i); + vmin = RAJA_MIN(vmin, init3_base_lam(i)); + vmax = RAJA_MAX(vmax, init3_base_lam(i)); + } + + m_vsum += vsum; + m_vmin = RAJA_MIN(m_vmin, vmin); + m_vmax = RAJA_MAX(m_vmax, vmax); + + } + stopTimer(); + + break; + } + +#if 
defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum vsum(m_vsum_init); + RAJA::ReduceMin vmin(m_vmin_init); + RAJA::ReduceMax vmax(m_vmax_init); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + REDUCE3_INT_BODY_RAJA; + }); + + m_vsum += static_cast(vsum.get()); + m_vmin = RAJA_MIN(m_vmin, static_cast(vmin.get())); + m_vmax = RAJA_MAX(m_vmax, static_cast(vmax.get())); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp new file mode 100644 index 000000000..359ed363a --- /dev/null +++ b/src/basic/TRAP_INT-StdPar.cpp @@ -0,0 +1,132 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRAP_INT.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include +#include + +#include + +namespace rajaperf +{ +namespace basic +{ + +// +// Function used in TRAP_INT loop. 
+// +RAJA_INLINE +Real_type trap_int_func(Real_type x, + Real_type y, + Real_type xp, + Real_type yp) +{ + Real_type denom = (x - xp)*(x - xp) + (y - yp)*(y - yp); + denom = 1.0/sqrt(denom); + return denom; +} + +void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + TRAP_INT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sumx = m_sumx_init; + + sumx += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), + [=](Index_type i) { + Real_type x = x0 + i*h; + return trap_int_func(x, y, xp, yp); + }); + m_sumx += sumx * h; + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto trapint_base_lam = [=](Index_type i) -> Real_type { + Real_type x = x0 + i*h; + return trap_int_func(x, y, xp, yp); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sumx = m_sumx_init; + + sumx += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), trapint_base_lam); + + m_sumx += sumx * h; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum sumx(m_sumx_init); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + TRAP_INT_BODY; + }); + + m_sumx += static_cast(sumx.get()) * h; + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp 
b/src/lcals/DIFF_PREDICT-StdPar.cpp new file mode 100644 index 000000000..b86723185 --- /dev/null +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DIFF_PREDICT.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + DIFF_PREDICT_DATA_SETUP; + + auto diffpredict_lam = [=](Index_type i) { + DIFF_PREDICT_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + DIFF_PREDICT_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + diffpredict_lam(i); + }); + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), diffpredict_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl; + } 
+ + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp new file mode 100644 index 000000000..a3aa279f2 --- /dev/null +++ b/src/lcals/EOS-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "EOS.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void EOS::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + EOS_DATA_SETUP; + + auto eos_lam = [=](Index_type i) { + EOS_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + EOS_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + eos_lam(i); + }); + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), eos_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n EOS : Unknown variant id = " << vid 
<< std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp new file mode 100644 index 000000000..1a2d15e6c --- /dev/null +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "FIRST_DIFF.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + FIRST_DIFF_DATA_SETUP; + + auto firstdiff_lam = [=](Index_type i) { + FIRST_DIFF_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + FIRST_DIFF_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + firstdiff_lam(i); + }); + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), firstdiff_lam); + + } + stopTimer(); + + break; + } +#endif 
// RUN_RAJA_STDPAR + + default : { + std::cout << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp new file mode 100644 index 000000000..4a019b5b3 --- /dev/null +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -0,0 +1,114 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "FIRST_MIN.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + FIRST_MIN_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + auto result = + std::min_element( std::execution::par_unseq, + &x[ibegin], &x[iend]); + auto loc = std::distance(&x[ibegin], result); + + m_minloc = RAJA_MAX(m_minloc, loc); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto firstmin_base_lam = [=](Index_type i) -> Real_type { + return x[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + FIRST_MIN_MINLOC_INIT; + + for (Index_type i = ibegin; i < iend; ++i ) { + if ( firstmin_base_lam(i) < mymin.val ) { \ + mymin.val = x[i]; \ + mymin.loc = i; \ + } + } + + m_minloc = RAJA_MAX(m_minloc, mymin.loc); + + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case 
RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceMinLoc loc( + m_xmin_init, m_initloc); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + FIRST_MIN_BODY_RAJA; + }); + + m_minloc = RAJA_MAX(m_minloc, loc.getLoc()); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp new file mode 100644 index 000000000..1f47f9412 --- /dev/null +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "FIRST_SUM.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + FIRST_SUM_DATA_SETUP; + + auto firstsum_lam = [=](Index_type i) { + FIRST_SUM_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + FIRST_SUM_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for 
(RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + firstsum_lam(i); + }); + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), firstsum_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp new file mode 100644 index 000000000..f1cd69a0d --- /dev/null +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -0,0 +1,126 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "GEN_LIN_RECUR.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + GEN_LIN_RECUR_DATA_SETUP; + + auto beginK = counting_iterator(0); + auto endK = counting_iterator(N); + auto beginI = counting_iterator(1); + auto endI = counting_iterator(N+1); + + auto genlinrecur_lam1 = [=](Index_type k) { + GEN_LIN_RECUR_BODY1; + }; + auto genlinrecur_lam2 = [=](Index_type i) { + GEN_LIN_RECUR_BODY2; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + //for (Index_type k = 0; k < N; ++k ) { + std::for_each( std::execution::par_unseq, + beginK, endK, + [=](Index_type k) { + GEN_LIN_RECUR_BODY1; + }); + + //for (Index_type i = 1; i < N+1; ++i ) { + std::for_each( std::execution::par_unseq, + beginI, endI, + [=](Index_type i) { + GEN_LIN_RECUR_BODY2; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + //for (Index_type k = 0; k < N; ++k ) { + std::for_each( std::execution::par_unseq, + beginK, endK, + [=](Index_type k) { + genlinrecur_lam1(k); + }); + + //for (Index_type i = 1; i < N+1; ++i ) { + std::for_each( std::execution::par_unseq, + beginI, endI, + [=](Index_type i) { + genlinrecur_lam2(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(0, N), genlinrecur_lam1); + + RAJA::forall( + RAJA::RangeSegment(1, N+1), genlinrecur_lam2); + + } + stopTimer(); + + break; 
+ } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp new file mode 100644 index 000000000..45601b347 --- /dev/null +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "HYDRO_1D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + HYDRO_1D_DATA_SETUP; + + auto hydro1d_lam = [=](Index_type i) { + HYDRO_1D_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + HYDRO_1D_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + hydro1d_lam(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( 
+ RAJA::RangeSegment(ibegin, iend), hydro1d_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp new file mode 100644 index 000000000..1650dffd5 --- /dev/null +++ b/src/lcals/HYDRO_2D-StdPar.cpp @@ -0,0 +1,195 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "HYDRO_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type kbeg = 1; + const Index_type kend = m_kn - 1; + const Index_type jbeg = 1; + const Index_type jend = m_jn - 1; + + auto beginK = counting_iterator(kbeg); + auto endK = counting_iterator(kend); + auto beginJ = counting_iterator(jbeg); + auto endJ = counting_iterator(jend); + + HYDRO_2D_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par, + beginK, endK, + [=](Index_type k) { + std::for_each( std::execution::unseq, + beginJ, endJ, + [=](Index_type j) { + HYDRO_2D_BODY1; + }); + }); + + std::for_each( std::execution::par, + beginK, endK, + [=](Index_type k) { + std::for_each( std::execution::unseq, + beginJ, endJ, + [=](Index_type j) { + HYDRO_2D_BODY2; + }); + }); + + std::for_each( 
std::execution::par, + beginK, endK, + [=](Index_type k) { + std::for_each( std::execution::unseq, + beginJ, endJ, + [=](Index_type j) { + HYDRO_2D_BODY3; + }); + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto hydro2d_base_lam1 = [=] (Index_type k, Index_type j) { + HYDRO_2D_BODY1; + }; + auto hydro2d_base_lam2 = [=] (Index_type k, Index_type j) { + HYDRO_2D_BODY2; + }; + auto hydro2d_base_lam3 = [=] (Index_type k, Index_type j) { + HYDRO_2D_BODY3; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par, + beginK, endK, + [=](Index_type k) { + std::for_each( std::execution::unseq, + beginJ, endJ, + [=](Index_type j) { + hydro2d_base_lam1(k, j); + }); + }); + + std::for_each( std::execution::par, + beginK, endK, + [=](Index_type k) { + std::for_each( std::execution::unseq, + beginJ, endJ, + [=](Index_type j) { + hydro2d_base_lam2(k, j); + }); + }); + + std::for_each( std::execution::par, + beginK, endK, + [=](Index_type k) { + std::for_each( std::execution::unseq, + beginJ, endJ, + [=](Index_type j) { + hydro2d_base_lam3(k, j); + }); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + HYDRO_2D_VIEWS_RAJA; + + auto hydro2d_lam1 = [=] (Index_type k, Index_type j) { + HYDRO_2D_BODY1_RAJA; + }; + auto hydro2d_lam2 = [=] (Index_type k, Index_type j) { + HYDRO_2D_BODY2_RAJA; + }; + auto hydro2d_lam3 = [=] (Index_type k, Index_type j) { + HYDRO_2D_BODY3_RAJA; + }; + + using EXECPOL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, // k + RAJA::statement::For<1, RAJA::loop_exec, // j + RAJA::statement::Lambda<0> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel( + RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend), + RAJA::RangeSegment(jbeg, jend)), + hydro2d_lam1); + + RAJA::kernel( + RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend), + RAJA::RangeSegment(jbeg, 
jend)), + hydro2d_lam2); + + RAJA::kernel( + RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend), + RAJA::RangeSegment(jbeg, jend)), + hydro2d_lam3); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp new file mode 100644 index 000000000..d8139dfbe --- /dev/null +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INT_PREDICT.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + INT_PREDICT_DATA_SETUP; + + auto intpredict_lam = [=](Index_type i) { + INT_PREDICT_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + INT_PREDICT_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + 
intpredict_lam(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), intpredict_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp new file mode 100644 index 000000000..3d937bb22 --- /dev/null +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -0,0 +1,105 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PLANCKIAN.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + PLANCKIAN_DATA_SETUP; + + auto planckian_lam = [=](Index_type i) { + PLANCKIAN_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + PLANCKIAN_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep 
< run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + planckian_lam(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), planckian_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp new file mode 100644 index 000000000..ff1986bc1 --- /dev/null +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRIDIAG_ELIM.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = m_N; + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + TRIDIAG_ELIM_DATA_SETUP; + + auto tridiag_elim_lam = [=](Index_type i) { + TRIDIAG_ELIM_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + TRIDIAG_ELIM_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + tridiag_elim_lam(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), tridiag_elim_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp new file mode 100644 index 000000000..feb441614 --- /dev/null +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -0,0 +1,255 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore 
National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_2MM.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +#define USE_STDPAR_COLLAPSE 1 + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_2MM_DATA_SETUP; + +#ifdef USE_STDPAR_COLLAPSE + counting_iterator beginIJ(0); + counting_iterator endIJ(ni*nj); + counting_iterator beginIL(0); + counting_iterator endIL(ni*nl); +#else + counting_iterator beginI(0); + counting_iterator endI(ni); + counting_iterator beginL(0); + counting_iterator endL(nl); +#endif + counting_iterator beginJ(0); + counting_iterator endJ(nj); + counting_iterator beginK(0); + counting_iterator endK(nk); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_2MM_BODY1; + std::for_each(beginK, endK, [=,&dot](Index_type k) { + POLYBENCH_2MM_BODY2; + }); + POLYBENCH_2MM_BODY3; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIL, endIL, [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + 
POLYBENCH_2MM_BODY4; + std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + POLYBENCH_2MM_BODY5; + }); + POLYBENCH_2MM_BODY6; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_2mm_base_lam2 = [=](Index_type i, Index_type j, + Index_type k, Real_type &dot) { + POLYBENCH_2MM_BODY2; + }; + auto poly_2mm_base_lam3 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_2MM_BODY3; + }; + auto poly_2mm_base_lam5 = [=](Index_type i, Index_type l, + Index_type j, Real_type &dot) { + POLYBENCH_2MM_BODY5; + }; + auto poly_2mm_base_lam6 = [=](Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_2MM_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_2MM_BODY1; + std::for_each(beginK, endK, [=,&dot](Index_type k) { + poly_2mm_base_lam2(i, j, k, dot); + }); + poly_2mm_base_lam3(i, j, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIL, endIL, [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_2MM_BODY4; + std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + poly_2mm_base_lam5(i, l, j, dot); + }); + poly_2mm_base_lam6(i, l, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_2MM_VIEWS_RAJA; + + auto poly_2mm_lam1 = [=](Real_type &dot) { + POLYBENCH_2MM_BODY1_RAJA; + 
}; + auto poly_2mm_lam2 = [=](Index_type i, Index_type j, Index_type k, + Real_type &dot) { + POLYBENCH_2MM_BODY2_RAJA; + }; + auto poly_2mm_lam3 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_2MM_BODY3_RAJA; + }; + auto poly_2mm_lam4 = [=](Real_type &dot) { + POLYBENCH_2MM_BODY4_RAJA; + }; + auto poly_2mm_lam5 = [=](Index_type i, Index_type l, Index_type j, + Real_type &dot) { + POLYBENCH_2MM_BODY5_RAJA; + }; + auto poly_2mm_lam6 = [=](Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_2MM_BODY6_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nk}), + RAJA::tuple{0.0}, + + poly_2mm_lam1, + poly_2mm_lam2, + poly_2mm_lam3 + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nl}, + RAJA::RangeSegment{0, nj}), + RAJA::tuple{0.0}, + + poly_2mm_lam4, + poly_2mm_lam5, + poly_2mm_lam6 + + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp new file mode 100644 index 000000000..189caa032 --- /dev/null +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -0,0 +1,331 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National 
Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_3MM.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +#define USE_STDPAR_COLLAPSE 1 + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_3MM_DATA_SETUP; + +#ifdef USE_STDPAR_COLLAPSE + counting_iterator beginIJ(0); + counting_iterator endIJ(ni*nj); + counting_iterator beginIL(0); + counting_iterator endIL(ni*nl); + counting_iterator beginJL(0); + counting_iterator endJL(nj*nl); +#else + counting_iterator beginI(0); + counting_iterator endI(ni); + counting_iterator beginL(0); + counting_iterator endL(nl); +#endif + counting_iterator beginJ(0); + counting_iterator endJ(nj); + counting_iterator beginK(0); + counting_iterator endK(nk); + counting_iterator beginM(0); + counting_iterator endM(nm); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_3MM_BODY1; + std::for_each(beginK, endK, [=,&dot](Index_type k) { + POLYBENCH_3MM_BODY2; + }); + POLYBENCH_3MM_BODY3; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginJL, endJL, [=](Index_type jl) { + const auto j = jl / nl; + const auto l = jl % nl; +#else + std::for_each( 
std::execution::par_unseq, + beginJ, endJ, [=](Index_type j) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY4; + std::for_each(beginM, endM, [=,&dot](Index_type m) { + POLYBENCH_3MM_BODY5; + }); + POLYBENCH_3MM_BODY6; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIL, endIL, [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY7; + std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + POLYBENCH_3MM_BODY8; + }); + POLYBENCH_3MM_BODY9; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_3mm_base_lam2 = [=] (Index_type i, Index_type j, Index_type k, + Real_type &dot) { + POLYBENCH_3MM_BODY2; + }; + auto poly_3mm_base_lam3 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY3; + }; + auto poly_3mm_base_lam5 = [=] (Index_type j, Index_type l, Index_type m, + Real_type &dot) { + POLYBENCH_3MM_BODY5; + }; + auto poly_3mm_base_lam6 = [=] (Index_type j, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY6; + }; + auto poly_3mm_base_lam8 = [=] (Index_type i, Index_type l, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY8; + }; + auto poly_3mm_base_lam9 = [=] (Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY9; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_3MM_BODY1; + std::for_each(beginK, endK, 
[=,&dot](Index_type k) { + poly_3mm_base_lam2(i, j, k, dot); + }); + poly_3mm_base_lam3(i, j, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginJL, endJL, [=](Index_type jl) { + const auto j = jl / nl; + const auto l = jl % nl; +#else + std::for_each( std::execution::par_unseq, + beginJ, endJ, [=](Index_type j) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY4; + std::for_each(beginM, endM, [=,&dot](Index_type m) { + poly_3mm_base_lam5(j, l, m, dot); + }); + poly_3mm_base_lam6(j, l, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIL, endIL, [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY7; + std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + poly_3mm_base_lam8(i, l, j, dot); + }); + poly_3mm_base_lam9(i, l, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_3MM_VIEWS_RAJA; + + auto poly_3mm_lam1 = [=] (Real_type &dot) { + POLYBENCH_3MM_BODY1_RAJA; + }; + auto poly_3mm_lam2 = [=] (Index_type i, Index_type j, Index_type k, + Real_type &dot) { + POLYBENCH_3MM_BODY2_RAJA; + }; + auto poly_3mm_lam3 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY3_RAJA; + }; + auto poly_3mm_lam4 = [=] (Real_type &dot) { + POLYBENCH_3MM_BODY4_RAJA; + }; + auto poly_3mm_lam5 = [=] (Index_type j, Index_type l, Index_type m, + Real_type &dot) { + POLYBENCH_3MM_BODY5_RAJA; + }; + auto poly_3mm_lam6 = [=] (Index_type j, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY6_RAJA; + }; + auto poly_3mm_lam7 = [=] (Real_type &dot) { + POLYBENCH_3MM_BODY7_RAJA; + }; + 
auto poly_3mm_lam8 = [=] (Index_type i, Index_type l, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY8_RAJA; + }; + auto poly_3mm_lam9 = [=] (Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY9_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nk}), + RAJA::tuple{0.0}, + + poly_3mm_lam1, + poly_3mm_lam2, + poly_3mm_lam3 + + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nl}, + RAJA::RangeSegment{0, nm}), + RAJA::tuple{0.0}, + + poly_3mm_lam4, + poly_3mm_lam5, + poly_3mm_lam6 + + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nl}, + RAJA::RangeSegment{0, nj}), + RAJA::tuple{0.0}, + + poly_3mm_lam7, + poly_3mm_lam8, + poly_3mm_lam9 + + ); + + } // end run_reps + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp new file mode 100644 index 000000000..6d2a99650 --- /dev/null +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -0,0 +1,236 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. 
+// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_ADI.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_ADI_DATA_SETUP; + + counting_iterator begin(1); + counting_iterator end(n-1); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_ADI_BODY2; + for (Index_type j = 1; j < n-1; ++j) { + POLYBENCH_ADI_BODY3; + } + POLYBENCH_ADI_BODY4; + for (Index_type k = n-2; k >= 1; --k) { + POLYBENCH_ADI_BODY5; + } + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_ADI_BODY6; + for (Index_type j = 1; j < n-1; ++j) { + POLYBENCH_ADI_BODY7; + } + POLYBENCH_ADI_BODY8; + for (Index_type k = n-2; k >= 1; --k) { + POLYBENCH_ADI_BODY9; + } + }); + + } // tstep loop + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_adi_base_lam2 = [=](Index_type i) { + POLYBENCH_ADI_BODY2; + }; + auto poly_adi_base_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY3; + }; + auto poly_adi_base_lam4 = [=](Index_type i) { + POLYBENCH_ADI_BODY4; + }; + auto poly_adi_base_lam5 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY5; + }; + auto poly_adi_base_lam6 = [=](Index_type i) { + POLYBENCH_ADI_BODY6; + }; + auto poly_adi_base_lam7 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY7; + }; + auto poly_adi_base_lam8 = [=](Index_type i) { + POLYBENCH_ADI_BODY8; + }; + auto poly_adi_base_lam9 = 
[=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY9; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + poly_adi_base_lam2(i); + for (Index_type j = 1; j < n-1; ++j) { + poly_adi_base_lam3(i, j); + } + poly_adi_base_lam4(i); + for (Index_type k = n-2; k >= 1; --k) { + poly_adi_base_lam5(i, k); + } + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + poly_adi_base_lam6(i); + for (Index_type j = 1; j < n-1; ++j) { + poly_adi_base_lam7(i, j); + } + poly_adi_base_lam8(i); + for (Index_type k = n-2; k >= 1; --k) { + poly_adi_base_lam9(i, k); + } + }); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_ADI_VIEWS_RAJA; + + auto poly_adi_lam2 = [=](Index_type i) { + POLYBENCH_ADI_BODY2_RAJA; + }; + auto poly_adi_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY3_RAJA; + }; + auto poly_adi_lam4 = [=](Index_type i) { + POLYBENCH_ADI_BODY4_RAJA; + }; + auto poly_adi_lam5 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY5_RAJA; + }; + auto poly_adi_lam6 = [=](Index_type i) { + POLYBENCH_ADI_BODY6_RAJA; + }; + auto poly_adi_lam7 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY7_RAJA; + }; + auto poly_adi_lam8 = [=](Index_type i) { + POLYBENCH_ADI_BODY8_RAJA; + }; + auto poly_adi_lam9 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY9_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<3, RAJA::Segs<0,2>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep 
< run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{1, n-1}, + RAJA::RangeSegment{1, n-1}, + RAJA::RangeStrideSegment{n-2, 0, -1}), + + poly_adi_lam2, + poly_adi_lam3, + poly_adi_lam4, + poly_adi_lam5 + + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{1, n-1}, + RAJA::RangeSegment{1, n-1}, + RAJA::RangeStrideSegment{n-2, 0, -1}), + + poly_adi_lam6, + poly_adi_lam7, + poly_adi_lam8, + poly_adi_lam9 + + ); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp new file mode 100644 index 000000000..1c3d1a3a9 --- /dev/null +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -0,0 +1,213 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_ATAX.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps= getRunReps(); + + POLYBENCH_ATAX_DATA_SETUP; + + counting_iterator begin(0); + counting_iterator end(N); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_ATAX_BODY1; + std::for_each( std::execution::unseq, + begin, end, + [=,&dot](Index_type j) { + POLYBENCH_ATAX_BODY2; + }); + POLYBENCH_ATAX_BODY3; + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type j) { + POLYBENCH_ATAX_BODY4; + std::for_each( std::execution::unseq, + begin, end, + [=,&dot](Index_type i) { + POLYBENCH_ATAX_BODY5; + }); + POLYBENCH_ATAX_BODY6; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_atax_base_lam2 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_ATAX_BODY2; + }; + auto poly_atax_base_lam3 = [=] (Index_type i, + Real_type &dot) { + POLYBENCH_ATAX_BODY3; + }; + auto poly_atax_base_lam5 = [=] (Index_type i, Index_type j , + Real_type &dot) { + POLYBENCH_ATAX_BODY5; + }; + auto poly_atax_base_lam6 = [=] (Index_type j, + Real_type &dot) { + POLYBENCH_ATAX_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_ATAX_BODY1; + std::for_each( std::execution::unseq, + begin, end, + [=,&dot](Index_type j) { + poly_atax_base_lam2(i, j, dot); + }); + poly_atax_base_lam3(i, dot); + }); + + 
std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type j) { + POLYBENCH_ATAX_BODY4; + std::for_each( std::execution::unseq, + begin, end, + [=,&dot](Index_type i) { + poly_atax_base_lam5(i, j, dot); + }); + poly_atax_base_lam6(j, dot); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_ATAX_VIEWS_RAJA; + + auto poly_atax_lam1 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_ATAX_BODY1_RAJA; + }; + auto poly_atax_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY2_RAJA; + }; + auto poly_atax_lam3 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_ATAX_BODY3_RAJA; + }; + auto poly_atax_lam4 = [=] (Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY4_RAJA; + }; + auto poly_atax_lam5 = [=] (Index_type i, Index_type j , Real_type &dot) { + POLYBENCH_ATAX_BODY5_RAJA; + }; + auto poly_atax_lam6 = [=] (Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY6_RAJA; + }; + + using EXEC_POL1 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + + using EXEC_POL2 = + RAJA::KernelPolicy< + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<1>, RAJA::Params<0>>, + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<1>, RAJA::Params<0>> + > + >; + + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + RAJA::tuple{0.0}, + + poly_atax_lam1, + poly_atax_lam2, + poly_atax_lam3 + + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), 
+ RAJA::tuple{0.0}, + + poly_atax_lam4, + poly_atax_lam5, + poly_atax_lam6 + + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp new file mode 100644 index 000000000..5bd7435dd --- /dev/null +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -0,0 +1,226 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_FDTD_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_FDTD_2D_DATA_SETUP; + + counting_iterator beginX(0); + counting_iterator endX(nx); + counting_iterator beginY(0); + counting_iterator endY(ny); + counting_iterator begin1X(1); + counting_iterator end1X(nx); + counting_iterator beginXm1(0); + counting_iterator endXm1(nx-1); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + beginY, endY, + [=](Index_type j) { + POLYBENCH_FDTD_2D_BODY1; + }); + std::for_each( std::execution::par_unseq, + begin1X, end1X, + [=](Index_type i) { + for (Index_type j = 0; j < ny; j++) { + POLYBENCH_FDTD_2D_BODY2; + } + }); + 
std::for_each( std::execution::par_unseq, + beginX, endX, + [=](Index_type i) { + for (Index_type j = 1; j < ny; j++) { + POLYBENCH_FDTD_2D_BODY3; + } + }); + std::for_each( std::execution::par_unseq, + beginXm1, endXm1, + [=](Index_type i) { + for (Index_type j = 0; j < ny - 1; j++) { + POLYBENCH_FDTD_2D_BODY4; + } + }); + + } // tstep loop + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + // + // Note: first lambda must use capture by reference so that the + // scalar variable 't' used in it is updated for each + // t-loop iteration. + // + auto poly_fdtd2d_base_lam1 = [&](Index_type j) { + POLYBENCH_FDTD_2D_BODY1; + }; + auto poly_fdtd2d_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY2; + }; + auto poly_fdtd2d_base_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY3; + }; + auto poly_fdtd2d_base_lam4 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY4; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + std::for_each( //std::execution::par_unseq, + beginY, endY, + [=](Index_type j) { + poly_fdtd2d_base_lam1(j); + }); + std::for_each( //std::execution::par_unseq, + begin1X, end1X, + [=](Index_type i) { + for (Index_type j = 0; j < ny; j++) { + poly_fdtd2d_base_lam2(i, j); + } + }); + std::for_each( //std::execution::par_unseq, + beginX, endX, + [=](Index_type i) { + for (Index_type j = 1; j < ny; j++) { + poly_fdtd2d_base_lam3(i, j); + } + }); + std::for_each( //std::execution::par_unseq, + beginXm1, endXm1, + [=](Index_type i) { + for (Index_type j = 0; j < ny - 1; j++) { + poly_fdtd2d_base_lam4(i, j); + } + }); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_FDTD_2D_VIEWS_RAJA; + + // + // Note: first lambda must use capture by reference so that the + // scalar variable 't' used in it is updated for each + // t-loop iteration. 
+ // + auto poly_fdtd2d_lam1 = [&](Index_type j) { + POLYBENCH_FDTD_2D_BODY1_RAJA; + }; + auto poly_fdtd2d_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY2_RAJA; + }; + auto poly_fdtd2d_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY3_RAJA; + }; + auto poly_fdtd2d_lam4 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY4_RAJA; + }; + + using EXEC_POL1 = RAJA::loop_exec; + + using EXEC_POL234 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + RAJA::forall( RAJA::RangeSegment(0, ny), + poly_fdtd2d_lam1 + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{1, nx}, + RAJA::RangeSegment{0, ny}), + poly_fdtd2d_lam2 + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{0, nx}, + RAJA::RangeSegment{1, ny}), + poly_fdtd2d_lam3 + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{0, nx-1}, + RAJA::RangeSegment{0, ny-1}), + poly_fdtd2d_lam4 + ); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp new file mode 100644 index 000000000..023b125d3 --- /dev/null +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -0,0 +1,158 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_FLOYD_WARSHALL.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +//#define USE_STDPAR_COLLAPSE 1 + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_FLOYD_WARSHALL_DATA_SETUP; + +#ifdef USE_STDPAR_COLLAPSE + counting_iterator begin2(0); + counting_iterator end2(N*N); +#else + counting_iterator begin(0); + counting_iterator end(N); +#endif + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin2, end2, [=](Index_type ki) { + const auto k = ki / N; + const auto i = ki % N; +#else + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type k) { + std::for_each(begin, end, + [=](Index_type i) { +#endif + for (Index_type j = 0; j < N; ++j) { + POLYBENCH_FLOYD_WARSHALL_BODY; + } +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_floydwarshall_base_lam = [=](Index_type k, Index_type i, + Index_type j) { + POLYBENCH_FLOYD_WARSHALL_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin2, end2, [=](Index_type ki) { + const auto k = ki / N; + const auto i = ki % N; +#else + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type k) { + std::for_each(begin, end, + [=](Index_type i) { +#endif + for (Index_type j = 0; j < N; ++j) { + poly_floydwarshall_base_lam(k, i, j); + } +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + 
break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_FLOYD_WARSHALL_VIEWS_RAJA; + + auto poly_floydwarshall_lam = [=](Index_type k, Index_type i, + Index_type j) { + POLYBENCH_FLOYD_WARSHALL_BODY_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + poly_floydwarshall_lam + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp new file mode 100644 index 000000000..1fd75528e --- /dev/null +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -0,0 +1,192 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_GEMM.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +#define USE_STDPAR_COLLAPSE 1 + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_GEMM_DATA_SETUP; + +#ifdef USE_STDPAR_COLLAPSE + counting_iterator beginIJ(0); + counting_iterator endIJ(ni*nj); +#else + counting_iterator beginI(0); + counting_iterator beginJ(0); + counting_iterator endJ(nj); + counting_iterator endI(ni); +#endif + counting_iterator beginK(0); + counting_iterator endK(nk); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_GEMM_BODY1; + POLYBENCH_GEMM_BODY2; + std::for_each(beginK, endK, [=,&dot](Index_type k) { + POLYBENCH_GEMM_BODY3; + }); + POLYBENCH_GEMM_BODY4; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_gemm_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_GEMM_BODY2; + }; + auto poly_gemm_base_lam3 = [=](Index_type i, Index_type j, Index_type k, + Real_type& dot) { + POLYBENCH_GEMM_BODY3; + }; + auto poly_gemm_base_lam4 = [=](Index_type i, Index_type j, + Real_type& dot) { + POLYBENCH_GEMM_BODY4; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + 
beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_GEMM_BODY1; + poly_gemm_base_lam2(i, j); + std::for_each(beginK, endK, [=,&dot](Index_type k) { + poly_gemm_base_lam3(i, j, k, dot); + }); + poly_gemm_base_lam4(i, j, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_GEMM_VIEWS_RAJA; + + auto poly_gemm_lam1 = [=](Real_type& dot) { + POLYBENCH_GEMM_BODY1_RAJA; + }; + auto poly_gemm_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_GEMM_BODY2_RAJA; + }; + auto poly_gemm_lam3 = [=](Index_type i, Index_type j, Index_type k, + Real_type& dot) { + POLYBENCH_GEMM_BODY3_RAJA; + }; + auto poly_gemm_lam4 = [=](Index_type i, Index_type j, + Real_type& dot) { + POLYBENCH_GEMM_BODY4_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<2, RAJA::Segs<0,1,2>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<3, RAJA::Segs<0,1>, RAJA::Params<0>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + + RAJA::make_tuple( RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nk} ), + RAJA::tuple{0.0}, // variable for dot + + poly_gemm_lam1, + poly_gemm_lam2, + poly_gemm_lam3, + poly_gemm_lam4 + + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git 
a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp new file mode 100644 index 000000000..2673abd45 --- /dev/null +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -0,0 +1,255 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_GEMVER.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +#define USE_STDPAR_COLLAPSE 1 + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_GEMVER_DATA_SETUP; + + counting_iterator begin(0); + counting_iterator end(n); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + std::for_each(begin, end, [=](Index_type j) { + POLYBENCH_GEMVER_BODY1; + }); + }); + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + POLYBENCH_GEMVER_BODY2; + std::for_each(begin, end, [=,&dot](Index_type j) { + POLYBENCH_GEMVER_BODY3; + }); + POLYBENCH_GEMVER_BODY4; + }); + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + POLYBENCH_GEMVER_BODY5; + }); + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + POLYBENCH_GEMVER_BODY6; + std::for_each(begin, end, [=,&dot](Index_type j) { + POLYBENCH_GEMVER_BODY7; + }); + POLYBENCH_GEMVER_BODY8; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_gemver_base_lam1 = 
[=](Index_type i, Index_type j) { + POLYBENCH_GEMVER_BODY1; + }; + auto poly_gemver_base_lam3 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_GEMVER_BODY3; + }; + auto poly_gemver_base_lam4 = [=](Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY4; + }; + auto poly_gemver_base_lam5 = [=](Index_type i) { + POLYBENCH_GEMVER_BODY5; + }; + auto poly_gemver_base_lam7 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_GEMVER_BODY7; + }; + auto poly_gemver_base_lam8 = [=](Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY8; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + std::for_each(begin, end, [=](Index_type j) { + poly_gemver_base_lam1(i, j); + }); + }); + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + POLYBENCH_GEMVER_BODY2; + std::for_each(begin, end, [=,&dot](Index_type j) { + poly_gemver_base_lam3(i, j, dot); + }); + poly_gemver_base_lam4(i, dot); + }); + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + poly_gemver_base_lam5(i); + }); + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + POLYBENCH_GEMVER_BODY6; + std::for_each(begin, end, [=,&dot](Index_type j) { + poly_gemver_base_lam7(i, j, dot); + }); + poly_gemver_base_lam8(i, dot); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_GEMVER_VIEWS_RAJA; + + auto poly_gemver_lam1 = [=] (Index_type i, Index_type j) { + POLYBENCH_GEMVER_BODY1_RAJA; + }; + auto poly_gemver_lam2 = [=] (Real_type &dot) { + POLYBENCH_GEMVER_BODY2_RAJA; + }; + auto poly_gemver_lam3 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_GEMVER_BODY3_RAJA; + }; + auto poly_gemver_lam4 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY4_RAJA; + }; + auto poly_gemver_lam5 = [=] (Index_type i) { + 
POLYBENCH_GEMVER_BODY5_RAJA; + }; + auto poly_gemver_lam6 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY6_RAJA; + }; + auto poly_gemver_lam7 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_GEMVER_BODY7_RAJA; + }; + auto poly_gemver_lam8 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY8_RAJA; + }; + + using EXEC_POL1 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0,1>> + > + > + >; + + using EXEC_POL2 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + + using EXEC_POL3 = RAJA::loop_exec; + + using EXEC_POL4 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{0, n}, + RAJA::RangeSegment{0, n}), + poly_gemver_lam1 + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, n}, + RAJA::RangeSegment{0, n}), + RAJA::tuple{0.0}, + + poly_gemver_lam2, + poly_gemver_lam3, + poly_gemver_lam4 + ); + + RAJA::forall (RAJA::RangeSegment{0, n}, + poly_gemver_lam5 + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, n}, + RAJA::RangeSegment{0, n}), + RAJA::tuple{0.0}, + + poly_gemver_lam6, + poly_gemver_lam7, + poly_gemver_lam8 + + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << 
std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp new file mode 100644 index 000000000..070e56c18 --- /dev/null +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -0,0 +1,145 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_GESUMMV.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps= getRunReps(); + + POLYBENCH_GESUMMV_DATA_SETUP; + + counting_iterator begin(0); + counting_iterator end(N); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + POLYBENCH_GESUMMV_BODY1; + std::for_each(begin, end, [=,&tmpdot,&ydot](Index_type j) { + POLYBENCH_GESUMMV_BODY2; + }); + POLYBENCH_GESUMMV_BODY3; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_gesummv_base_lam2 = [=](Index_type i, Index_type j, + Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY2; + }; + auto poly_gesummv_base_lam3 = [=](Index_type i, + Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY3; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type i) { + POLYBENCH_GESUMMV_BODY1; +
std::for_each(begin, end, [=,&tmpdot,&ydot](Index_type j) { + poly_gesummv_base_lam2(i, j, tmpdot, ydot); + }); + poly_gesummv_base_lam3(i, tmpdot, ydot); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_GESUMMV_VIEWS_RAJA; + + auto poly_gesummv_lam1 = [=](Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY1_RAJA; + }; + auto poly_gesummv_lam2 = [=](Index_type i, Index_type j, + Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY2_RAJA; + }; + auto poly_gesummv_lam3 = [=](Index_type i, + Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY3_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0,1>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Params<0,1>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0,1>> + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple( RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N} ), + RAJA::make_tuple(static_cast(0.0), + static_cast(0.0)), + + poly_gesummv_lam1, + poly_gesummv_lam2, + poly_gesummv_lam3 + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp new file mode 100644 index 000000000..d18a359f9 --- /dev/null +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -0,0 +1,188 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. 
+// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_HEAT_3D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_HEAT_3D_DATA_SETUP; + + counting_iterator begin(1); + counting_iterator end(N-1); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type k) { + POLYBENCH_HEAT_3D_BODY1; + }); + }); + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type k) { + POLYBENCH_HEAT_3D_BODY2; + }); + }); + }); + + } + + } + stopTimer(); + + POLYBENCH_HEAT_3D_DATA_RESET; + + break; + } + + case Lambda_StdPar : { + + auto poly_heat3d_base_lam1 = [=](Index_type i, Index_type j, + Index_type k) { + POLYBENCH_HEAT_3D_BODY1; + }; + auto poly_heat3d_base_lam2 = [=](Index_type i, Index_type j, + Index_type k) { + POLYBENCH_HEAT_3D_BODY2; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + // Same decomposition as Base_StdPar: par_unseq over i, unseq over j and k, all on [1, N-1). + std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { + std::for_each( std::execution::unseq, begin, end, [=](Index_type j) { + std::for_each( std::execution::unseq, begin, end, [=](Index_type k) { + poly_heat3d_base_lam1(i, j, k); + }); + }); + }); + + std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i
) { + std::for_each( std::execution::unseq, begin, end, [=](Index_type j) { + std::for_each( std::execution::unseq, begin, end, [=](Index_type k) { + poly_heat3d_base_lam2(i, j, k); + }); + }); + }); + + } + + } + stopTimer(); + + POLYBENCH_HEAT_3D_DATA_RESET; + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_HEAT_3D_VIEWS_RAJA; + + auto poly_heat3d_lam1 = [=](Index_type i, Index_type j, Index_type k) { + POLYBENCH_HEAT_3D_BODY1_RAJA; + }; + auto poly_heat3d_lam2 = [=](Index_type i, Index_type j, Index_type k) { + POLYBENCH_HEAT_3D_BODY2_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + > + >, + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1> + > + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, + RAJA::RangeSegment{1, N-1}, + RAJA::RangeSegment{1, N-1}), + + poly_heat3d_lam1, + poly_heat3d_lam2 + ); + + } + + } + stopTimer(); + + POLYBENCH_HEAT_3D_DATA_RESET; + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp new file mode 100644 index 000000000..1b1ce72f2 --- /dev/null +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -0,0 +1,137 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_JACOBI_1D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps= getRunReps(); + + POLYBENCH_JACOBI_1D_DATA_SETUP; + + counting_iterator begin(1); + counting_iterator end(N-1); + + // Defined ahead of the switch so both the Lambda_StdPar and RAJA_StdPar cases can see them. + auto poly_jacobi1d_lam1 = [=] (Index_type i) { + POLYBENCH_JACOBI_1D_BODY1; + }; + auto poly_jacobi1d_lam2 = [=] (Index_type i) { + POLYBENCH_JACOBI_1D_BODY2; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_JACOBI_1D_BODY1; + }); + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_JACOBI_1D_BODY2; + }); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_1D_DATA_RESET; + + break; + } + + case Lambda_StdPar : { + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + poly_jacobi1d_lam1(i); + }); + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + poly_jacobi1d_lam2(i); + }); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_1D_DATA_RESET; + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + RAJA::forall ( RAJA::RangeSegment{1, N-1}, + poly_jacobi1d_lam1 + ); + + RAJA::forall ( RAJA::RangeSegment{1, N-1}, + poly_jacobi1d_lam2 + ); + + } + + } +
stopTimer(); + + POLYBENCH_JACOBI_1D_DATA_RESET; + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp new file mode 100644 index 000000000..41cd58b2b --- /dev/null +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -0,0 +1,176 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_JACOBI_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps= getRunReps(); + + POLYBENCH_JACOBI_2D_DATA_SETUP; + + counting_iterator begin(1); + counting_iterator end(N-1); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + POLYBENCH_JACOBI_2D_BODY1; + }); + }); + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + POLYBENCH_JACOBI_2D_BODY2; + }); + }); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_2D_DATA_RESET; + + break; + } + + case Lambda_StdPar : { + + 
auto poly_jacobi2d_base_lam1 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY1; + }; + auto poly_jacobi2d_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY2; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + poly_jacobi2d_base_lam1(i, j); + }); + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + poly_jacobi2d_base_lam2(i, j); + }); + }); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_2D_DATA_RESET; + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_JACOBI_2D_VIEWS_RAJA; + + auto poly_jacobi2d_lam1 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY1_RAJA; + }; + auto poly_jacobi2d_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY2_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + >, + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, + RAJA::RangeSegment{1, N-1}), + + poly_jacobi2d_lam1, + poly_jacobi2d_lam2 + ); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_2D_DATA_RESET; + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git 
a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp new file mode 100644 index 000000000..7adc162de --- /dev/null +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -0,0 +1,204 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_MVT.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps= getRunReps(); + + POLYBENCH_MVT_DATA_SETUP; + + counting_iterator begin(0); + counting_iterator end(N); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_MVT_BODY1; + std::for_each( std::execution::unseq, + begin, end, + [=,&dot](Index_type j) { + POLYBENCH_MVT_BODY2; + }); + POLYBENCH_MVT_BODY3; + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_MVT_BODY4; + std::for_each( std::execution::unseq, + begin, end, + [=,&dot](Index_type j) { + POLYBENCH_MVT_BODY5; + }); + POLYBENCH_MVT_BODY6; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_mvt_base_lam2 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_MVT_BODY2; + }; + auto poly_mvt_base_lam3 = [=] (Index_type i, + Real_type &dot) { + POLYBENCH_MVT_BODY3; + }; + auto poly_mvt_base_lam5 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_MVT_BODY5; + }; + auto 
poly_mvt_base_lam6 = [=] (Index_type i, + Real_type &dot) { + POLYBENCH_MVT_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_MVT_BODY1; + std::for_each( std::execution::unseq, + begin, end, + [=,&dot](Index_type j) { + poly_mvt_base_lam2(i, j, dot); + }); + poly_mvt_base_lam3(i, dot); + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_MVT_BODY4; + std::for_each( std::execution::unseq, + begin, end, + [=,&dot](Index_type j) { + poly_mvt_base_lam5(i, j, dot); + }); + poly_mvt_base_lam6(i, dot); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_MVT_VIEWS_RAJA; + + auto poly_mvt_lam1 = [=] (Real_type &dot) { + POLYBENCH_MVT_BODY1_RAJA; + }; + auto poly_mvt_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_MVT_BODY2_RAJA; + }; + auto poly_mvt_lam3 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_MVT_BODY3_RAJA; + }; + auto poly_mvt_lam4 = [=] (Real_type &dot) { + POLYBENCH_MVT_BODY4_RAJA; + }; + auto poly_mvt_lam5 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_MVT_BODY5_RAJA; + }; + auto poly_mvt_lam6 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_MVT_BODY6_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::region( [=]() { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + RAJA::tuple{0.0}, + + poly_mvt_lam1, + poly_mvt_lam2, + poly_mvt_lam3 + + ); + + RAJA::kernel_param( + 
RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + RAJA::tuple{0.0}, + + poly_mvt_lam4, + poly_mvt_lam5, + poly_mvt_lam6 + + ); + + }); // end sequential region (for single-source code) + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_MVT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp new file mode 100644 index 000000000..bde010541 --- /dev/null +++ b/src/stream/ADD-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ADD.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void ADD::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + ADD_DATA_SETUP; + + auto add_lam = [=](Index_type i) { + ADD_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + ADD_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + 
add_lam(i); + }); + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), add_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n ADD : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp new file mode 100644 index 000000000..1fc757e22 --- /dev/null +++ b/src/stream/COPY-StdPar.cpp @@ -0,0 +1,88 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "COPY.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void COPY::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + COPY_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::copy( std::execution::par_unseq, + &a[ibegin], &a[iend], &c[ibegin]); + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::copy( std::execution::par_unseq, + &a[ibegin], &a[iend], &c[ibegin]); + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + 
RAJA::forall( + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { c[i] = a[i]; }); // same element-wise copy as the std::copy variants + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n COPY : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp new file mode 100644 index 000000000..cf22a9e35 --- /dev/null +++ b/src/stream/DOT-StdPar.cpp @@ -0,0 +1,116 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DOT.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include +#include + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void DOT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + DOT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type dot = m_dot_init; + + dot += std::transform_reduce( std::execution::par_unseq, + &a[ibegin], &a[iend], &b[ibegin], + (Real_type)0); + + m_dot += dot; + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto dot_base_lam = [=](Index_type i) -> Real_type { + return a[i] * b[i]; + }; + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type dot = m_dot_init; + + dot += std::transform_reduce( std::execution::par_unseq, + begin,end, + (Real_type)0, + std::plus(), +
dot_base_lam); + + m_dot += dot; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum dot(m_dot_init); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + DOT_BODY; + }); + + m_dot += static_cast(dot.get()); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n DOT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp new file mode 100644 index 000000000..6cf9f418f --- /dev/null +++ b/src/stream/MUL-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MUL.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void MUL::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + MUL_DATA_SETUP; + + auto mul_lam = [=](Index_type i) { + MUL_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + MUL_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + mul_lam(i); + }); + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), mul_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n MUL : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp new file mode 100644 index 000000000..484d79cc3 --- /dev/null +++ b/src/stream/TRIAD-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. 
+// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRIAD.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + TRIAD_DATA_SETUP; + + auto triad_lam = [=](Index_type i) { + TRIAD_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + TRIAD_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + triad_lam(i); + }); + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), triad_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n TRIAD : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf From 9cf1e4b70410d786c73837934ca7e63c9d41c602 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 7 Jul 2022 17:26:19 +0300 Subject: [PATCH 003/174] starting over with StdPar because git submodules are trash --- src/common/StdParUtils.hpp | 108 +++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 
src/common/StdParUtils.hpp diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp new file mode 100644 index 000000000..f765f517d --- /dev/null +++ b/src/common/StdParUtils.hpp @@ -0,0 +1,108 @@ +/* +Copyright (c) 2021, NVIDIA +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+// This implementation was authored by David Olsen
+
+#include <type_traits>
+
+template <typename T>
+struct counting_iterator {
+  // Random-access iterator whose dereference yields the counter value itself.
+private:
+  typedef counting_iterator<T> self;
+
+public:
+  typedef T value_type;
+  typedef typename std::make_signed<T>::type difference_type;
+  typedef T const* pointer;
+  typedef T const& reference;
+  typedef std::random_access_iterator_tag iterator_category;
+
+  explicit counting_iterator(value_type v) : value(v) { }
+
+  value_type operator*() const { return value; }
+  value_type operator[](difference_type n) const { return value + n; }
+
+  self& operator++() { ++value; return *this; }
+  self operator++(int) {
+    self result{value};
+    ++value;
+    return result;
+  }
+  self& operator--() { --value; return *this; }
+  self operator--(int) {
+    self result{value};
+    --value;
+    return result;
+  }
+  self& operator+=(difference_type n) { value += n; return *this; }
+  self& operator-=(difference_type n) { value -= n; return *this; }
+
+  friend self operator+(self const& i, difference_type n) {
+    return self(i.value + n);
+  }
+  friend self operator+(difference_type n, self const& i) {
+    return self(i.value + n);
+  }
+  friend difference_type operator-(self const& x, self const& y) {
+    return x.value - y.value;
+  }
+  friend self operator-(self const& i, difference_type n) {
+    return self(i.value - n);
+  }
+
+  friend bool operator==(self const& x, self const& y) {
+    return x.value == y.value;
+  }
+  friend bool operator!=(self const& x, self const& y) {
+    return x.value != y.value;
+  }
+  friend bool operator<(self const& x, self const& y) {
+    return x.value < y.value;
+  }
+  friend bool operator<=(self const& x, self const& y) {
+    return x.value <= y.value;
+  }
+  friend bool operator>(self const& x, self const& y) {
+    return x.value > y.value;
+  }
+  friend bool operator>=(self const& x, self const& y) {
+    return x.value >= y.value;
+  }
+private:
+  value_type value;
+};
+// Factory for counting_iterator; the defaulted parameter limits T to integral types.
+template <typename T,
+          typename = typename std::enable_if<std::is_integral<T>::value>::type>
+inline counting_iterator<T> make_counter(T value) {
+  return counting_iterator<T>{value};
+}
+
From 6bcc4454a5197f5b652bf52662c585a66f65b5c0 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 11:18:53 +0300 Subject: [PATCH 004/174] fix MEM*** --- src/algorithm/CMakeLists.txt | 2 ++ src/algorithm/MEMCPY.cpp | 4 ++++ src/algorithm/MEMCPY.hpp | 3 +++ src/algorithm/MEMSET.cpp | 4 ++++ src/algorithm/MEMSET.hpp | 3 +++ 5 files changed, 16 insertions(+) diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index 7c0fcd39f..03d6069ba 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -33,12 +33,14 @@ blt_add_library( REDUCE_SUM-OMPTarget.cpp MEMSET.cpp MEMSET-Seq.cpp + MEMSET-StdPar.cpp MEMSET-Hip.cpp MEMSET-Cuda.cpp MEMSET-OMP.cpp MEMSET-OMPTarget.cpp MEMCPY.cpp MEMCPY-Seq.cpp + MEMCPY-StdPar.cpp MEMCPY-Hip.cpp MEMCPY-Cuda.cpp MEMCPY-OMP.cpp diff --git a/src/algorithm/MEMCPY.cpp b/src/algorithm/MEMCPY.cpp index 1447fc4f8..80c7f4f62 100644 --- a/src/algorithm/MEMCPY.cpp +++ b/src/algorithm/MEMCPY.cpp @@ -51,6 +51,10 @@ MEMCPY::MEMCPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } MEMCPY::~MEMCPY() diff --git a/src/algorithm/MEMCPY.hpp b/src/algorithm/MEMCPY.hpp index 2477115ce..117bd8a6d 100644 --- a/src/algorithm/MEMCPY.hpp +++ b/src/algorithm/MEMCPY.hpp @@ -57,10 +57,13 @@ class MEMCPY : public KernelBase void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); + void setStdParTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); void runSeqVariantDefault(VariantID vid); void runSeqVariantLibrary(VariantID vid); + void runStdParVariantDefault(VariantID vid); + void runStdParVariantLibrary(VariantID vid); template < size_t block_size > void runCudaVariantBlock(VariantID vid);
diff --git a/src/algorithm/MEMSET.cpp b/src/algorithm/MEMSET.cpp index 98152d917..3cf345bd6 100644 --- a/src/algorithm/MEMSET.cpp +++ b/src/algorithm/MEMSET.cpp @@ -52,6 +52,10 @@ MEMSET::MEMSET(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } MEMSET::~MEMSET() diff --git a/src/algorithm/MEMSET.hpp b/src/algorithm/MEMSET.hpp index 0e9630fa7..0cf2a75bf 100644 --- a/src/algorithm/MEMSET.hpp +++ b/src/algorithm/MEMSET.hpp @@ -57,10 +57,13 @@ class MEMSET : public KernelBase void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); + void setStdParTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); void runSeqVariantDefault(VariantID vid); void runSeqVariantLibrary(VariantID vid); + void runStdParVariantDefault(VariantID vid); + void runStdParVariantLibrary(VariantID vid); template < size_t block_size > void runCudaVariantBlock(VariantID vid); From bb16c99e9a77b39f2eb9097fa0a69d769d83a965 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 11:38:29 +0300 Subject: [PATCH 005/174] fix MEM*** more --- src/algorithm/MEMCPY-OMP.cpp | 4 ++-- src/algorithm/MEMCPY-Seq.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/algorithm/MEMCPY-OMP.cpp b/src/algorithm/MEMCPY-OMP.cpp index ba271577b..6847ddaac 100644 --- a/src/algorithm/MEMCPY-OMP.cpp +++ b/src/algorithm/MEMCPY-OMP.cpp @@ -48,7 +48,7 @@ void MEMCPY::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx case Lambda_OpenMP : { - auto memset_lambda = [=](Index_type i) { + auto memcpy_lambda = [=](Index_type i) { MEMCPY_BODY; }; @@ -57,7 +57,7 @@ void MEMCPY::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx #pragma omp parallel for for (Index_type i = 
ibegin; i < iend; ++i ) { - memset_lambda(i); + memcpy_lambda(i); } } diff --git a/src/algorithm/MEMCPY-Seq.cpp b/src/algorithm/MEMCPY-Seq.cpp index 99d360d8d..c7994cfff 100644 --- a/src/algorithm/MEMCPY-Seq.cpp +++ b/src/algorithm/MEMCPY-Seq.cpp @@ -94,7 +94,7 @@ void MEMCPY::runSeqVariantDefault(VariantID vid) #if defined(RUN_RAJA_SEQ) case Lambda_Seq : { - auto memset_lambda = [=](Index_type i) { + auto memcpy_lambda = [=](Index_type i) { MEMCPY_BODY; }; @@ -102,7 +102,7 @@ void MEMCPY::runSeqVariantDefault(VariantID vid) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { for (Index_type i = ibegin; i < iend; ++i ) { - memset_lambda(i); + memcpy_lambda(i); } } From 7f784d15792d3c74becaac0b6e169a42d710ab73 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 11:59:28 +0300 Subject: [PATCH 006/174] add exec and alg to stdpar helper header --- src/common/StdParUtils.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index f765f517d..ab2e49f46 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -30,6 +30,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// This implementation was authored by David Olsen +#include +#include #include template From 41d9cd29e9d68728f9d5903592182766bbf324bb Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:00:54 +0300 Subject: [PATCH 007/174] more stdpar --- src/algorithm/MEMCPY-StdPar.cpp | 193 ++++++++++++++++++ src/algorithm/MEMSET-StdPar.cpp | 193 ++++++++++++++++++ src/apps/CMakeLists.txt | 1 + src/apps/CONVECTION3DPA-StdPar.cpp | 316 +++++++++++++++++++++++++++++ src/apps/CONVECTION3DPA.cpp | 1 + src/basic/CMakeLists.txt | 1 + src/basic/DAXPY_ATOMIC-StdPar.cpp | 93 +++++++++ 7 files changed, 798 insertions(+) create mode 100644 src/algorithm/MEMCPY-StdPar.cpp create mode 100644 src/algorithm/MEMSET-StdPar.cpp create mode 100644 src/apps/CONVECTION3DPA-StdPar.cpp create mode 100644 src/basic/DAXPY_ATOMIC-StdPar.cpp diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp new file mode 100644 index 000000000..ff8e66c08 --- /dev/null +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -0,0 +1,193 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MEMCPY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace algorithm
+{
+// "library" tuning: Base_StdPar times std::copy_n(par_unseq); RAJA_StdPar
+// (only when RUN_RAJA_STDPAR is defined) times the camp Host resource memcpy.
+void MEMCPY::runStdParVariantLibrary(VariantID vid)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MEMCPY_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::copy_n(std::execution::par_unseq,
+                    x+ibegin, iend-ibegin, y+ibegin);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      camp::resources::Host res = camp::resources::Host::get_default();
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        res.memcpy(MEMCPY_STD_ARGS);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif
+
+    default : {
+      getCout() << "\n  MEMCPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+// "default" tuning: applies MEMCPY_BODY per index with std::for_each(par_unseq).
+void MEMCPY::runStdParVariantDefault(VariantID vid)
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MEMCPY_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      auto begin = counting_iterator<Index_type>(ibegin);
+      auto end = counting_iterator<Index_type>(iend);
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin,end,
+                       [=](Index_type i) {
+          MEMCPY_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto memcpy_lambda = [=](Index_type i) {
+                             MEMCPY_BODY;
+                           };
+
+      auto begin = counting_iterator<Index_type>(ibegin);
+      auto end = counting_iterator<Index_type>(iend);
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin,end,
+                       [=](Index_type i) {
+          memcpy_lambda(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall( RAJA::RangeSegment(ibegin, iend), // NOTE(review): execution-policy template argument appears to be missing - confirm
+          [=](Index_type i) {
+            MEMCPY_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif
+
+    default : {
+      getCout() << "\n  MEMCPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+// Maps tune_idx onto the tunings in the order setStdParTuningDefinitions adds them.
+void MEMCPY::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  size_t t = 0;
+
+  if (vid == Base_StdPar || vid == RAJA_StdPar) {
+
+    if (tune_idx == t) {
+
+      runStdParVariantLibrary(vid);
+
+    }
+
+    t += 1;
+
+  }
+
+  if (tune_idx == t) {
+
+    runStdParVariantDefault(vid);
+
+  }
+
+  t += 1;
+}
+// Registers "library" (Base/RAJA variants only) followed by "default".
+void MEMCPY::setStdParTuningDefinitions(VariantID vid)
+{
+  if (vid == Base_StdPar || vid == RAJA_StdPar) {
+    addVariantTuningName(vid, "library");
+  }
+
+  addVariantTuningName(vid, "default");
+}
+
+} // end namespace algorithm
+} // end namespace rajaperf
diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp
new file mode 100644
index 000000000..73a61d5cf
--- /dev/null
+++ b/src/algorithm/MEMSET-StdPar.cpp
@@ -0,0 +1,193 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MEMSET.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace algorithm
+{
+// "library" tuning: Base_StdPar times std::fill_n(par_unseq); RAJA_StdPar
+// (only when RUN_RAJA_STDPAR is defined) times the camp Host resource memset.
+void MEMSET::runStdParVariantLibrary(VariantID vid)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MEMSET_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::fill_n(std::execution::par_unseq,
+                    x+ibegin, iend-ibegin, val);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      camp::resources::Host res = camp::resources::Host::get_default();
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        res.memset(MEMSET_STD_ARGS);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif
+
+    default : {
+      getCout() << "\n  MEMSET : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+// "default" tuning: applies MEMSET_BODY per index with std::for_each(par_unseq).
+void MEMSET::runStdParVariantDefault(VariantID vid)
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MEMSET_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      auto begin = counting_iterator<Index_type>(ibegin);
+      auto end = counting_iterator<Index_type>(iend);
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin,end,
+                       [=](Index_type i) {
+          MEMSET_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto memset_lambda = [=](Index_type i) {
+                             MEMSET_BODY;
+                           };
+
+      auto begin = counting_iterator<Index_type>(ibegin);
+      auto end = counting_iterator<Index_type>(iend);
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin,end,
+                       [=](Index_type i) {
+          memset_lambda(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall( RAJA::RangeSegment(ibegin, iend), // NOTE(review): execution-policy template argument appears to be missing - confirm
+          [=](Index_type i) {
+            MEMSET_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif
+
+    default : {
+      getCout() << "\n  MEMSET : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+// Maps tune_idx onto the tunings in the order setStdParTuningDefinitions adds them.
+void MEMSET::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+  size_t t = 0;
+
+  if (vid == Base_StdPar || vid == RAJA_StdPar) {
+
+    if (tune_idx == t) {
+
+      runStdParVariantLibrary(vid);
+
+    }
+
+    t += 1;
+
+  }
+
+  if (tune_idx == t) {
+
+    runStdParVariantDefault(vid);
+
+  }
+
+  t += 1;
+}
+// Registers "library" (Base/RAJA variants only) followed by "default".
+void MEMSET::setStdParTuningDefinitions(VariantID vid)
+{
+  if (vid == Base_StdPar || vid == RAJA_StdPar) {
+    addVariantTuningName(vid, "library");
+  }
+
+  addVariantTuningName(vid, "default");
+}
+
+} // end namespace algorithm
+} // end namespace rajaperf
diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index e79db7717..dbe74ba13 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -13,6 +13,7 @@ blt_add_library( CONVECTION3DPA-Cuda.cpp CONVECTION3DPA-Hip.cpp CONVECTION3DPA-Seq.cpp + CONVECTION3DPA-StdPar.cpp CONVECTION3DPA-OMP.cpp CONVECTION3DPA-OMPTarget.cpp DEL_DOT_VEC_2D.cpp diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp new file mode 100644 index 000000000..1bc58d4f1 --- /dev/null +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -0,0 +1,316 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "CONVECTION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf { +namespace apps { + +void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + + CONVECTION3DPA_DATA_SETUP; + + switch (vid) { + + case Base_StdPar: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (int e = 0; e < NE; ++e) { + + CONVECTION3DPA_0_CPU; + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(dx,x,CPA_D1D) + { + CONVECTION3DPA_1; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CONVECTION3DPA_2; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CONVECTION3DPA_3; + } + } + } + + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(qz,z,CPA_Q1D) + { + CONVECTION3DPA_4; + } + } + } + + CPU_FOREACH(qz,z,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CONVECTION3DPA_5; + } + } + } + + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(dz,z,CPA_D1D) + { + CONVECTION3DPA_6; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CONVECTION3DPA_7; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(dx,x,CPA_D1D) + { + CONVECTION3DPA_8; + } + } + } + } // element loop + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case RAJA_StdPar: { + + using launch_policy = RAJA::expt::LaunchPolicy; + + using outer_x = RAJA::expt::LoopPolicy; + + using inner_x = RAJA::expt::LoopPolicy; + + using inner_y = RAJA::expt::LoopPolicy; + + using inner_z = RAJA::expt::LoopPolicy; + + startTimer(); + for 
(RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Grid is empty as the host does not need a compute grid to be specified + RAJA::expt::launch( + RAJA::expt::Grid(), + [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + [&](int e) { + + CONVECTION3DPA_0_CPU; + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_1; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_2; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + + CONVECTION3DPA_3; + + } // lambda (dy) + ); // RAJA::expt::loop + } // lambda (dx) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + + CONVECTION3DPA_4; + + } // lambda (qz) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qx) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + RAJA::expt::loop(ctx, 
RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_5; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + + CONVECTION3DPA_6; + + } // lambda (dz) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qx) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + + CONVECTION3DPA_7; + + } // lambda (dy) + ); // RAJA::expt::loop + } // lambda (qx) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_8; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + } // lambda (e) + ); // RAJA::expt::loop + + } // outer lambda (ctx) + ); // RAJA::expt::launch + } // loop over kernel reps + stopTimer(); + + return; + } +#endif // RUN_RAJA_SEQ + + default: + getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid + << std::endl; + } +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/CONVECTION3DPA.cpp b/src/apps/CONVECTION3DPA.cpp index 64fcc6063..246ae6b2e 100644 --- a/src/apps/CONVECTION3DPA.cpp +++ b/src/apps/CONVECTION3DPA.cpp @@ -64,6 +64,7 @@ 
CONVECTION3DPA::CONVECTION3DPA(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); } CONVECTION3DPA::~CONVECTION3DPA() diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index cca6bf286..ec2eddf98 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -17,6 +17,7 @@ blt_add_library( DAXPY-OMPTarget.cpp DAXPY_ATOMIC.cpp DAXPY_ATOMIC-Seq.cpp + DAXPY_ATOMIC-StdPar.cpp DAXPY_ATOMIC-Hip.cpp DAXPY_ATOMIC-Cuda.cpp DAXPY_ATOMIC-OMP.cpp diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp new file mode 100644 index 000000000..c143e571d --- /dev/null +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -0,0 +1,93 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DAXPY_ATOMIC.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + DAXPY_ATOMIC_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + DAXPY_ATOMIC_BODY; + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto daxpy_atomic_lam = [=](Index_type i) { + DAXPY_ATOMIC_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + daxpy_atomic_lam(i); + } 
+ + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + DAXPY_ATOMIC_RAJA_BODY(RAJA::seq_atomic); + }); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace basic +} // end namespace rajaperf From 710a08443ee1b1da46aed5c6a40bc6c816ae470f Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:15:22 +0300 Subject: [PATCH 008/174] add SCAN and NODAL --- src/algorithm/CMakeLists.txt | 1 + src/algorithm/SCAN-StdPar.cpp | 90 +++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 src/algorithm/SCAN-StdPar.cpp diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index 03d6069ba..fc9dfa4ef 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -10,6 +10,7 @@ blt_add_library( NAME algorithm SOURCES SCAN.cpp SCAN-Seq.cpp + SCAN-StdPar.cpp SCAN-Hip.cpp SCAN-Cuda.cpp SCAN-OMP.cpp diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp new file mode 100644 index 000000000..d6adeaabf --- /dev/null +++ b/src/algorithm/SCAN-StdPar.cpp @@ -0,0 +1,90 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SCAN.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + SCAN_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + SCAN_PROLOGUE; + for (Index_type i = ibegin; i < iend; ++i ) { + SCAN_BODY; + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + SCAN_PROLOGUE; + auto scan_lam = [=, &scan_var](Index_type i) { + SCAN_BODY; + }; + for (Index_type i = ibegin; i < iend; ++i ) { + scan_lam(i); + } + + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::exclusive_scan(RAJA_SCAN_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n SCAN : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace algorithm +} // end namespace rajaperf From 87e011112455df6b3bffc450870c0c0dfe31bb9f Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:17:16 +0300 Subject: [PATCH 009/174] add more stuff --- src/apps/CMakeLists.txt | 1 + src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp | 104 ++++++++++++++++ src/apps/NODAL_ACCUMULATION_3D.cpp | 4 + src/basic/CMakeLists.txt | 1 + src/basic/REDUCE_STRUCT-StdPar.cpp | 139 ++++++++++++++++++++++ 5 files changed, 249 insertions(+) create mode 100644 src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp create mode 100644 src/basic/REDUCE_STRUCT-StdPar.cpp diff --git a/src/apps/CMakeLists.txt 
b/src/apps/CMakeLists.txt index dbe74ba13..04c149782 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -72,6 +72,7 @@ blt_add_library( MASS3DPA-OMPTarget.cpp NODAL_ACCUMULATION_3D.cpp NODAL_ACCUMULATION_3D-Seq.cpp + NODAL_ACCUMULATION_3D-StdPar.cpp NODAL_ACCUMULATION_3D-Hip.cpp NODAL_ACCUMULATION_3D-Cuda.cpp NODAL_ACCUMULATION_3D-OMP.cpp diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp new file mode 100644 index 000000000..585e3a000 --- /dev/null +++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "NODAL_ACCUMULATION_3D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "AppsData.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = m_domain->n_real_zones; + + NODAL_ACCUMULATION_3D_DATA_SETUP; + + NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type ii = ibegin ; ii < iend ; ++ii ) { + NODAL_ACCUMULATION_3D_BODY_INDEX; + NODAL_ACCUMULATION_3D_BODY; + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto nodal_accumulation_3d_lam = [=](Index_type ii) { + NODAL_ACCUMULATION_3D_BODY_INDEX; + NODAL_ACCUMULATION_3D_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) 
{ + + for (Index_type ii = ibegin ; ii < iend ; ++ii ) { + nodal_accumulation_3d_lam(ii); + } + + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + camp::resources::Resource working_res{camp::resources::Host()}; + RAJA::TypedListSegment zones(m_domain->real_zones, + m_domain->n_real_zones, + working_res); + + auto nodal_accumulation_3d_lam = [=](Index_type i) { + NODAL_ACCUMULATION_3D_RAJA_ATOMIC_BODY(RAJA::seq_atomic); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall(zones, nodal_accumulation_3d_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + getCout() << "\n NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/NODAL_ACCUMULATION_3D.cpp b/src/apps/NODAL_ACCUMULATION_3D.cpp index 5fd512fb7..ef652b4a4 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D.cpp @@ -67,6 +67,10 @@ NODAL_ACCUMULATION_3D::NODAL_ACCUMULATION_3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } NODAL_ACCUMULATION_3D::~NODAL_ACCUMULATION_3D() diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index ec2eddf98..bfc2000b9 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -105,6 +105,7 @@ blt_add_library( REDUCE3_INT-OMPTarget.cpp REDUCE_STRUCT.cpp REDUCE_STRUCT-Seq.cpp + REDUCE_STRUCT-StdPar.cpp REDUCE_STRUCT-Hip.cpp REDUCE_STRUCT-Cuda.cpp REDUCE_STRUCT-OMP.cpp diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp new file mode 100644 index 000000000..b91513bdb --- /dev/null +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -0,0 +1,139 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence 
Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE_STRUCT.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +namespace rajaperf +{ +namespace basic +{ + + +void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + REDUCE_STRUCT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type xsum = m_init_sum; Real_type ysum = m_init_sum; + Real_type xmin = m_init_min; Real_type ymin = m_init_min; + Real_type xmax = m_init_max; Real_type ymax = m_init_max; + + for (Index_type i = ibegin; i < iend; ++i ) { + REDUCE_STRUCT_BODY; + } + + points.SetCenter(xsum/(points.N), ysum/(points.N)); + points.SetXMin(xmin); + points.SetXMax(xmax); + points.SetYMin(ymin); + points.SetYMax(ymax); + m_points=points; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto reduce_struct_x_base_lam = [=](Index_type i) -> Real_type { + return points.x[i]; + }; + + auto reduce_struct_y_base_lam = [=](Index_type i) -> Real_type { + return points.y[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type xsum = m_init_sum; Real_type ysum = m_init_sum; + Real_type xmin = m_init_min; Real_type ymin = m_init_min; + Real_type xmax = m_init_max; Real_type ymax = m_init_max; + + for (Index_type i = ibegin; i < iend; ++i ) { + xsum += reduce_struct_x_base_lam(i); + xmin = RAJA_MIN(xmin, reduce_struct_x_base_lam(i)); + xmax = RAJA_MAX(xmax, reduce_struct_x_base_lam(i)); + ysum += reduce_struct_y_base_lam(i); + ymin = RAJA_MIN(ymin, 
reduce_struct_y_base_lam(i)); + ymax = RAJA_MAX(ymax, reduce_struct_y_base_lam(i)); + } + + points.SetCenter(xsum/(points.N), ysum/(points.N)); + points.SetXMin(xmin); + points.SetXMax(xmax); + points.SetYMin(ymin); + points.SetYMax(ymax); + m_points=points; + + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum xsum(m_init_sum); + RAJA::ReduceSum ysum(m_init_sum); + RAJA::ReduceMin xmin(m_init_min); + RAJA::ReduceMin ymin(m_init_min); + RAJA::ReduceMax xmax(m_init_max); + RAJA::ReduceMax ymax(m_init_max); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + REDUCE_STRUCT_BODY_RAJA; + }); + + points.SetCenter(xsum.get()/(points.N), + ysum.get()/(points.N)); + points.SetXMin(xmin.get()); + points.SetXMax(xmax.get()); + points.SetYMin(ymin.get()); + points.SetYMax(ymax.get()); + m_points=points; + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + getCout() << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace basic +} // end namespace rajaperf From 4a674d367c55fd86ca3c3776e555749a773e2b31 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:19:21 +0300 Subject: [PATCH 010/174] index list --- src/basic/CMakeLists.txt | 2 + src/basic/INDEXLIST-StdPar.cpp | 84 ++++++++++++++ src/basic/INDEXLIST_3LOOP-StdPar.cpp | 160 +++++++++++++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 src/basic/INDEXLIST-StdPar.cpp create mode 100644 src/basic/INDEXLIST_3LOOP-StdPar.cpp diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index bfc2000b9..24af1abae 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -31,12 +31,14 @@ blt_add_library( IF_QUAD-OMPTarget.cpp INDEXLIST.cpp INDEXLIST-Seq.cpp + INDEXLIST-StdPar.cpp INDEXLIST-Hip.cpp INDEXLIST-Cuda.cpp INDEXLIST-OMP.cpp INDEXLIST-OMPTarget.cpp INDEXLIST_3LOOP.cpp 
INDEXLIST_3LOOP-Seq.cpp + INDEXLIST_3LOOP-StdPar.cpp INDEXLIST_3LOOP-Hip.cpp INDEXLIST_3LOOP-Cuda.cpp INDEXLIST_3LOOP-OMP.cpp diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp new file mode 100644 index 000000000..8b4565b9c --- /dev/null +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -0,0 +1,84 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INDEXLIST.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INDEXLIST_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Index_type count = 0; + + for (Index_type i = ibegin; i < iend; ++i ) { + INDEXLIST_BODY; + } + + m_len = count; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto indexlist_base_lam = [=](Index_type i, Index_type& count) { + INDEXLIST_BODY + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Index_type count = 0; + + for (Index_type i = ibegin; i < iend; ++i ) { + indexlist_base_lam(i, count); + } + + m_len = count; + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp new file mode 
100644 index 000000000..315269450 --- /dev/null +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -0,0 +1,160 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INDEXLIST_3LOOP.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +#define INDEXLIST_3LOOP_DATA_SETUP_StdPar \ + Index_type* counts = new Index_type[iend+1]; + +#define INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar \ + delete[] counts; counts = nullptr; + + + +void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INDEXLIST_3LOOP_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + INDEXLIST_3LOOP_DATA_SETUP_StdPar; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; + } + + Index_type count = 0; + + for (Index_type i = ibegin; i < iend+1; ++i ) { + Index_type inc = counts[i]; + counts[i] = count; + count += inc; + } + + for (Index_type i = ibegin; i < iend; ++i ) { + INDEXLIST_3LOOP_MAKE_LIST; + } + + m_len = counts[iend]; + + } + stopTimer(); + + INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + INDEXLIST_3LOOP_DATA_SETUP_StdPar; + + auto indexlist_conditional_lam = [=](Index_type i) { + counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 
1 : 0; + }; + + auto indexlist_make_list_lam = [=](Index_type i) { + INDEXLIST_3LOOP_MAKE_LIST; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + indexlist_conditional_lam(i); + } + + Index_type count = 0; + + for (Index_type i = ibegin; i < iend+1; ++i ) { + Index_type inc = counts[i]; + counts[i] = count; + count += inc; + } + + for (Index_type i = ibegin; i < iend; ++i ) { + indexlist_make_list_lam(i); + } + + m_len = counts[iend]; + + } + stopTimer(); + + INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; + + break; + } + + case RAJA_StdPar : { + + INDEXLIST_3LOOP_DATA_SETUP_StdPar; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum len(0); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; + }); + + RAJA::exclusive_scan_inplace( + RAJA::make_span(counts+ibegin, iend+1-ibegin)); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + if (counts[i] != counts[i+1]) { + list[counts[i]] = i; + len += 1; + } + }); + + m_len = len.get(); + + } + stopTimer(); + + INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; + + break; + } +#endif + + default : { + getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace basic +} // end namespace rajaperf From 48152bc10e428519f1ffdf4653522c127ebad523 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:20:26 +0300 Subject: [PATCH 011/174] reduce sum --- src/algorithm/CMakeLists.txt | 1 + src/algorithm/REDUCE_SUM-StdPar.cpp | 104 ++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 src/algorithm/REDUCE_SUM-StdPar.cpp diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index fc9dfa4ef..2baa2b414 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -28,6 +28,7 @@ blt_add_library( 
SORTPAIRS-OMP.cpp REDUCE_SUM.cpp REDUCE_SUM-Seq.cpp + REDUCE_SUM-StdPar.cpp REDUCE_SUM-Hip.cpp REDUCE_SUM-Cuda.cpp REDUCE_SUM-OMP.cpp diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp new file mode 100644 index 000000000..261be955a --- /dev/null +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE_SUM.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + REDUCE_SUM_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sum = m_sum_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + REDUCE_SUM_BODY; + } + + m_sum = sum; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto reduce_sum_base_lam = [=](Index_type i) { + return x[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sum = m_sum_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + sum += reduce_sum_base_lam(i); + } + + m_sum = sum; + + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum sum(m_sum_init); + + RAJA::forall( RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + REDUCE_SUM_BODY; + }); + + m_sum = 
sum.get(); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n REDUCE_SUM : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace algorithm +} // end namespace rajaperf From f0ad8c2a77b4c6671e569d82c8dcfa88c9a13418 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:51:27 +0300 Subject: [PATCH 012/174] cout fix --- src/basic-kokkos/DAXPY-Kokkos.cpp | 2 +- src/basic-kokkos/IF_QUAD-Kokkos.cpp | 2 +- src/basic-kokkos/INIT3-Kokkos.cpp | 2 +- src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp | 2 +- src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp | 2 +- src/basic-kokkos/MULADDSUB-Kokkos.cpp | 2 +- src/basic-kokkos/NESTED_INIT-Kokkos.cpp | 2 +- src/basic-kokkos/PI_ATOMIC-Kokkos.cpp | 2 +- src/basic-kokkos/REDUCE3_INT-Kokkos.cpp | 2 +- src/basic-kokkos/TRAP_INT-Kokkos.cpp | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp index 6c3ad5e6e..e6ca5d0ad 100644 --- a/src/basic-kokkos/DAXPY-Kokkos.cpp +++ b/src/basic-kokkos/DAXPY-Kokkos.cpp @@ -53,7 +53,7 @@ void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) break; } default: { - std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp index e1b8cc601..67ed8e615 100644 --- a/src/basic-kokkos/IF_QUAD-Kokkos.cpp +++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp @@ -61,7 +61,7 @@ void IF_QUAD::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_id } default: { - std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; + getCout() << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp index eef8ffbaa..97c7a04d4 100644 --- a/src/basic-kokkos/INIT3-Kokkos.cpp +++ 
b/src/basic-kokkos/INIT3-Kokkos.cpp @@ -57,7 +57,7 @@ void INIT3::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) } default: { - std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT3 : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp index 8d59409d1..05da1be92 100644 --- a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp @@ -46,7 +46,7 @@ void INIT_VIEW1D::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } default: { - std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp index ae03fe752..756d517cd 100644 --- a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp @@ -46,7 +46,7 @@ void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ } default: { - std::cout << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid + getCout() << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp index e81cd17d5..1aa8f88bb 100644 --- a/src/basic-kokkos/MULADDSUB-Kokkos.cpp +++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp @@ -56,7 +56,7 @@ void MULADDSUB::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } default: { - std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + getCout() << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; } } moveDataToHostFromKokkosView(out1, out1_view, iend); diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp index f69020e57..c482d9f90 100644 --- 
a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp +++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp @@ -68,7 +68,7 @@ void NESTED_INIT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } default: { - std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; + getCout() << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; } } } diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp index 066aca7aa..326578e66 100644 --- a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp +++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp @@ -59,7 +59,7 @@ void PI_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } default: { - std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; + getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; } } } diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp index b1566d619..47ca2b222 100644 --- a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp +++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp @@ -66,7 +66,7 @@ void REDUCE3_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } default: { - std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp index 47c4596e6..dee9433e8 100644 --- a/src/basic-kokkos/TRAP_INT-Kokkos.cpp +++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp @@ -59,7 +59,7 @@ void TRAP_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i } default: { - std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; } } } From d55a2d5adbc05d0087ed3c112e5b65e33fd45996 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:51:34 +0300 Subject: [PATCH 013/174] cout fix --- 
src/algorithm/SORT-StdPar.cpp | 2 +- src/algorithm/SORTPAIRS-StdPar.cpp | 2 +- src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 2 +- src/apps/DIFFUSION3DPA-StdPar.cpp | 2 +- src/apps/ENERGY-StdPar.cpp | 2 +- src/apps/FIR-StdPar.cpp | 2 +- src/apps/HALOEXCHANGE-StdPar.cpp | 2 +- src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 2 +- src/apps/LTIMES-StdPar.cpp | 2 +- src/apps/LTIMES_NOVIEW-StdPar.cpp | 2 +- src/apps/MASS3DPA-StdPar.cpp | 2 +- src/apps/PRESSURE-StdPar.cpp | 2 +- src/apps/VOL3D-StdPar.cpp | 2 +- src/basic/DAXPY-StdPar.cpp | 18 +++++----- src/basic/DAXPY_ATOMIC-StdPar.cpp | 24 +++++++++---- src/basic/IF_QUAD-StdPar.cpp | 2 +- src/basic/INDEXLIST-StdPar.cpp | 8 +++-- src/basic/INDEXLIST_3LOOP-StdPar.cpp | 15 +++++++- src/basic/INIT3-StdPar.cpp | 21 +++++------ src/basic/INIT_VIEW1D-StdPar.cpp | 2 +- src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 2 +- src/basic/MAT_MAT_SHARED-StdPar.cpp | 35 ++++++++++--------- src/basic/MULADDSUB-StdPar.cpp | 2 +- src/basic/NESTED_INIT-StdPar.cpp | 4 +-- src/basic/PI_ATOMIC-StdPar.cpp | 2 +- src/basic/PI_REDUCE-StdPar.cpp | 5 +-- src/basic/REDUCE3_INT-StdPar.cpp | 9 ++--- src/basic/REDUCE_STRUCT-StdPar.cpp | 19 ++++++++-- src/basic/TRAP_INT-StdPar.cpp | 2 +- src/common/StdParUtils.hpp | 1 + src/lcals/DIFF_PREDICT-StdPar.cpp | 2 +- src/lcals/EOS-StdPar.cpp | 2 +- src/lcals/FIRST_DIFF-StdPar.cpp | 2 +- src/lcals/FIRST_MIN-StdPar.cpp | 2 +- src/lcals/FIRST_SUM-StdPar.cpp | 2 +- src/lcals/GEN_LIN_RECUR-StdPar.cpp | 2 +- src/lcals/HYDRO_1D-StdPar.cpp | 2 +- src/lcals/HYDRO_2D-StdPar.cpp | 2 +- src/lcals/INT_PREDICT-StdPar.cpp | 2 +- src/lcals/PLANCKIAN-StdPar.cpp | 2 +- src/lcals/TRIDIAG_ELIM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_2MM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_3MM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_ADI-StdPar.cpp | 2 +- src/polybench/POLYBENCH_ATAX-StdPar.cpp | 2 +- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 2 +- .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 2 +- src/polybench/POLYBENCH_GEMM-StdPar.cpp | 2 +- 
src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 2 +- src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 2 +- src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 2 +- src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 2 +- src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 2 +- src/polybench/POLYBENCH_MVT-StdPar.cpp | 2 +- src/stream/ADD-StdPar.cpp | 2 +- src/stream/COPY-StdPar.cpp | 2 +- src/stream/DOT-StdPar.cpp | 2 +- src/stream/MUL-StdPar.cpp | 2 +- src/stream/TRIAD-StdPar.cpp | 8 ++--- 59 files changed, 147 insertions(+), 114 deletions(-) diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp index ba808313e..ddf3be505 100644 --- a/src/algorithm/SORT-StdPar.cpp +++ b/src/algorithm/SORT-StdPar.cpp @@ -63,7 +63,7 @@ void SORT::runStdParVariant(VariantID vid, size_t tune_idx) #endif default : { - std::cout << "\n SORT : Unknown variant id = " << vid << std::endl; + getCout() << "\n SORT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index f82b260e5..d97ade603 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -89,7 +89,7 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) #endif default : { - std::cout << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; + getCout() << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp index 93fde5151..7679df246 100644 --- a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -111,7 +111,7 @@ void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl; + getCout() << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index 
38ee4da02..97d0ad239 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -321,7 +321,7 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { #endif // RUN_RAJA_STDPAR default: - std::cout << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid + getCout() << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid << std::endl; } diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp index fceadd05e..d02b68d4a 100644 --- a/src/apps/ENERGY-StdPar.cpp +++ b/src/apps/ENERGY-StdPar.cpp @@ -186,7 +186,7 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n ENERGY : Unknown variant id = " << vid << std::endl; + getCout() << "\n ENERGY : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp index 782a36321..07040148f 100644 --- a/src/apps/FIR-StdPar.cpp +++ b/src/apps/FIR-StdPar.cpp @@ -97,7 +97,7 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n FIR : Unknown variant id = " << vid << std::endl; + getCout() << "\n FIR : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index 11e551fda..e06612b6b 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -171,7 +171,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl; + getCout() << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp index 86967eac6..b320e7fe3 100644 --- a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -254,7 +254,7 @@ void 
HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n HALOEXCHANGE_FUSED : Unknown variant id = " << vid << std::endl; + getCout() << "\n HALOEXCHANGE_FUSED : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index 59422d859..60e8aec63 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -126,7 +126,7 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n LTIMES : Unknown variant id = " << vid << std::endl; + getCout() << "\n LTIMES : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp index 4039f4ffc..7ada8a148 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp +++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -120,7 +120,7 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl; + getCout() << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index 8c8a6a328..58b08b9d1 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -222,7 +222,7 @@ void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { #endif // RUN_RAJA_STDPAR default: - std::cout << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; + getCout() << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; } #endif } diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp index cc1eb2c0a..787def3e6 100644 --- a/src/apps/PRESSURE-StdPar.cpp +++ b/src/apps/PRESSURE-StdPar.cpp @@ -114,7 +114,7 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n PRESSURE 
: Unknown variant id = " << vid << std::endl; + getCout() << "\n PRESSURE : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp index 1997e95cf..fb369f192 100644 --- a/src/apps/VOL3D-StdPar.cpp +++ b/src/apps/VOL3D-StdPar.cpp @@ -98,7 +98,7 @@ void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n VOL3D : Unknown variant id = " << vid << std::endl; + getCout() << "\n VOL3D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp index 61ed338ec..3615eeee8 100644 --- a/src/basic/DAXPY-StdPar.cpp +++ b/src/basic/DAXPY-StdPar.cpp @@ -1,7 +1,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. -// See the RAJAPerf/COPYRIGHT file for details. +// See the RAJAPerf/LICENSE file for details. 
// // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// @@ -11,12 +11,10 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include -namespace rajaperf +namespace rajaperf { namespace basic { @@ -35,10 +33,6 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) DAXPY_DATA_SETUP; - auto daxpy_lam = [=](Index_type i) { - DAXPY_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -60,6 +54,10 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto daxpy_lam = [=](Index_type i) { + DAXPY_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -91,7 +89,7 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) #endif default : { - std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index c143e571d..a0e887e7c 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -20,10 +22,15 @@ namespace basic void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + DAXPY_ATOMIC_DATA_SETUP; switch ( vid ) { @@ -33,9 +40,11 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - for (Index_type i = ibegin; i < iend; ++i ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { DAXPY_ATOMIC_BODY; - } + }); } 
stopTimer(); @@ -43,7 +52,6 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto daxpy_atomic_lam = [=](Index_type i) { @@ -53,9 +61,11 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - for (Index_type i = ibegin; i < iend; ++i ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { daxpy_atomic_lam(i); - } + }); } stopTimer(); @@ -63,12 +73,13 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu break; } +#ifdef RAJA_ENABLE_STDPAR case RAJA_StdPar : { startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::forall( + RAJA::forall( RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { DAXPY_ATOMIC_RAJA_BODY(RAJA::seq_atomic); @@ -87,6 +98,7 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu } +#endif } } // end namespace basic diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp index 3c86353ef..605a1258e 100644 --- a/src/basic/IF_QUAD-StdPar.cpp +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -92,7 +92,7 @@ void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; + getCout() << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index 8b4565b9c..f2b8cb828 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -20,6 +22,7 @@ namespace basic void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type 
ibegin = 0; const Index_type iend = getActualProblemSize(); @@ -35,6 +38,7 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ Index_type count = 0; +#warning needs parallel inscan for (Index_type i = ibegin; i < iend; ++i ) { INDEXLIST_BODY; } @@ -47,7 +51,6 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto indexlist_base_lam = [=](Index_type i, Index_type& count) { @@ -59,6 +62,7 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ Index_type count = 0; +#warning needs parallel inscan for (Index_type i = ibegin; i < iend; ++i ) { indexlist_base_lam(i, count); } @@ -70,7 +74,6 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ break; } -#endif default : { getCout() << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; @@ -78,6 +81,7 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } +#endif } } // end namespace basic diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index 315269450..39657f1ce 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -27,10 +29,14 @@ namespace basic void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + INDEXLIST_3LOOP_DATA_SETUP; switch ( vid ) { @@ -42,18 +48,21 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel for for 
(Index_type i = ibegin; i < iend; ++i ) { counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; } Index_type count = 0; +#warning needs parallel scan for (Index_type i = ibegin; i < iend+1; ++i ) { Index_type inc = counts[i]; counts[i] = count; count += inc; } +#warning needs parallel for for (Index_type i = ibegin; i < iend; ++i ) { INDEXLIST_3LOOP_MAKE_LIST; } @@ -68,7 +77,6 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { INDEXLIST_3LOOP_DATA_SETUP_StdPar; @@ -84,18 +92,21 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel for for (Index_type i = ibegin; i < iend; ++i ) { indexlist_conditional_lam(i); } Index_type count = 0; +#warning needs parallel scan for (Index_type i = ibegin; i < iend+1; ++i ) { Index_type inc = counts[i]; counts[i] = count; count += inc; } +#warning needs parallel for for (Index_type i = ibegin; i < iend; ++i ) { indexlist_make_list_lam(i); } @@ -110,6 +121,7 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG break; } +#if defined(RUN_RAJA_STDPAR) case RAJA_StdPar : { INDEXLIST_3LOOP_DATA_SETUP_StdPar; @@ -154,6 +166,7 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG } +#endif } } // end namespace basic diff --git a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp index 7105fc9d3..d176c3b42 100644 --- a/src/basic/INIT3-StdPar.cpp +++ b/src/basic/INIT3-StdPar.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/COPYRIGHT file for details. 
// @@ -11,12 +11,10 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include -namespace rajaperf +namespace rajaperf { namespace basic { @@ -25,20 +23,15 @@ namespace basic void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - + auto begin = counting_iterator(ibegin); auto end = counting_iterator(iend); INIT3_DATA_SETUP; - auto init3_lam = [=](Index_type i) { - INIT3_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -60,6 +53,10 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto init3_lam = [=](Index_type i) { + INIT3_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -89,10 +86,10 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#endif // RUN_RAJA_STDPAR +#endif default : { - std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT3 : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp index c79d29b97..1ffbde1a0 100644 --- a/src/basic/INIT_VIEW1D-StdPar.cpp +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -98,7 +98,7 @@ void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp index 4014ccacd..ca7bf0130 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -98,7 +98,7 @@ void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n INIT_VIEW1D_OFFSET : Unknown 
variant id = " << vid << std::endl; + getCout() << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp index 02cb8622d..6aa32ea3b 100644 --- a/src/basic/MAT_MAT_SHARED-StdPar.cpp +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -8,13 +8,16 @@ #include "MAT_MAT_SHARED.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf { namespace basic { -void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { - +void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type N = m_N; @@ -29,12 +32,11 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning need parallel for for (Index_type by = 0; by < Ny; ++by) { for (Index_type bx = 0; bx < Nx; ++bx) { - //Work around for when compiling with CLANG and HIP - //See notes in MAT_MAT_SHARED.hpp - MAT_MAT_SHARED_BODY_0_CLANG_HIP_CPU(TL_SZ) + MAT_MAT_SHARED_BODY_0(TL_SZ) for (Index_type ty = 0; ty < TL_SZ; ++ty) { for (Index_type tx = 0; tx < TL_SZ; ++tx) { @@ -56,7 +58,7 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { } } - } // Sequential loop + } for (Index_type ty = 0; ty < TL_SZ; ++ty) { for (Index_type tx = 0; tx < TL_SZ; ++tx) { @@ -65,24 +67,21 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { } } } - - } // number of iterations + } stopTimer(); break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar: { startTimer(); - for 
(Index_type irep = 0; irep < run_reps; ++irep) { + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { auto outer_y = [&](Index_type by) { auto outer_x = [&](Index_type bx) { - - MAT_MAT_SHARED_BODY_0_CLANG_HIP_CPU(TL_SZ) + MAT_MAT_SHARED_BODY_0(TL_SZ) auto inner_y_1 = [&](Index_type ty) { auto inner_x_1 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_1(TL_SZ) }; @@ -143,16 +142,17 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { } }; +#warning need parallel for for (Index_type by = 0; by < Ny; ++by) { outer_y(by); } - - } // irep + } stopTimer(); break; } +#ifdef RAJA_ENABLE_STDPAR case RAJA_Sq: { using launch_policy = RAJA::expt::LaunchPolicy; @@ -241,13 +241,14 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { break; } -#endif // RUN_RAJA_STDPAR +#endif default: { - std::cout << "\n MAT_MAT_SHARED : Unknown variant id = " << vid + getCout() << "\n MAT_MAT_SHARED : Unknown variant id = " << vid << std::endl; } } +#endif } } // end namespace basic diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp index e86287d75..f89c3b179 100644 --- a/src/basic/MULADDSUB-StdPar.cpp +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -91,7 +91,7 @@ void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + getCout() << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp index a37a88dda..be46d2e06 100644 --- a/src/basic/NESTED_INIT-StdPar.cpp +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -66,7 +66,7 @@ void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) #endif { NESTED_INIT_BODY; - //std::cout << i << "," << j << "," << k << ";" << idx << " PAR\n"; + //getCout() << i << "," << j << "," << k << ";" << idx << " PAR\n"; } }); @@ -138,7 +138,7 @@ void NESTED_INIT::runStdParVariant(VariantID vid, 
size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; + getCout() << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 27b7557bf..6b2f80fdc 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -109,7 +109,7 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; + getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp index b2c075278..e1f37eea4 100644 --- a/src/basic/PI_REDUCE-StdPar.cpp +++ b/src/basic/PI_REDUCE-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include -#include #include @@ -106,7 +103,7 @@ void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx) #endif default : { - std::cout << "\n PI_REDUCE : Unknown variant id = " << vid << std::endl; + getCout() << "\n PI_REDUCE : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp index b40129c17..f7b5f5dd5 100644 --- a/src/basic/REDUCE3_INT-StdPar.cpp +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -1,7 +1,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. -// See the RAJAPerf/COPYRIGHT file for details. +// See the RAJAPerf/LICENSE file for details. 
// // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// @@ -12,9 +12,6 @@ #include #include "common/StdParUtils.hpp" -#include -#include -#include #include @@ -129,7 +126,7 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp index b91513bdb..1264f8257 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -11,6 +11,8 @@ #include "RAJA/RAJA.hpp" #include +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -21,10 +23,15 @@ namespace basic void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + REDUCE_STRUCT_DATA_SETUP; switch ( vid ) { @@ -38,8 +45,14 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t Real_type xmin = m_init_min; Real_type ymin = m_init_min; Real_type xmax = m_init_max; Real_type ymax = m_init_max; +#warning needs parallel for (Index_type i = ibegin; i < iend; ++i ) { - REDUCE_STRUCT_BODY; + xsum += points.x[i] ; \ + xmin = RAJA_MIN(xmin, points.x[i]) ; \ + xmax = RAJA_MAX(xmax, points.x[i]) ; \ + ysum += points.y[i] ; \ + ymin = RAJA_MIN(ymin, points.y[i]) ; \ + ymax = RAJA_MAX(ymax, points.y[i]) ; } points.SetCenter(xsum/(points.N), ysum/(points.N)); @@ -55,7 +68,6 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto reduce_struct_x_base_lam = 
[=](Index_type i) -> Real_type { @@ -73,6 +85,7 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t Real_type xmin = m_init_min; Real_type ymin = m_init_min; Real_type xmax = m_init_max; Real_type ymax = m_init_max; +#warning needs parallel for (Index_type i = ibegin; i < iend; ++i ) { xsum += reduce_struct_x_base_lam(i); xmin = RAJA_MIN(xmin, reduce_struct_x_base_lam(i)); @@ -95,6 +108,7 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t break; } +#if defined(RUN_RAJA_STDPAR) case RAJA_StdPar : { startTimer(); @@ -133,6 +147,7 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t } +#endif } } // end namespace basic diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp index 359ed363a..ff626091b 100644 --- a/src/basic/TRAP_INT-StdPar.cpp +++ b/src/basic/TRAP_INT-StdPar.cpp @@ -120,7 +120,7 @@ void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index ab2e49f46..26c65c84b 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -32,6 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include template diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp index b86723185..5ae9aba30 100644 --- a/src/lcals/DIFF_PREDICT-StdPar.cpp +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -91,7 +91,7 @@ void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl; + getCout() << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp index a3aa279f2..8a2aa56a5 100644 --- a/src/lcals/EOS-StdPar.cpp +++ b/src/lcals/EOS-StdPar.cpp @@ -91,7 +91,7 @@ void EOS::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n EOS : Unknown variant id = " << vid << std::endl; + getCout() << "\n EOS : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp index 1a2d15e6c..e327870d9 100644 --- a/src/lcals/FIRST_DIFF-StdPar.cpp +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -91,7 +91,7 @@ void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl; + getCout() << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index 4a019b5b3..4ea40ca09 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -102,7 +102,7 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl; + getCout() << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp index 1f47f9412..c60223fe0 100644 --- 
a/src/lcals/FIRST_SUM-StdPar.cpp +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -91,7 +91,7 @@ void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl; + getCout() << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp index f1cd69a0d..d33cb4eea 100644 --- a/src/lcals/GEN_LIN_RECUR-StdPar.cpp +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -114,7 +114,7 @@ void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl; + getCout() << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp index 45601b347..1099102e3 100644 --- a/src/lcals/HYDRO_1D-StdPar.cpp +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -92,7 +92,7 @@ void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl; + getCout() << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp index 1650dffd5..b6825ebb2 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ b/src/lcals/HYDRO_2D-StdPar.cpp @@ -183,7 +183,7 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl; + getCout() << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp index d8139dfbe..ef2b06df1 100644 --- a/src/lcals/INT_PREDICT-StdPar.cpp +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -92,7 +92,7 @@ void INT_PREDICT::runStdParVariant(VariantID 
vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl; + getCout() << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp index 3d937bb22..7a41c6120 100644 --- a/src/lcals/PLANCKIAN-StdPar.cpp +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -93,7 +93,7 @@ void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl; + getCout() << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index ff1986bc1..28f4a1ae2 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -92,7 +92,7 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; + getCout() << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index feb441614..64d24ff30 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -243,7 +243,7 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index 189caa032..1a6a1ec38 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -319,7 +319,7 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR 
default : { - std::cout << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index 6d2a99650..7fbe1d871 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -224,7 +224,7 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; + getCout() << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp index 1c3d1a3a9..57e8d785a 100644 --- a/src/polybench/POLYBENCH_ATAX-StdPar.cpp +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -201,7 +201,7 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 5bd7435dd..01ebceccd 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -214,7 +214,7 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; + getCout() << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index 023b125d3..0f76e256b 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -146,7 +146,7 @@ 
void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index 1fd75528e..4eda80f76 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -180,7 +180,7 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 2673abd45..7c40bbd64 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -243,7 +243,7 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index 070e56c18..720688eee 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -133,7 +133,7 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index 
d18a359f9..8bb373b52 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -176,7 +176,7 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp index 1b1ce72f2..a39699c97 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -125,7 +125,7 @@ void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index 41cd58b2b..440016ca7 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -164,7 +164,7 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp index 7adc162de..79ff364c6 100644 --- a/src/polybench/POLYBENCH_MVT-StdPar.cpp +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -192,7 +192,7 @@ void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_MVT : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_MVT : Unknown variant id 
= " << vid << std::endl; } } diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index bde010541..5f26092a1 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -91,7 +91,7 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n ADD : Unknown variant id = " << vid << std::endl; + getCout() << "\n ADD : Unknown variant id = " << vid << std::endl; } } diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp index 1fc757e22..38e53dbf8 100644 --- a/src/stream/COPY-StdPar.cpp +++ b/src/stream/COPY-StdPar.cpp @@ -76,7 +76,7 @@ void COPY::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n COPY : Unknown variant id = " << vid << std::endl; + getCout() << "\n COPY : Unknown variant id = " << vid << std::endl; } } diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp index cf22a9e35..7d359ebb4 100644 --- a/src/stream/DOT-StdPar.cpp +++ b/src/stream/DOT-StdPar.cpp @@ -104,7 +104,7 @@ void DOT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n DOT : Unknown variant id = " << vid << std::endl; + getCout() << "\n DOT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index 6cf9f418f..aca05ed54 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -91,7 +91,7 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n MUL : Unknown variant id = " << vid << std::endl; + getCout() << "\n MUL : Unknown variant id = " << vid << std::endl; } } diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index 484d79cc3..48fe2ee76 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -1,7 +1,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// 
Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. -// See the RAJAPerf/COPYRIGHT file for details. +// See the RAJAPerf/LICENSE file for details. // // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include @@ -91,7 +89,7 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n TRIAD : Unknown variant id = " << vid << std::endl; + getCout() << "\n TRIAD : Unknown variant id = " << vid << std::endl; } } From 64a9768332efa638cdbe2bf021baacbfa0b3267c Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 13:26:47 +0300 Subject: [PATCH 014/174] cleanup --- src/algorithm/MEMCPY-StdPar.cpp | 2 -- src/algorithm/MEMSET-StdPar.cpp | 2 -- src/algorithm/REDUCE_SUM-StdPar.cpp | 11 +++++++- src/algorithm/SCAN-StdPar.cpp | 26 +++++-------------- src/algorithm/SORT-StdPar.cpp | 6 ++--- src/algorithm/SORTPAIRS-StdPar.cpp | 3 +-- src/apps/CONVECTION3DPA-StdPar.cpp | 18 ++++++++++--- src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 2 -- src/apps/DIFFUSION3DPA-StdPar.cpp | 7 ++--- src/apps/ENERGY-StdPar.cpp | 2 -- src/apps/FIR-StdPar.cpp | 2 -- src/apps/HALOEXCHANGE-StdPar.cpp | 2 -- src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 2 -- src/apps/LTIMES-StdPar.cpp | 2 -- src/apps/LTIMES_NOVIEW-StdPar.cpp | 2 -- src/apps/MASS3DPA-StdPar.cpp | 4 +-- src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp | 8 +++++- src/apps/PRESSURE-StdPar.cpp | 2 -- src/apps/VOL3D-StdPar.cpp | 2 -- src/basic/IF_QUAD-StdPar.cpp | 2 -- src/basic/INIT_VIEW1D-StdPar.cpp | 2 -- src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 2 -- src/basic/MULADDSUB-StdPar.cpp | 2 -- src/basic/NESTED_INIT-StdPar.cpp | 2 -- src/basic/PI_ATOMIC-StdPar.cpp | 2 -- 
src/basic/TRAP_INT-StdPar.cpp | 3 --- src/lcals/DIFF_PREDICT-StdPar.cpp | 2 -- src/lcals/EOS-StdPar.cpp | 2 -- src/lcals/FIRST_DIFF-StdPar.cpp | 2 -- src/lcals/FIRST_MIN-StdPar.cpp | 2 -- src/lcals/FIRST_SUM-StdPar.cpp | 2 -- src/lcals/GEN_LIN_RECUR-StdPar.cpp | 2 -- src/lcals/HYDRO_1D-StdPar.cpp | 2 -- src/lcals/HYDRO_2D-StdPar.cpp | 2 -- src/lcals/INT_PREDICT-StdPar.cpp | 2 -- src/lcals/PLANCKIAN-StdPar.cpp | 2 -- src/lcals/TRIDIAG_ELIM-StdPar.cpp | 2 -- src/polybench/POLYBENCH_2MM-StdPar.cpp | 2 -- src/polybench/POLYBENCH_3MM-StdPar.cpp | 2 -- src/polybench/POLYBENCH_ADI-StdPar.cpp | 2 -- src/polybench/POLYBENCH_ATAX-StdPar.cpp | 2 -- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 2 -- .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 2 -- src/polybench/POLYBENCH_GEMM-StdPar.cpp | 2 -- src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 2 -- src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 2 -- src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 2 -- src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 2 -- src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 2 -- src/polybench/POLYBENCH_MVT-StdPar.cpp | 3 --- src/stream/ADD-StdPar.cpp | 4 +-- src/stream/COPY-StdPar.cpp | 3 +-- src/stream/DOT-StdPar.cpp | 3 --- src/stream/MUL-StdPar.cpp | 2 -- src/stream/TRIAD-StdPar.cpp | 1 - 55 files changed, 46 insertions(+), 136 deletions(-) diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp index ff8e66c08..71a6c2eac 100644 --- a/src/algorithm/MEMCPY-StdPar.cpp +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp index 73a61d5cf..8ffba6f3f 100644 --- a/src/algorithm/MEMSET-StdPar.cpp +++ b/src/algorithm/MEMSET-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp index 
261be955a..b2e010833 100644 --- a/src/algorithm/REDUCE_SUM-StdPar.cpp +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -20,10 +22,14 @@ namespace algorithm void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + REDUCE_SUM_DATA_SETUP; switch ( vid ) { @@ -35,6 +41,7 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune Real_type sum = m_sum_init; +#warning needs parallel reduce for (Index_type i = ibegin; i < iend; ++i ) { REDUCE_SUM_BODY; } @@ -47,7 +54,6 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto reduce_sum_base_lam = [=](Index_type i) { @@ -59,6 +65,7 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune Real_type sum = m_sum_init; +#warning needs parallel reduce for (Index_type i = ibegin; i < iend; ++i ) { sum += reduce_sum_base_lam(i); } @@ -71,6 +78,7 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune break; } +#ifdef RAJA_ENABLE_STDPAR case RAJA_StdPar : { startTimer(); @@ -98,6 +106,7 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune } +#endif } } // end namespace algorithm diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index d6adeaabf..f6095642d 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -20,6 +22,7 @@ namespace algorithm void SCAN::runStdParVariant(VariantID vid, size_t 
RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); @@ -33,6 +36,7 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel scan SCAN_PROLOGUE; for (Index_type i = ibegin; i < iend; ++i ) { SCAN_BODY; @@ -44,26 +48,7 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) break; } -#if defined(RUN_RAJA_STDPAR) - case Lambda_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - SCAN_PROLOGUE; - auto scan_lam = [=, &scan_var](Index_type i) { - SCAN_BODY; - }; - for (Index_type i = ibegin; i < iend; ++i ) { - scan_lam(i); - } - - } - stopTimer(); - - break; - } - +#ifdef RAJA_ENABLE_STDPAR case RAJA_StdPar : { startTimer(); @@ -84,6 +69,7 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) } +#endif } } // end namespace algorithm diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp index ddf3be505..3fcee72d6 100644 --- a/src/algorithm/SORT-StdPar.cpp +++ b/src/algorithm/SORT-StdPar.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/COPYRIGHT file for details. 
// @@ -10,8 +10,7 @@ #include "RAJA/RAJA.hpp" -#include -#include +#include "common/StdParUtils.hpp" #include @@ -24,7 +23,6 @@ namespace algorithm void SORT::runStdParVariant(VariantID vid, size_t tune_idx) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index d97ade603..daa603e7f 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -11,8 +11,7 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include + #include #include #include diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp index 1bc58d4f1..119d99b0a 100644 --- a/src/apps/CONVECTION3DPA-StdPar.cpp +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -10,16 +10,23 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf { namespace apps { -void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); CONVECTION3DPA_DATA_SETUP; + auto begin = counting_iterator(0); + auto end = counting_iterator(NE); + switch (vid) { case Base_StdPar: { @@ -27,7 +34,9 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - for (int e = 0; e < NE; ++e) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](int e) { CONVECTION3DPA_0_CPU; @@ -118,7 +127,7 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( } } } - } // element loop + }); // element loop } stopTimer(); @@ -126,7 +135,7 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( break; } -#if 
defined(RUN_RAJA_SEQ) +#if defined(RUN_RAJA_STDPAR) case RAJA_StdPar: { using launch_policy = RAJA::expt::LaunchPolicy; @@ -310,6 +319,7 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid << std::endl; } +#endif } } // end namespace apps diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp index 7679df246..469bd93bd 100644 --- a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include "AppsData.hpp" diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index 97d0ad239..608a4d665 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -14,18 +14,15 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include namespace rajaperf { namespace apps { -void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { - +void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) +{ #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); DIFFUSION3DPA_DATA_SETUP; diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp index d02b68d4a..f8c054fa4 100644 --- a/src/apps/ENERGY-StdPar.cpp +++ b/src/apps/ENERGY-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp index 07040148f..befd29fa4 100644 --- a/src/apps/FIR-StdPar.cpp +++ b/src/apps/FIR-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index e06612b6b..6043185a2 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -11,8 +11,6 @@ 
#include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp index b320e7fe3..242759fa2 100644 --- a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index 60e8aec63..ba8ae1332 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp index 7ada8a148..0ff1a9d40 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp +++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index 58b08b9d1..11ce57188 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -11,8 +11,8 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include + + #include diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp index 585e3a000..f1326230e 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include "AppsData.hpp" #include @@ -22,6 +24,7 @@ namespace apps void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = m_domain->n_real_zones; @@ -37,6 +40,7 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS 
startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel for+atomic or reduce for (Index_type ii = ibegin ; ii < iend ; ++ii ) { NODAL_ACCUMULATION_3D_BODY_INDEX; NODAL_ACCUMULATION_3D_BODY; @@ -48,7 +52,6 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto nodal_accumulation_3d_lam = [=](Index_type ii) { @@ -59,6 +62,7 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel for+atomic or reduce for (Index_type ii = ibegin ; ii < iend ; ++ii ) { nodal_accumulation_3d_lam(ii); } @@ -69,6 +73,7 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS break; } +#if defined(RUN_RAJA_STDPAR) case RAJA_StdPar : { camp::resources::Resource working_res{camp::resources::Host()}; @@ -98,6 +103,7 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS } +#endif } } // end namespace apps diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp index 787def3e6..301e30719 100644 --- a/src/apps/PRESSURE-StdPar.cpp +++ b/src/apps/PRESSURE-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp index fb369f192..93cd2a941 100644 --- a/src/apps/VOL3D-StdPar.cpp +++ b/src/apps/VOL3D-StdPar.cpp @@ -13,8 +13,6 @@ #include "AppsData.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp index 605a1258e..137fdd0e6 100644 --- a/src/basic/IF_QUAD-StdPar.cpp +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp 
b/src/basic/INIT_VIEW1D-StdPar.cpp index 1ffbde1a0..30c190fdd 100644 --- a/src/basic/INIT_VIEW1D-StdPar.cpp +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp index ca7bf0130..c6ff05190 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp index f89c3b179..b76f667b7 100644 --- a/src/basic/MULADDSUB-StdPar.cpp +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp index be46d2e06..1041a9953 100644 --- a/src/basic/NESTED_INIT-StdPar.cpp +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 6b2f80fdc..491a2cfd3 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -19,8 +19,6 @@ typedef std::atomic myAtomic; #endif #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp index ff626091b..94907744c 100644 --- a/src/basic/TRAP_INT-StdPar.cpp +++ b/src/basic/TRAP_INT-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include -#include #include diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp index 5ae9aba30..19a843bfb 100644 --- a/src/lcals/DIFF_PREDICT-StdPar.cpp +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -11,8 +11,6 @@ #include 
"RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp index 8a2aa56a5..1022d79a1 100644 --- a/src/lcals/EOS-StdPar.cpp +++ b/src/lcals/EOS-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp index e327870d9..5e274c841 100644 --- a/src/lcals/FIRST_DIFF-StdPar.cpp +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index 4ea40ca09..3b797cbc6 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp index c60223fe0..b02253819 100644 --- a/src/lcals/FIRST_SUM-StdPar.cpp +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp index d33cb4eea..e16eebddd 100644 --- a/src/lcals/GEN_LIN_RECUR-StdPar.cpp +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp index 1099102e3..ce8d37a2c 100644 --- a/src/lcals/HYDRO_1D-StdPar.cpp +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp index b6825ebb2..5f6d3dbc2 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ b/src/lcals/HYDRO_2D-StdPar.cpp 
@@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp index ef2b06df1..a635061d2 100644 --- a/src/lcals/INT_PREDICT-StdPar.cpp +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp index 7a41c6120..df63d89b1 100644 --- a/src/lcals/PLANCKIAN-StdPar.cpp +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include #include diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index 28f4a1ae2..9ce2afd9f 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index 64d24ff30..43683a68b 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index 1a6a1ec38..b7ac966a2 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index 7fbe1d871..18641aa5c 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git 
a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp index 57e8d785a..b2be11771 100644 --- a/src/polybench/POLYBENCH_ATAX-StdPar.cpp +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 01ebceccd..d2584e96c 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index 0f76e256b..c6d015640 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index 4eda80f76..b17c381c0 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 7c40bbd64..37361759d 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index 720688eee..8201ecb24 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" 
-#include -#include #include diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index 8bb373b52..0b5690828 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp index a39699c97..76dca3264 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index 440016ca7..11d8c208c 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp index 79ff364c6..2c3b30ffb 100644 --- a/src/polybench/POLYBENCH_MVT-StdPar.cpp +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include - namespace rajaperf { diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index 5f26092a1..2131dedbf 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -11,12 +11,10 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include -namespace rajaperf +namespace rajaperf { namespace stream { diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp index 38e53dbf8..bda9af163 100644 --- a/src/stream/COPY-StdPar.cpp +++ b/src/stream/COPY-StdPar.cpp @@ -10,8 +10,7 @@ #include "RAJA/RAJA.hpp" -#include -#include +#include "common/StdParUtils.hpp" 
#include diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp index 7d359ebb4..43d247f72 100644 --- a/src/stream/DOT-StdPar.cpp +++ b/src/stream/DOT-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include -#include #include diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index aca05ed54..082265af2 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index 48fe2ee76..4b3db4a49 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -23,7 +23,6 @@ namespace stream void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); From 1cda0fd30cb497624287c98f853246695fd3f705 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 15:45:28 +0300 Subject: [PATCH 015/174] README --- README.stdpar | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 README.stdpar diff --git a/README.stdpar b/README.stdpar new file mode 100644 index 000000000..5cd8a478b --- /dev/null +++ b/README.stdpar @@ -0,0 +1,7 @@ +# GCC + +# NVC++ + +# Intel +cmake .. 
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 + From a1470e1a6edaac667845d16da422a88b79774450 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 15:45:52 +0300 Subject: [PATCH 016/174] fix unroll pragma --- src/apps/MASS3DPA-StdPar.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index 11ce57188..ad347f5d8 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -12,20 +12,11 @@ #include "common/StdParUtils.hpp" - - #include namespace rajaperf { namespace apps { -//#define USE_RAJA_UNROLL -#define RAJA_DIRECT_PRAGMA(X) _Pragma(#X) -#if defined(USE_RAJA_UNROLL) -#define RAJA_UNROLL(N) RAJA_DIRECT_PRAGMA(unroll(N)) -#else -#define RAJA_UNROLL(N) -#endif #define CPU_FOREACH(i, k, N) for (int i = 0; i < N; i++) void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { From 263821a1aace217d685c610cdb3254dded742702 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 15:46:49 +0300 Subject: [PATCH 017/174] GCC --- README.stdpar | 1 + 1 file changed, 1 insertion(+) diff --git a/README.stdpar b/README.stdpar index 5cd8a478b..1cb862c9d 100644 --- a/README.stdpar +++ b/README.stdpar @@ -1,4 +1,5 @@ # GCC +cmake .. 
-DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j8 # NVC++ From bf344cfc369a3fcc183404179f137fc35ba043b3 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 16:32:49 +0300 Subject: [PATCH 018/174] remove RAJA_StdPar --- src/algorithm/MEMCPY-StdPar.cpp | 40 +--- src/algorithm/MEMCPY.cpp | 1 - src/algorithm/MEMSET-StdPar.cpp | 40 +--- src/algorithm/MEMSET.cpp | 1 - src/algorithm/REDUCE_SUM-StdPar.cpp | 22 -- src/algorithm/SCAN-StdPar.cpp | 15 -- src/algorithm/SORT-StdPar.cpp | 15 -- src/algorithm/SORT.cpp | 1 - src/algorithm/SORTPAIRS-StdPar.cpp | 15 -- src/algorithm/SORTPAIRS.cpp | 1 - src/apps/CONVECTION3DPA-StdPar.cpp | 180 ---------------- src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 24 --- src/apps/DEL_DOT_VEC_2D.cpp | 1 - src/apps/DIFFUSION3DPA-StdPar.cpp | 199 ------------------ src/apps/DIFFUSION3DPA.cpp | 1 - src/apps/ENERGY-StdPar.cpp | 35 --- src/apps/ENERGY.cpp | 1 - src/apps/FIR-StdPar.cpp | 16 -- src/apps/FIR.cpp | 1 - src/apps/HALOEXCHANGE-StdPar.cpp | 47 ----- src/apps/HALOEXCHANGE.cpp | 1 - src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 80 ------- src/apps/HALOEXCHANGE_FUSED.cpp | 1 - src/apps/LTIMES-StdPar.cpp | 39 ---- src/apps/LTIMES.cpp | 1 - src/apps/LTIMES_NOVIEW-StdPar.cpp | 33 --- src/apps/LTIMES_NOVIEW.cpp | 1 - src/apps/MASS3DPA-StdPar.cpp | 112 ---------- src/apps/MASS3DPA.cpp | 1 - src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp | 24 --- src/apps/NODAL_ACCUMULATION_3D.cpp | 1 - src/apps/PRESSURE-StdPar.cpp | 23 -- src/apps/PRESSURE.cpp | 1 - src/apps/VOL3D-StdPar.cpp | 16 -- src/apps/VOL3D.cpp | 1 - src/basic/DAXPY-StdPar.cpp | 16 -- src/basic/DAXPY.cpp | 1 - src/basic/DAXPY_ATOMIC-StdPar.cpp | 19 -- src/basic/IF_QUAD-StdPar.cpp | 16 -- src/basic/IF_QUAD.cpp | 1 - src/basic/INDEXLIST_3LOOP-StdPar.cpp | 39 ---- src/basic/INIT3-StdPar.cpp | 16 -- src/basic/INIT3.cpp | 1 - src/basic/INIT_VIEW1D-StdPar.cpp | 22 -- src/basic/INIT_VIEW1D.cpp | 1 - 
src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 22 -- src/basic/INIT_VIEW1D_OFFSET.cpp | 1 - src/basic/MAT_MAT_SHARED.cpp | 5 +- src/basic/MULADDSUB-StdPar.cpp | 16 -- src/basic/MULADDSUB.cpp | 1 - src/basic/NESTED_INIT-StdPar.cpp | 30 --- src/basic/NESTED_INIT.cpp | 1 - src/basic/PI_ATOMIC-StdPar.cpp | 21 -- src/basic/PI_ATOMIC.cpp | 1 - src/basic/PI_REDUCE-StdPar.cpp | 22 -- src/basic/PI_REDUCE.cpp | 1 - src/basic/REDUCE3_INT-StdPar.cpp | 26 --- src/basic/REDUCE3_INT.cpp | 1 - src/basic/REDUCE_STRUCT-StdPar.cpp | 33 --- src/basic/TRAP_INT-StdPar.cpp | 22 -- src/basic/TRAP_INT.cpp | 1 - src/common/KernelBase.cpp | 8 - src/common/RAJAPerfSuite.cpp | 11 - src/common/RAJAPerfSuite.hpp | 1 - src/lcals/DIFF_PREDICT-StdPar.cpp | 16 -- src/lcals/DIFF_PREDICT.cpp | 1 - src/lcals/EOS-StdPar.cpp | 16 -- src/lcals/EOS.cpp | 1 - src/lcals/FIRST_DIFF-StdPar.cpp | 16 -- src/lcals/FIRST_DIFF.cpp | 1 - src/lcals/FIRST_MIN-StdPar.cpp | 23 -- src/lcals/FIRST_MIN.cpp | 1 - src/lcals/FIRST_SUM-StdPar.cpp | 16 -- src/lcals/FIRST_SUM.cpp | 1 - src/lcals/GEN_LIN_RECUR-StdPar.cpp | 19 -- src/lcals/GEN_LIN_RECUR.cpp | 1 - src/lcals/HYDRO_1D-StdPar.cpp | 16 -- src/lcals/HYDRO_1D.cpp | 1 - src/lcals/HYDRO_2D-StdPar.cpp | 49 ----- src/lcals/HYDRO_2D.cpp | 1 - src/lcals/INT_PREDICT-StdPar.cpp | 16 -- src/lcals/INT_PREDICT.cpp | 1 - src/lcals/PLANCKIAN-StdPar.cpp | 16 -- src/lcals/PLANCKIAN.cpp | 1 - src/lcals/TRIDIAG_ELIM-StdPar.cpp | 16 -- src/lcals/TRIDIAG_ELIM.cpp | 1 - src/polybench/POLYBENCH_2MM-StdPar.cpp | 74 ------- src/polybench/POLYBENCH_2MM.cpp | 1 - src/polybench/POLYBENCH_3MM-StdPar.cpp | 98 --------- src/polybench/POLYBENCH_3MM.cpp | 1 - src/polybench/POLYBENCH_ADI-StdPar.cpp | 82 -------- src/polybench/POLYBENCH_ADI.cpp | 1 - src/polybench/POLYBENCH_ATAX-StdPar.cpp | 79 ------- src/polybench/POLYBENCH_ATAX.cpp | 1 - src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 71 ------- src/polybench/POLYBENCH_FDTD_2D.cpp | 1 - .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 37 ---- 
src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp | 1 - src/polybench/POLYBENCH_GEMM-StdPar.cpp | 58 ----- src/polybench/POLYBENCH_GEMM.cpp | 1 - src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 103 --------- src/polybench/POLYBENCH_GEMVER.cpp | 1 - src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 49 ----- src/polybench/POLYBENCH_GESUMMV.cpp | 1 - src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 54 ----- src/polybench/POLYBENCH_HEAT_3D.cpp | 1 - src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 27 --- src/polybench/POLYBENCH_JACOBI_1D.cpp | 1 - src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 49 ----- src/polybench/POLYBENCH_JACOBI_2D.cpp | 1 - src/polybench/POLYBENCH_MVT-StdPar.cpp | 71 ------- src/polybench/POLYBENCH_MVT.cpp | 1 - src/stream/ADD-StdPar.cpp | 16 -- src/stream/ADD.cpp | 1 - src/stream/COPY-StdPar.cpp | 16 -- src/stream/COPY.cpp | 1 - src/stream/DOT-StdPar.cpp | 22 -- src/stream/DOT.cpp | 1 - src/stream/MUL-StdPar.cpp | 16 -- src/stream/MUL.cpp | 1 - src/stream/TRIAD-StdPar.cpp | 16 -- src/stream/TRIAD.cpp | 1 - 122 files changed, 6 insertions(+), 2527 deletions(-) diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp index 71a6c2eac..1d7d74709 100644 --- a/src/algorithm/MEMCPY-StdPar.cpp +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -23,7 +23,6 @@ namespace algorithm void MEMCPY::runStdParVariantLibrary(VariantID vid) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); @@ -46,23 +45,6 @@ void MEMCPY::runStdParVariantLibrary(VariantID vid) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - camp::resources::Host res = camp::resources::Host::get_default(); - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - res.memcpy(MEMCPY_STD_ARGS); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; } @@ -126,24 +108,6 @@ void 
MEMCPY::runStdParVariantDefault(VariantID vid) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - MEMCPY_BODY; - }); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; } @@ -157,7 +121,7 @@ void MEMCPY::runStdParVariant(VariantID vid, size_t tune_idx) { size_t t = 0; - if (vid == Base_StdPar || vid == RAJA_StdPar) { + if (vid == Base_StdPar) { if (tune_idx == t) { @@ -180,7 +144,7 @@ void MEMCPY::runStdParVariant(VariantID vid, size_t tune_idx) void MEMCPY::setStdParTuningDefinitions(VariantID vid) { - if (vid == Base_StdPar || vid == RAJA_StdPar) { + if (vid == Base_StdPar) { addVariantTuningName(vid, "library"); } diff --git a/src/algorithm/MEMCPY.cpp b/src/algorithm/MEMCPY.cpp index 80c7f4f62..08bf79731 100644 --- a/src/algorithm/MEMCPY.cpp +++ b/src/algorithm/MEMCPY.cpp @@ -54,7 +54,6 @@ MEMCPY::MEMCPY(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } MEMCPY::~MEMCPY() diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp index 8ffba6f3f..835b27b74 100644 --- a/src/algorithm/MEMSET-StdPar.cpp +++ b/src/algorithm/MEMSET-StdPar.cpp @@ -23,7 +23,6 @@ namespace algorithm void MEMSET::runStdParVariantLibrary(VariantID vid) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); @@ -46,23 +45,6 @@ void MEMSET::runStdParVariantLibrary(VariantID vid) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - camp::resources::Host res = camp::resources::Host::get_default(); - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - res.memset(MEMSET_STD_ARGS); - - } - stopTimer(); - - break; - } -#endif - default : 
{ getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; } @@ -126,24 +108,6 @@ void MEMSET::runStdParVariantDefault(VariantID vid) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - MEMSET_BODY; - }); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; } @@ -157,7 +121,7 @@ void MEMSET::runStdParVariant(VariantID vid, size_t tune_idx) { size_t t = 0; - if (vid == Base_StdPar || vid == RAJA_StdPar) { + if (vid == Base_StdPar) { if (tune_idx == t) { @@ -180,7 +144,7 @@ void MEMSET::runStdParVariant(VariantID vid, size_t tune_idx) void MEMSET::setStdParTuningDefinitions(VariantID vid) { - if (vid == Base_StdPar || vid == RAJA_StdPar) { + if (vid == Base_StdPar) { addVariantTuningName(vid, "library"); } diff --git a/src/algorithm/MEMSET.cpp b/src/algorithm/MEMSET.cpp index 3cf345bd6..22091b49e 100644 --- a/src/algorithm/MEMSET.cpp +++ b/src/algorithm/MEMSET.cpp @@ -55,7 +55,6 @@ MEMSET::MEMSET(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } MEMSET::~MEMSET() diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp index b2e010833..08cbd206b 100644 --- a/src/algorithm/REDUCE_SUM-StdPar.cpp +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -78,28 +78,6 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum sum(m_sum_init); - - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - REDUCE_SUM_BODY; - }); - - m_sum = sum.get(); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n REDUCE_SUM : 
Unknown variant id = " << vid << std::endl; } diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index f6095642d..5e6638e4b 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -48,21 +48,6 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::exclusive_scan(RAJA_SCAN_ARGS); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n SCAN : Unknown variant id = " << vid << std::endl; } diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp index 3fcee72d6..5a6fd384c 100644 --- a/src/algorithm/SORT-StdPar.cpp +++ b/src/algorithm/SORT-StdPar.cpp @@ -45,21 +45,6 @@ void SORT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::sort(RAJA_SORT_ARGS); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n SORT : Unknown variant id = " << vid << std::endl; } diff --git a/src/algorithm/SORT.cpp b/src/algorithm/SORT.cpp index 15192f500..1e5fb07b4 100644 --- a/src/algorithm/SORT.cpp +++ b/src/algorithm/SORT.cpp @@ -43,7 +43,6 @@ SORT::SORT(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( RAJA_StdPar ); } SORT::~SORT() diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index daa603e7f..dcb0f3a5c 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -72,21 +72,6 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::sort_pairs(RAJA_SORTPAIRS_ARGS); - - } - stopTimer(); - - 
break; - } -#endif - default : { getCout() << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; } diff --git a/src/algorithm/SORTPAIRS.cpp b/src/algorithm/SORTPAIRS.cpp index 882527eb1..db9a0f8af 100644 --- a/src/algorithm/SORTPAIRS.cpp +++ b/src/algorithm/SORTPAIRS.cpp @@ -43,7 +43,6 @@ SORTPAIRS::SORTPAIRS(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( RAJA_StdPar ); } SORTPAIRS::~SORTPAIRS() diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp index 119d99b0a..b8c36646e 100644 --- a/src/apps/CONVECTION3DPA-StdPar.cpp +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -135,186 +135,6 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar: { - - using launch_policy = RAJA::expt::LaunchPolicy; - - using outer_x = RAJA::expt::LoopPolicy; - - using inner_x = RAJA::expt::LoopPolicy; - - using inner_y = RAJA::expt::LoopPolicy; - - using inner_z = RAJA::expt::LoopPolicy; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - // Grid is empty as the host does not need a compute grid to be specified - RAJA::expt::launch( - RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), - [&](int e) { - - CONVECTION3DPA_0_CPU; - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dx) { - - CONVECTION3DPA_1; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 
CPA_Q1D), - [&](int qx) { - - CONVECTION3DPA_2; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qy) { - - CONVECTION3DPA_3; - - } // lambda (dy) - ); // RAJA::expt::loop - } // lambda (dx) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qz) { - - CONVECTION3DPA_4; - - } // lambda (qz) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qx) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - - CONVECTION3DPA_5; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - - CONVECTION3DPA_6; - - } // lambda (dz) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qx) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dy) { - - 
CONVECTION3DPA_7; - - } // lambda (dy) - ); // RAJA::expt::loop - } // lambda (qx) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dx) { - - CONVECTION3DPA_8; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - } // lambda (e) - ); // RAJA::expt::loop - - } // outer lambda (ctx) - ); // RAJA::expt::launch - } // loop over kernel reps - stopTimer(); - - return; - } -#endif // RUN_RAJA_SEQ - default: getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid << std::endl; diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp index 469bd93bd..7b9216949 100644 --- a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -84,30 +84,6 @@ void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - camp::resources::Resource working_res{camp::resources::Host()}; - RAJA::TypedListSegment zones(m_domain->real_zones, - m_domain->n_real_zones, - working_res); - - auto deldotvec2d_lam = [=](Index_type i) { - DEL_DOT_VEC_2D_BODY; - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall(zones, deldotvec2d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/DEL_DOT_VEC_2D.cpp b/src/apps/DEL_DOT_VEC_2D.cpp index 7e3bf7579..821741f6d 100644 --- a/src/apps/DEL_DOT_VEC_2D.cpp +++ b/src/apps/DEL_DOT_VEC_2D.cpp @@ -65,7 +65,6 @@ DEL_DOT_VEC_2D::DEL_DOT_VEC_2D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( 
Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } DEL_DOT_VEC_2D::~DEL_DOT_VEC_2D() diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index 608a4d665..ae21b8df0 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -118,205 +118,6 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_Seq: { - - // Currently Teams requires two policies if compiled with a device - using launch_policy = RAJA::expt::LaunchPolicy; - - using outer_x = RAJA::expt::LoopPolicy; - - using inner_x = RAJA::expt::LoopPolicy; - - using inner_y = RAJA::expt::LoopPolicy; - - using inner_z = RAJA::expt::LoopPolicy; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - // Grid is empty as the host does not need a compute grid to be specified - RAJA::expt::launch( - RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), - [&](int e) { - - DIFFUSION3DPA_0_CPU; - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dx) { - - DIFFUSION3DPA_1; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), - [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qx) { - - DIFFUSION3DPA_2; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 
DPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qx) { - - DIFFUSION3DPA_3; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qx) { - - DIFFUSION3DPA_4; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qx) { - - DIFFUSION3DPA_5; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), - [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int q) { - - DIFFUSION3DPA_6; - - } // lambda (q) - ); // RAJA::expt::loop - } // lambda (d) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dx) { - - DIFFUSION3DPA_7; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dy) { - 
RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dx) { - - DIFFUSION3DPA_8; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (qz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dx) { - - DIFFUSION3DPA_9; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - } // lambda (e) - ); // RAJA::expt::loop - - } // outer lambda (ctx) - ); // RAJA::expt::launch - } // loop over kernel reps - stopTimer(); - - return; - } -#endif // RUN_RAJA_STDPAR - default: getCout() << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid << std::endl; diff --git a/src/apps/DIFFUSION3DPA.cpp b/src/apps/DIFFUSION3DPA.cpp index 69ee1aa3a..4ab3fa5bb 100644 --- a/src/apps/DIFFUSION3DPA.cpp +++ b/src/apps/DIFFUSION3DPA.cpp @@ -66,7 +66,6 @@ DIFFUSION3DPA::DIFFUSION3DPA(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( RAJA_StdPar ); } DIFFUSION3DPA::~DIFFUSION3DPA() diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp index f8c054fa4..7c353618e 100644 --- a/src/apps/ENERGY-StdPar.cpp +++ b/src/apps/ENERGY-StdPar.cpp @@ -148,41 +148,6 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::region( [=]() { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam1); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam2); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam3); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam4); - - RAJA::forall( - 
RAJA::RangeSegment(ibegin, iend), energy_lam5); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam6); - - }); // end sequential region (for single-source code) - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n ENERGY : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/ENERGY.cpp b/src/apps/ENERGY.cpp index 66f796db1..e775aca5c 100644 --- a/src/apps/ENERGY.cpp +++ b/src/apps/ENERGY.cpp @@ -65,7 +65,6 @@ ENERGY::ENERGY(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } ENERGY::~ENERGY() diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp index befd29fa4..4a7cc4235 100644 --- a/src/apps/FIR-StdPar.cpp +++ b/src/apps/FIR-StdPar.cpp @@ -78,22 +78,6 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), fir_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n FIR : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/FIR.cpp b/src/apps/FIR.cpp index 90871a160..25241fac6 100644 --- a/src/apps/FIR.cpp +++ b/src/apps/FIR.cpp @@ -59,7 +59,6 @@ FIR::FIR(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } FIR::~FIR() diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index 6043185a2..b8564868e 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -121,53 +121,6 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - using EXEC_POL = RAJA::loop_exec; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for 
(Index_type l = 0; l < num_neighbors; ++l) { - Real_ptr buffer = buffers[l]; - Int_ptr list = pack_index_lists[l]; - Index_type len = pack_index_list_lengths[l]; - for (Index_type v = 0; v < num_vars; ++v) { - Real_ptr var = vars[v]; - auto haloexchange_pack_base_lam = [=](Index_type i) { - HALOEXCHANGE_PACK_BODY; - }; - RAJA::forall( - RAJA::TypedRangeSegment(0, len), - haloexchange_pack_base_lam ); - buffer += len; - } - } - - for (Index_type l = 0; l < num_neighbors; ++l) { - Real_ptr buffer = buffers[l]; - Int_ptr list = unpack_index_lists[l]; - Index_type len = unpack_index_list_lengths[l]; - for (Index_type v = 0; v < num_vars; ++v) { - Real_ptr var = vars[v]; - auto haloexchange_unpack_base_lam = [=](Index_type i) { - HALOEXCHANGE_UNPACK_BODY; - }; - RAJA::forall( - RAJA::TypedRangeSegment(0, len), - haloexchange_unpack_base_lam ); - buffer += len; - } - } - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/HALOEXCHANGE.cpp b/src/apps/HALOEXCHANGE.cpp index 35c9839b1..4616d633c 100644 --- a/src/apps/HALOEXCHANGE.cpp +++ b/src/apps/HALOEXCHANGE.cpp @@ -101,7 +101,6 @@ HALOEXCHANGE::HALOEXCHANGE(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } HALOEXCHANGE::~HALOEXCHANGE() diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp index 242759fa2..c89f014e7 100644 --- a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -171,86 +171,6 @@ void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - using AllocatorHolder = RAJAPoolAllocatorHolder< - RAJA::basic_mempool::MemPool>; - using Allocator = AllocatorHolder::Allocator; - - AllocatorHolder allocatorHolder; - - using workgroup_policy = 
RAJA::WorkGroupPolicy < - RAJA::loop_work, - RAJA::ordered, - RAJA::constant_stride_array_of_objects >; - - using workpool = RAJA::WorkPool< workgroup_policy, - Index_type, - RAJA::xargs<>, - Allocator >; - - using workgroup = RAJA::WorkGroup< workgroup_policy, - Index_type, - RAJA::xargs<>, - Allocator >; - - using worksite = RAJA::WorkSite< workgroup_policy, - Index_type, - RAJA::xargs<>, - Allocator >; - - workpool pool_pack (allocatorHolder.template getAllocator()); - workpool pool_unpack(allocatorHolder.template getAllocator()); - pool_pack.reserve(num_neighbors * num_vars, 1024ull*1024ull); - pool_unpack.reserve(num_neighbors * num_vars, 1024ull*1024ull); - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (Index_type l = 0; l < num_neighbors; ++l) { - Real_ptr buffer = buffers[l]; - Int_ptr list = pack_index_lists[l]; - Index_type len = pack_index_list_lengths[l]; - for (Index_type v = 0; v < num_vars; ++v) { - Real_ptr var = vars[v]; - auto haloexchange_fused_pack_base_lam = [=](Index_type i) { - HALOEXCHANGE_FUSED_PACK_BODY; - }; - pool_pack.enqueue( - RAJA::TypedRangeSegment(0, len), - haloexchange_fused_pack_base_lam ); - buffer += len; - } - } - workgroup group_pack = pool_pack.instantiate(); - worksite site_pack = group_pack.run(); - - for (Index_type l = 0; l < num_neighbors; ++l) { - Real_ptr buffer = buffers[l]; - Int_ptr list = unpack_index_lists[l]; - Index_type len = unpack_index_list_lengths[l]; - for (Index_type v = 0; v < num_vars; ++v) { - Real_ptr var = vars[v]; - auto haloexchange_fused_unpack_base_lam = [=](Index_type i) { - HALOEXCHANGE_FUSED_UNPACK_BODY; - }; - pool_unpack.enqueue( - RAJA::TypedRangeSegment(0, len), - haloexchange_fused_unpack_base_lam ); - buffer += len; - } - } - workgroup group_unpack = pool_unpack.instantiate(); - worksite site_unpack = group_unpack.run(); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n HALOEXCHANGE_FUSED : Unknown variant 
id = " << vid << std::endl; } diff --git a/src/apps/HALOEXCHANGE_FUSED.cpp b/src/apps/HALOEXCHANGE_FUSED.cpp index 272d66de4..9bcefe0a9 100644 --- a/src/apps/HALOEXCHANGE_FUSED.cpp +++ b/src/apps/HALOEXCHANGE_FUSED.cpp @@ -101,7 +101,6 @@ HALOEXCHANGE_FUSED::HALOEXCHANGE_FUSED(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } HALOEXCHANGE_FUSED::~HALOEXCHANGE_FUSED() diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index ba8ae1332..aab2c55bc 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -84,45 +84,6 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - LTIMES_VIEWS_RANGES_RAJA; - - auto ltimes_lam = [=](ID d, IZ z, IG g, IM m) { - LTIMES_BODY_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<1, RAJA::loop_exec, // z - RAJA::statement::For<2, RAJA::loop_exec, // g - RAJA::statement::For<3, RAJA::loop_exec, // m - RAJA::statement::For<0, RAJA::loop_exec, // d - RAJA::statement::Lambda<0> - > - > - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(IDRange(0, num_d), - IZRange(0, num_z), - IGRange(0, num_g), - IMRange(0, num_m)), - ltimes_lam - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n LTIMES : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp index 9d170071b..80145a78a 100644 --- a/src/apps/LTIMES.cpp +++ b/src/apps/LTIMES.cpp @@ -80,7 +80,6 @@ LTIMES::LTIMES(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } LTIMES::~LTIMES() diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp index 0ff1a9d40..d35e03a3d 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp 
+++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -84,39 +84,6 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<1, RAJA::loop_exec, // z - RAJA::statement::For<2, RAJA::loop_exec, // g - RAJA::statement::For<3, RAJA::loop_exec, // m - RAJA::statement::For<0, RAJA::loop_exec, // d - RAJA::statement::Lambda<0> - > - > - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment(0, num_d), - RAJA::RangeSegment(0, num_z), - RAJA::RangeSegment(0, num_g), - RAJA::RangeSegment(0, num_m)), - ltimesnoview_lam - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/LTIMES_NOVIEW.cpp b/src/apps/LTIMES_NOVIEW.cpp index a4f53d360..baaeacda5 100644 --- a/src/apps/LTIMES_NOVIEW.cpp +++ b/src/apps/LTIMES_NOVIEW.cpp @@ -79,7 +79,6 @@ LTIMES_NOVIEW::LTIMES_NOVIEW(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } LTIMES_NOVIEW::~LTIMES_NOVIEW() diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index ad347f5d8..2fdbdcdcf 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -100,118 +100,6 @@ void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar: { - - //Currently Teams requires two policies if compiled with a device - using launch_policy = RAJA::expt::LaunchPolicy; - - using outer_x = RAJA::expt::LoopPolicy; - - using inner_x = RAJA::expt::LoopPolicy; - - using inner_y = RAJA::expt::LoopPolicy; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::expt::launch( - RAJA::expt::HOST, RAJA::expt::Resources(), - 
[=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { - - MASS3DPA_0_CPU - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { - MASS3DPA_1 - }); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int dx) { - MASS3DPA_2 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { - MASS3DPA_3 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { - MASS3DPA_4 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { - MASS3DPA_5 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int q) { - MASS3DPA_6 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { - MASS3DPA_7 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { - MASS3DPA_8 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { - MASS3DPA_9 - }); - }); - }); - }); - } - stopTimer(); - - return; - } -#endif // RUN_RAJA_STDPAR - default: getCout() << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; } diff --git a/src/apps/MASS3DPA.cpp b/src/apps/MASS3DPA.cpp index c951336ad..29c2a9ca7 100644 --- a/src/apps/MASS3DPA.cpp +++ 
b/src/apps/MASS3DPA.cpp @@ -62,7 +62,6 @@ MASS3DPA::MASS3DPA(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( RAJA_StdPar ); } MASS3DPA::~MASS3DPA() diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp index f1326230e..1be53f986 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp @@ -73,30 +73,6 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - camp::resources::Resource working_res{camp::resources::Host()}; - RAJA::TypedListSegment zones(m_domain->real_zones, - m_domain->n_real_zones, - working_res); - - auto nodal_accumulation_3d_lam = [=](Index_type i) { - NODAL_ACCUMULATION_3D_RAJA_ATOMIC_BODY(RAJA::seq_atomic); - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall(zones, nodal_accumulation_3d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/NODAL_ACCUMULATION_3D.cpp b/src/apps/NODAL_ACCUMULATION_3D.cpp index ef652b4a4..c3be0a82f 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D.cpp @@ -70,7 +70,6 @@ NODAL_ACCUMULATION_3D::NODAL_ACCUMULATION_3D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } NODAL_ACCUMULATION_3D::~NODAL_ACCUMULATION_3D() diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp index 301e30719..17fc0eedf 100644 --- a/src/apps/PRESSURE-StdPar.cpp +++ b/src/apps/PRESSURE-StdPar.cpp @@ -88,29 +88,6 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep 
< run_reps; ++irep) { - - RAJA::region( [=]() { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), pressure_lam1); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), pressure_lam2); - - }); // end sequential region (for single-source code) - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n PRESSURE : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/PRESSURE.cpp b/src/apps/PRESSURE.cpp index 29fc72adc..70cdab10b 100644 --- a/src/apps/PRESSURE.cpp +++ b/src/apps/PRESSURE.cpp @@ -55,7 +55,6 @@ PRESSURE::PRESSURE(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } PRESSURE::~PRESSURE() diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp index 93cd2a941..324d2bbcc 100644 --- a/src/apps/VOL3D-StdPar.cpp +++ b/src/apps/VOL3D-StdPar.cpp @@ -79,22 +79,6 @@ void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), vol3d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n VOL3D : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/VOL3D.cpp b/src/apps/VOL3D.cpp index b05511f99..988b54bc4 100644 --- a/src/apps/VOL3D.cpp +++ b/src/apps/VOL3D.cpp @@ -67,7 +67,6 @@ VOL3D::VOL3D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } VOL3D::~VOL3D() diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp index 3615eeee8..66d09cd9e 100644 --- a/src/basic/DAXPY-StdPar.cpp +++ b/src/basic/DAXPY-StdPar.cpp @@ -72,22 +72,6 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type 
irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), daxpy_lam); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index a0cd60977..93f2cf9d4 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -54,7 +54,6 @@ DAXPY::DAXPY(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index a0e887e7c..911e8de6e 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -73,25 +73,6 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - DAXPY_ATOMIC_RAJA_BODY(RAJA::seq_atomic); - }); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp index 137fdd0e6..dac40ccab 100644 --- a/src/basic/IF_QUAD-StdPar.cpp +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -73,22 +73,6 @@ void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), ifquad_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index 799c02865..55b182a2b 100644 --- 
a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -58,7 +58,6 @@ IF_QUAD::IF_QUAD(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index 39657f1ce..f07bdd583 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -121,45 +121,6 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - INDEXLIST_3LOOP_DATA_SETUP_StdPar; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum len(0); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; - }); - - RAJA::exclusive_scan_inplace( - RAJA::make_span(counts+ibegin, iend+1-ibegin)); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - if (counts[i] != counts[i+1]) { - list[counts[i]] = i; - len += 1; - } - }); - - m_len = len.get(); - - } - stopTimer(); - - INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; - - break; - } -#endif - default : { getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp index d176c3b42..a01964a85 100644 --- a/src/basic/INIT3-StdPar.cpp +++ b/src/basic/INIT3-StdPar.cpp @@ -72,22 +72,6 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), init3_lam); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n INIT3 : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index 
990278e36..6f750553f 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -54,7 +54,6 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp index 30c190fdd..13cc0fdf5 100644 --- a/src/basic/INIT_VIEW1D-StdPar.cpp +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -73,28 +73,6 @@ void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - INIT_VIEW1D_VIEW_RAJA; - - auto initview1d_lam = [=](Index_type i) { - INIT_VIEW1D_BODY_RAJA; - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), initview1d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index ea68d0951..bb7195b16 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -55,7 +55,6 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp index c6ff05190..e60db90b2 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -73,28 +73,6 @@ void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - INIT_VIEW1D_OFFSET_VIEW_RAJA; - - auto initview1doffset_lam = [=](Index_type i) { - INIT_VIEW1D_OFFSET_BODY_RAJA; - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - 
RAJA::RangeSegment(ibegin, iend), initview1doffset_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index 1c482cec7..9918f8c0a 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -55,7 +55,6 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 747aa8413..454bb2eed 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -61,9 +61,8 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams ¶ms) setVariantDefined(Lambda_HIP); setVariantDefined(RAJA_HIP); - //setVariantDefined( Base_StdPar ); - //setVariantDefined( Lambda_StdPar ); - //setVariantDefined( RAJA_StdPar ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } MAT_MAT_SHARED::~MAT_MAT_SHARED() {} diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp index b76f667b7..9f01a117e 100644 --- a/src/basic/MULADDSUB-StdPar.cpp +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -72,22 +72,6 @@ void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), mas_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index 8e6b76b5d..f3020dfbb 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -54,7 +54,6 @@ MULADDSUB::MULADDSUB(const 
RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp index 1041a9953..705ed38bd 100644 --- a/src/basic/NESTED_INIT-StdPar.cpp +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -105,36 +105,6 @@ void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<2, RAJA::loop_exec, // k - RAJA::statement::For<1, RAJA::loop_exec, // j - RAJA::statement::For<0, RAJA::loop_exec,// i - RAJA::statement::Lambda<0> - > - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment(0, ni), - RAJA::RangeSegment(0, nj), - RAJA::RangeSegment(0, nk)), - nestedinit_lam - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index 3c01fe350..a4be4f273 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -65,7 +65,6 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 491a2cfd3..d73f13814 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -85,27 +85,6 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - *pi = m_pi_init; - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - double x 
= (double(i) + 0.5) * dx; - RAJA::atomicAdd(pi, dx / (1.0 + x * x)); - }); - *pi *= 4.0; - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 0633887a0..6a15d4784 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -56,7 +56,6 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp index e1f37eea4..a3fc51531 100644 --- a/src/basic/PI_REDUCE-StdPar.cpp +++ b/src/basic/PI_REDUCE-StdPar.cpp @@ -80,28 +80,6 @@ void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum pi(m_pi_init); - - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - PI_REDUCE_BODY; - }); - - m_pi = 4.0 * pi.get(); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n PI_REDUCE : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index 5af375f56..62a20bd57 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -54,7 +54,6 @@ PI_REDUCE::PI_REDUCE(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } PI_REDUCE::~PI_REDUCE() diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp index f7b5f5dd5..b2ada68e7 100644 --- a/src/basic/REDUCE3_INT-StdPar.cpp +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -99,32 +99,6 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { 
- - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum vsum(m_vsum_init); - RAJA::ReduceMin vmin(m_vmin_init); - RAJA::ReduceMax vmax(m_vmax_init); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - REDUCE3_INT_BODY_RAJA; - }); - - m_vsum += static_cast(vsum.get()); - m_vmin = RAJA_MIN(m_vmin, static_cast(vmin.get())); - m_vmax = RAJA_MAX(m_vmax, static_cast(vmax.get())); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index e39f0c031..2b9988268 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -59,7 +59,6 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp index 1264f8257..ee94e542c 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -108,39 +108,6 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum xsum(m_init_sum); - RAJA::ReduceSum ysum(m_init_sum); - RAJA::ReduceMin xmin(m_init_min); - RAJA::ReduceMin ymin(m_init_min); - RAJA::ReduceMax xmax(m_init_max); - RAJA::ReduceMax ymax(m_init_max); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - REDUCE_STRUCT_BODY_RAJA; - }); - - points.SetCenter(xsum.get()/(points.N), - ysum.get()/(points.N)); - points.SetXMin(xmin.get()); - points.SetXMax(xmax.get()); - points.SetYMin(ymin.get()); - points.SetYMax(ymax.get()); - m_points=points; - - } - stopTimer(); - - break; - } -#endif // 
RUN_RAJA_STDPAR - default : { getCout() << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp index 94907744c..e1e9b4cd0 100644 --- a/src/basic/TRAP_INT-StdPar.cpp +++ b/src/basic/TRAP_INT-StdPar.cpp @@ -94,28 +94,6 @@ void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum sumx(m_sumx_init); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - TRAP_INT_BODY; - }); - - m_sumx += static_cast(sumx.get()) * h; - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index 7ddc1991b..5491ddcd6 100644 --- a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -54,7 +54,6 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index 0b526afd0..2a777971d 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -254,14 +254,6 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx) break; } - case RAJA_StdPar : - { -#if defined(RUN_RAJA_STDPAR) - runStdParVariant(vid, tune_idx); -#endif - break; - } - case Kokkos_Lambda : { #if defined(RUN_KOKKOS) diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 5c1144ef3..1b71a8401 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -280,7 +280,6 @@ static const std::string VariantNames [] = std::string("Base_StdPar"), std::string("Lambda_StdPar"), - std::string("RAJA_StdPar"), std::string("Kokkos_Lambda"), @@ -432,11 +431,6 @@ bool 
isVariantAvailable(VariantID vid) vid == Lambda_StdPar) { ret_val = true; } -#if defined(RUN_RAJA_STDPAR) - if ( vid == RAJA_StdPar ) { - ret_val = true; - } -#endif #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { @@ -503,11 +497,6 @@ bool isVariantGPU(VariantID vid) vid == Lambda_StdPar) { ret_val = true; } -#if defined(RUN_RAJA_STDPAR) - if ( vid == RAJA_StdPar ) { - ret_val = true; - } -#endif #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index ab93280f1..b037d0c67 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -196,7 +196,6 @@ enum VariantID { Base_StdPar, Lambda_StdPar, - RAJA_StdPar, Kokkos_Lambda, diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp index 19a843bfb..c38b3936c 100644 --- a/src/lcals/DIFF_PREDICT-StdPar.cpp +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -72,22 +72,6 @@ void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), diffpredict_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/DIFF_PREDICT.cpp b/src/lcals/DIFF_PREDICT.cpp index e60d636e1..49a41deb8 100644 --- a/src/lcals/DIFF_PREDICT.cpp +++ b/src/lcals/DIFF_PREDICT.cpp @@ -52,7 +52,6 @@ DIFF_PREDICT::DIFF_PREDICT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } DIFF_PREDICT::~DIFF_PREDICT() diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp index 1022d79a1..f9281b86e 100644 --- a/src/lcals/EOS-StdPar.cpp +++ b/src/lcals/EOS-StdPar.cpp @@ -72,22 +72,6 @@ void EOS::runStdParVariant(VariantID vid, size_t tune_idx) 
break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), eos_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n EOS : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp index b7b3813b3..8fc00ab30 100644 --- a/src/lcals/EOS.cpp +++ b/src/lcals/EOS.cpp @@ -60,7 +60,6 @@ EOS::EOS(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } EOS::~EOS() diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp index 5e274c841..720371793 100644 --- a/src/lcals/FIRST_DIFF-StdPar.cpp +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -72,22 +72,6 @@ void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), firstdiff_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/FIRST_DIFF.cpp b/src/lcals/FIRST_DIFF.cpp index 54a7c0326..19cbfbb53 100644 --- a/src/lcals/FIRST_DIFF.cpp +++ b/src/lcals/FIRST_DIFF.cpp @@ -56,7 +56,6 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } FIRST_DIFF::~FIRST_DIFF() diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index 3b797cbc6..ef6a11c93 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -76,29 +76,6 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for 
(RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - FIRST_MIN_BODY_RAJA; - }); - - m_minloc = RAJA_MAX(m_minloc, loc.getLoc()); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp index a1cffc072..e8825dd17 100644 --- a/src/lcals/FIRST_MIN.cpp +++ b/src/lcals/FIRST_MIN.cpp @@ -60,7 +60,6 @@ FIRST_MIN::FIRST_MIN(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } FIRST_MIN::~FIRST_MIN() diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp index b02253819..c00a7c062 100644 --- a/src/lcals/FIRST_SUM-StdPar.cpp +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -72,22 +72,6 @@ void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), firstsum_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp index 109c6499a..cda99863d 100644 --- a/src/lcals/FIRST_SUM.cpp +++ b/src/lcals/FIRST_SUM.cpp @@ -55,7 +55,6 @@ FIRST_SUM::FIRST_SUM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } FIRST_SUM::~FIRST_SUM() diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp index e16eebddd..fd4cf6ed3 100644 --- a/src/lcals/GEN_LIN_RECUR-StdPar.cpp +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -92,25 +92,6 @@ void 
GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(0, N), genlinrecur_lam1); - - RAJA::forall( - RAJA::RangeSegment(1, N+1), genlinrecur_lam2); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp index eb21f7f5c..0d5ea5ace 100644 --- a/src/lcals/GEN_LIN_RECUR.cpp +++ b/src/lcals/GEN_LIN_RECUR.cpp @@ -60,7 +60,6 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } GEN_LIN_RECUR::~GEN_LIN_RECUR() diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp index ce8d37a2c..7d02aaa5d 100644 --- a/src/lcals/HYDRO_1D-StdPar.cpp +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -73,22 +73,6 @@ void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), hydro1d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/HYDRO_1D.cpp b/src/lcals/HYDRO_1D.cpp index 32d40d978..bda89bec3 100644 --- a/src/lcals/HYDRO_1D.cpp +++ b/src/lcals/HYDRO_1D.cpp @@ -59,7 +59,6 @@ HYDRO_1D::HYDRO_1D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } HYDRO_1D::~HYDRO_1D() diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp index 5f6d3dbc2..8b8ff5c66 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ 
b/src/lcals/HYDRO_2D-StdPar.cpp @@ -131,55 +131,6 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - HYDRO_2D_VIEWS_RAJA; - - auto hydro2d_lam1 = [=] (Index_type k, Index_type j) { - HYDRO_2D_BODY1_RAJA; - }; - auto hydro2d_lam2 = [=] (Index_type k, Index_type j) { - HYDRO_2D_BODY2_RAJA; - }; - auto hydro2d_lam3 = [=] (Index_type k, Index_type j) { - HYDRO_2D_BODY3_RAJA; - }; - - using EXECPOL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, // k - RAJA::statement::For<1, RAJA::loop_exec, // j - RAJA::statement::Lambda<0> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( - RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend), - RAJA::RangeSegment(jbeg, jend)), - hydro2d_lam1); - - RAJA::kernel( - RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend), - RAJA::RangeSegment(jbeg, jend)), - hydro2d_lam2); - - RAJA::kernel( - RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend), - RAJA::RangeSegment(jbeg, jend)), - hydro2d_lam3); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp index 331e6e695..81687b486 100644 --- a/src/lcals/HYDRO_2D.cpp +++ b/src/lcals/HYDRO_2D.cpp @@ -74,7 +74,6 @@ HYDRO_2D::HYDRO_2D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } HYDRO_2D::~HYDRO_2D() diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp index a635061d2..2df532913 100644 --- a/src/lcals/INT_PREDICT-StdPar.cpp +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -73,22 +73,6 @@ void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - 
RAJA::forall( - RAJA::RangeSegment(ibegin, iend), intpredict_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/INT_PREDICT.cpp b/src/lcals/INT_PREDICT.cpp index dd4ff83d8..5ef7dc4a1 100644 --- a/src/lcals/INT_PREDICT.cpp +++ b/src/lcals/INT_PREDICT.cpp @@ -52,7 +52,6 @@ INT_PREDICT::INT_PREDICT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } INT_PREDICT::~INT_PREDICT() diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp index df63d89b1..e6378319c 100644 --- a/src/lcals/PLANCKIAN-StdPar.cpp +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -74,22 +74,6 @@ void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), planckian_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/PLANCKIAN.cpp b/src/lcals/PLANCKIAN.cpp index 74c65e31b..f3362d860 100644 --- a/src/lcals/PLANCKIAN.cpp +++ b/src/lcals/PLANCKIAN.cpp @@ -52,7 +52,6 @@ PLANCKIAN::PLANCKIAN(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } PLANCKIAN::~PLANCKIAN() diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index 9ce2afd9f..c18df1303 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -73,22 +73,6 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - 
RAJA::forall( - RAJA::RangeSegment(ibegin, iend), tridiag_elim_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp index d606e39be..10d19c0f4 100644 --- a/src/lcals/TRIDIAG_ELIM.cpp +++ b/src/lcals/TRIDIAG_ELIM.cpp @@ -54,7 +54,6 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } TRIDIAG_ELIM::~TRIDIAG_ELIM() diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index 43683a68b..20ad50043 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -166,80 +166,6 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_2MM_VIEWS_RAJA; - - auto poly_2mm_lam1 = [=](Real_type &dot) { - POLYBENCH_2MM_BODY1_RAJA; - }; - auto poly_2mm_lam2 = [=](Index_type i, Index_type j, Index_type k, - Real_type &dot) { - POLYBENCH_2MM_BODY2_RAJA; - }; - auto poly_2mm_lam3 = [=](Index_type i, Index_type j, - Real_type &dot) { - POLYBENCH_2MM_BODY3_RAJA; - }; - auto poly_2mm_lam4 = [=](Real_type &dot) { - POLYBENCH_2MM_BODY4_RAJA; - }; - auto poly_2mm_lam5 = [=](Index_type i, Index_type l, Index_type j, - Real_type &dot) { - POLYBENCH_2MM_BODY5_RAJA; - }; - auto poly_2mm_lam6 = [=](Index_type i, Index_type l, - Real_type &dot) { - POLYBENCH_2MM_BODY6_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> - > - > - >; - - 
startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nj}, - RAJA::RangeSegment{0, nk}), - RAJA::tuple{0.0}, - - poly_2mm_lam1, - poly_2mm_lam2, - poly_2mm_lam3 - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nl}, - RAJA::RangeSegment{0, nj}), - RAJA::tuple{0.0}, - - poly_2mm_lam4, - poly_2mm_lam5, - poly_2mm_lam6 - - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_2MM.cpp b/src/polybench/POLYBENCH_2MM.cpp index c1284791f..5952c07bb 100644 --- a/src/polybench/POLYBENCH_2MM.cpp +++ b/src/polybench/POLYBENCH_2MM.cpp @@ -81,7 +81,6 @@ POLYBENCH_2MM::POLYBENCH_2MM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_2MM::~POLYBENCH_2MM() diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index b7ac966a2..ba0df5bb0 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -218,104 +218,6 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_3MM_VIEWS_RAJA; - - auto poly_3mm_lam1 = [=] (Real_type &dot) { - POLYBENCH_3MM_BODY1_RAJA; - }; - auto poly_3mm_lam2 = [=] (Index_type i, Index_type j, Index_type k, - Real_type &dot) { - POLYBENCH_3MM_BODY2_RAJA; - }; - auto poly_3mm_lam3 = [=] (Index_type i, Index_type j, - Real_type &dot) { - POLYBENCH_3MM_BODY3_RAJA; - }; - auto poly_3mm_lam4 = [=] (Real_type &dot) { - POLYBENCH_3MM_BODY4_RAJA; - }; - auto poly_3mm_lam5 = [=] (Index_type j, Index_type l, Index_type m, - Real_type &dot) { - POLYBENCH_3MM_BODY5_RAJA; - }; - auto poly_3mm_lam6 = [=] (Index_type j, 
Index_type l, - Real_type &dot) { - POLYBENCH_3MM_BODY6_RAJA; - }; - auto poly_3mm_lam7 = [=] (Real_type &dot) { - POLYBENCH_3MM_BODY7_RAJA; - }; - auto poly_3mm_lam8 = [=] (Index_type i, Index_type l, Index_type j, - Real_type &dot) { - POLYBENCH_3MM_BODY8_RAJA; - }; - auto poly_3mm_lam9 = [=] (Index_type i, Index_type l, - Real_type &dot) { - POLYBENCH_3MM_BODY9_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nj}, - RAJA::RangeSegment{0, nk}), - RAJA::tuple{0.0}, - - poly_3mm_lam1, - poly_3mm_lam2, - poly_3mm_lam3 - - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, nj}, - RAJA::RangeSegment{0, nl}, - RAJA::RangeSegment{0, nm}), - RAJA::tuple{0.0}, - - poly_3mm_lam4, - poly_3mm_lam5, - poly_3mm_lam6 - - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nl}, - RAJA::RangeSegment{0, nj}), - RAJA::tuple{0.0}, - - poly_3mm_lam7, - poly_3mm_lam8, - poly_3mm_lam9 - - ); - - } // end run_reps - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_3MM.cpp b/src/polybench/POLYBENCH_3MM.cpp index 3cc6fce66..71f147869 100644 --- a/src/polybench/POLYBENCH_3MM.cpp +++ b/src/polybench/POLYBENCH_3MM.cpp @@ -89,7 +89,6 @@ POLYBENCH_3MM::POLYBENCH_3MM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } 
POLYBENCH_3MM::~POLYBENCH_3MM() diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index 18641aa5c..0151f931b 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -139,88 +139,6 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_ADI_VIEWS_RAJA; - - auto poly_adi_lam2 = [=](Index_type i) { - POLYBENCH_ADI_BODY2_RAJA; - }; - auto poly_adi_lam3 = [=](Index_type i, Index_type j) { - POLYBENCH_ADI_BODY3_RAJA; - }; - auto poly_adi_lam4 = [=](Index_type i) { - POLYBENCH_ADI_BODY4_RAJA; - }; - auto poly_adi_lam5 = [=](Index_type i, Index_type k) { - POLYBENCH_ADI_BODY5_RAJA; - }; - auto poly_adi_lam6 = [=](Index_type i) { - POLYBENCH_ADI_BODY6_RAJA; - }; - auto poly_adi_lam7 = [=](Index_type i, Index_type j) { - POLYBENCH_ADI_BODY7_RAJA; - }; - auto poly_adi_lam8 = [=](Index_type i) { - POLYBENCH_ADI_BODY8_RAJA; - }; - auto poly_adi_lam9 = [=](Index_type i, Index_type k) { - POLYBENCH_ADI_BODY9_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>>, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<3, RAJA::Segs<0,2>> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (Index_type t = 1; t <= tsteps; ++t) { - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{1, n-1}, - RAJA::RangeSegment{1, n-1}, - RAJA::RangeStrideSegment{n-2, 0, -1}), - - poly_adi_lam2, - poly_adi_lam3, - poly_adi_lam4, - poly_adi_lam5 - - ); - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{1, n-1}, - RAJA::RangeSegment{1, n-1}, - RAJA::RangeStrideSegment{n-2, 0, -1}), - - poly_adi_lam6, - poly_adi_lam7, - poly_adi_lam8, - 
poly_adi_lam9 - - ); - - } // tstep loop - - } // run_reps - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp index 5ad7544dd..7e3749e46 100644 --- a/src/polybench/POLYBENCH_ADI.cpp +++ b/src/polybench/POLYBENCH_ADI.cpp @@ -66,7 +66,6 @@ POLYBENCH_ADI::POLYBENCH_ADI(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_ADI::~POLYBENCH_ADI() diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp index b2be11771..6137fcb70 100644 --- a/src/polybench/POLYBENCH_ATAX-StdPar.cpp +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -119,85 +119,6 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_ATAX_VIEWS_RAJA; - - auto poly_atax_lam1 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_ATAX_BODY1_RAJA; - }; - auto poly_atax_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_ATAX_BODY2_RAJA; - }; - auto poly_atax_lam3 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_ATAX_BODY3_RAJA; - }; - auto poly_atax_lam4 = [=] (Index_type j, Real_type &dot) { - POLYBENCH_ATAX_BODY4_RAJA; - }; - auto poly_atax_lam5 = [=] (Index_type i, Index_type j , Real_type &dot) { - POLYBENCH_ATAX_BODY5_RAJA; - }; - auto poly_atax_lam6 = [=] (Index_type j, Real_type &dot) { - POLYBENCH_ATAX_BODY6_RAJA; - }; - - using EXEC_POL1 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> - > - >; - - using EXEC_POL2 = - RAJA::KernelPolicy< - 
RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<1>, RAJA::Params<0>>, - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<1>, RAJA::Params<0>> - > - >; - - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - RAJA::tuple{0.0}, - - poly_atax_lam1, - poly_atax_lam2, - poly_atax_lam3 - - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - RAJA::tuple{0.0}, - - poly_atax_lam4, - poly_atax_lam5, - poly_atax_lam6 - - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp index 440586561..e0da9af86 100644 --- a/src/polybench/POLYBENCH_ATAX.cpp +++ b/src/polybench/POLYBENCH_ATAX.cpp @@ -68,7 +68,6 @@ POLYBENCH_ATAX::POLYBENCH_ATAX(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_ATAX::~POLYBENCH_ATAX() diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index d2584e96c..4742b76f9 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -140,77 +140,6 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_FDTD_2D_VIEWS_RAJA; - - // - // Note: first lambda must use capture by reference so that the - // scalar variable 't' used in it is updated for each - // t-loop iteration. 
- // - auto poly_fdtd2d_lam1 = [&](Index_type j) { - POLYBENCH_FDTD_2D_BODY1_RAJA; - }; - auto poly_fdtd2d_lam2 = [=](Index_type i, Index_type j) { - POLYBENCH_FDTD_2D_BODY2_RAJA; - }; - auto poly_fdtd2d_lam3 = [=](Index_type i, Index_type j) { - POLYBENCH_FDTD_2D_BODY3_RAJA; - }; - auto poly_fdtd2d_lam4 = [=](Index_type i, Index_type j) { - POLYBENCH_FDTD_2D_BODY4_RAJA; - }; - - using EXEC_POL1 = RAJA::loop_exec; - - using EXEC_POL234 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (t = 0; t < tsteps; ++t) { - - RAJA::forall( RAJA::RangeSegment(0, ny), - poly_fdtd2d_lam1 - ); - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{1, nx}, - RAJA::RangeSegment{0, ny}), - poly_fdtd2d_lam2 - ); - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{0, nx}, - RAJA::RangeSegment{1, ny}), - poly_fdtd2d_lam3 - ); - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{0, nx-1}, - RAJA::RangeSegment{0, ny-1}), - poly_fdtd2d_lam4 - ); - - } // tstep loop - - } // run_reps - stopTimer(); - - break; - } - -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_FDTD_2D.cpp b/src/polybench/POLYBENCH_FDTD_2D.cpp index 47bb79ce2..19505024c 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D.cpp @@ -87,7 +87,6 @@ POLYBENCH_FDTD_2D::POLYBENCH_FDTD_2D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_FDTD_2D::~POLYBENCH_FDTD_2D() diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index c6d015640..4b2a44daa 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ 
b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -106,43 +106,6 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_FLOYD_WARSHALL_VIEWS_RAJA; - - auto poly_floydwarshall_lam = [=](Index_type k, Index_type i, - Index_type j) { - POLYBENCH_FLOYD_WARSHALL_BODY_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<0> - > - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - poly_floydwarshall_lam - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp index d48f141f0..cb8da97fc 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp @@ -63,7 +63,6 @@ POLYBENCH_FLOYD_WARSHALL::POLYBENCH_FLOYD_WARSHALL(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_FLOYD_WARSHALL::~POLYBENCH_FLOYD_WARSHALL() diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index b17c381c0..5d644def0 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -119,64 +119,6 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_GEMM_VIEWS_RAJA; - - auto poly_gemm_lam1 = [=](Real_type& dot) { - POLYBENCH_GEMM_BODY1_RAJA; - }; - auto poly_gemm_lam2 = 
[=](Index_type i, Index_type j) { - POLYBENCH_GEMM_BODY2_RAJA; - }; - auto poly_gemm_lam3 = [=](Index_type i, Index_type j, Index_type k, - Real_type& dot) { - POLYBENCH_GEMM_BODY3_RAJA; - }; - auto poly_gemm_lam4 = [=](Index_type i, Index_type j, - Real_type& dot) { - POLYBENCH_GEMM_BODY4_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>>, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<2, RAJA::Segs<0,1,2>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<3, RAJA::Segs<0,1>, RAJA::Params<0>> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - - RAJA::make_tuple( RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nj}, - RAJA::RangeSegment{0, nk} ), - RAJA::tuple{0.0}, // variable for dot - - poly_gemm_lam1, - poly_gemm_lam2, - poly_gemm_lam3, - poly_gemm_lam4 - - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp index a7dec71b4..57d22fc60 100644 --- a/src/polybench/POLYBENCH_GEMM.cpp +++ b/src/polybench/POLYBENCH_GEMM.cpp @@ -73,7 +73,6 @@ POLYBENCH_GEMM::POLYBENCH_GEMM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_GEMM::~POLYBENCH_GEMM() diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 37361759d..7a51496f9 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -137,109 +137,6 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - 
POLYBENCH_GEMVER_VIEWS_RAJA; - - auto poly_gemver_lam1 = [=] (Index_type i, Index_type j) { - POLYBENCH_GEMVER_BODY1_RAJA; - }; - auto poly_gemver_lam2 = [=] (Real_type &dot) { - POLYBENCH_GEMVER_BODY2_RAJA; - }; - auto poly_gemver_lam3 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_GEMVER_BODY3_RAJA; - }; - auto poly_gemver_lam4 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_GEMVER_BODY4_RAJA; - }; - auto poly_gemver_lam5 = [=] (Index_type i) { - POLYBENCH_GEMVER_BODY5_RAJA; - }; - auto poly_gemver_lam6 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_GEMVER_BODY6_RAJA; - }; - auto poly_gemver_lam7 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_GEMVER_BODY7_RAJA; - }; - auto poly_gemver_lam8 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_GEMVER_BODY8_RAJA; - }; - - using EXEC_POL1 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0,1>> - > - > - >; - - using EXEC_POL2 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> - > - >; - - using EXEC_POL3 = RAJA::loop_exec; - - using EXEC_POL4 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{0, n}, - RAJA::RangeSegment{0, n}), - poly_gemver_lam1 - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, n}, - RAJA::RangeSegment{0, n}), - RAJA::tuple{0.0}, 
- - poly_gemver_lam2, - poly_gemver_lam3, - poly_gemver_lam4 - ); - - RAJA::forall (RAJA::RangeSegment{0, n}, - poly_gemver_lam5 - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, n}, - RAJA::RangeSegment{0, n}), - RAJA::tuple{0.0}, - - poly_gemver_lam6, - poly_gemver_lam7, - poly_gemver_lam8 - - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp index 22a4837af..0c8108cdd 100644 --- a/src/polybench/POLYBENCH_GEMVER.cpp +++ b/src/polybench/POLYBENCH_GEMVER.cpp @@ -82,7 +82,6 @@ POLYBENCH_GEMVER::POLYBENCH_GEMVER(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_GEMVER::~POLYBENCH_GEMVER() diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index 8201ecb24..7ad6e101b 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -81,55 +81,6 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_GESUMMV_VIEWS_RAJA; - - auto poly_gesummv_lam1 = [=](Real_type& tmpdot, Real_type& ydot) { - POLYBENCH_GESUMMV_BODY1_RAJA; - }; - auto poly_gesummv_lam2 = [=](Index_type i, Index_type j, - Real_type& tmpdot, Real_type& ydot) { - POLYBENCH_GESUMMV_BODY2_RAJA; - }; - auto poly_gesummv_lam3 = [=](Index_type i, - Real_type& tmpdot, Real_type& ydot) { - POLYBENCH_GESUMMV_BODY3_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0,1>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Params<0,1>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, 
RAJA::Params<0,1>> - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - RAJA::make_tuple( RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N} ), - RAJA::make_tuple(static_cast(0.0), - static_cast(0.0)), - - poly_gesummv_lam1, - poly_gesummv_lam2, - poly_gesummv_lam3 - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_GESUMMV.cpp b/src/polybench/POLYBENCH_GESUMMV.cpp index f1f10c645..c56dd757b 100644 --- a/src/polybench/POLYBENCH_GESUMMV.cpp +++ b/src/polybench/POLYBENCH_GESUMMV.cpp @@ -62,7 +62,6 @@ POLYBENCH_GESUMMV::POLYBENCH_GESUMMV(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_GESUMMV::~POLYBENCH_GESUMMV() diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index 0b5690828..8f9e1bc54 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -119,60 +119,6 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_HEAT_3D_VIEWS_RAJA; - - auto poly_heat3d_lam1 = [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY1_RAJA; - }; - auto poly_heat3d_lam2 = [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY2_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<0> - > - > - >, - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<1> - > - > - > - >; - - startTimer(); - for (RepIndex_type 
irep = 0; irep < run_reps; ++irep) { - - for (Index_type t = 0; t < tsteps; ++t) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}), - - poly_heat3d_lam1, - poly_heat3d_lam2 - ); - - } - - } - stopTimer(); - - POLYBENCH_HEAT_3D_DATA_RESET; - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_HEAT_3D.cpp b/src/polybench/POLYBENCH_HEAT_3D.cpp index ec86de900..af4727d8a 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D.cpp @@ -73,7 +73,6 @@ POLYBENCH_HEAT_3D::POLYBENCH_HEAT_3D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_HEAT_3D::~POLYBENCH_HEAT_3D() diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp index 76dca3264..3b95527e9 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -95,33 +95,6 @@ void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (Index_type t = 0; t < tsteps; ++t) { - - RAJA::forall ( RAJA::RangeSegment{1, N-1}, - poly_jacobi1d_lam1 - ); - - RAJA::forall ( RAJA::RangeSegment{1, N-1}, - poly_jacobi1d_lam2 - ); - - } - - } - stopTimer(); - - POLYBENCH_JACOBI_1D_DATA_RESET; - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_JACOBI_1D.cpp b/src/polybench/POLYBENCH_JACOBI_1D.cpp index a8aa3e089..3ed3e8361 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.cpp @@ -70,7 +70,6 @@ 
POLYBENCH_JACOBI_1D::POLYBENCH_JACOBI_1D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_JACOBI_1D::~POLYBENCH_JACOBI_1D() diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index 11d8c208c..b5c4ace75 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -112,55 +112,6 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_JACOBI_2D_VIEWS_RAJA; - - auto poly_jacobi2d_lam1 = [=](Index_type i, Index_type j) { - POLYBENCH_JACOBI_2D_BODY1_RAJA; - }; - auto poly_jacobi2d_lam2 = [=](Index_type i, Index_type j) { - POLYBENCH_JACOBI_2D_BODY2_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0> - > - >, - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (Index_type t = 0; t < tsteps; ++t) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}), - - poly_jacobi2d_lam1, - poly_jacobi2d_lam2 - ); - - } - - } - stopTimer(); - - POLYBENCH_JACOBI_2D_DATA_RESET; - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_JACOBI_2D.cpp b/src/polybench/POLYBENCH_JACOBI_2D.cpp index a8d54e751..98ef3aa3d 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.cpp @@ -72,7 +72,6 @@ POLYBENCH_JACOBI_2D::POLYBENCH_JACOBI_2D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - 
setVariantDefined( RAJA_StdPar ); } POLYBENCH_JACOBI_2D::~POLYBENCH_JACOBI_2D() diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp index 2c3b30ffb..45da020a8 100644 --- a/src/polybench/POLYBENCH_MVT-StdPar.cpp +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -117,77 +117,6 @@ void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_MVT_VIEWS_RAJA; - - auto poly_mvt_lam1 = [=] (Real_type &dot) { - POLYBENCH_MVT_BODY1_RAJA; - }; - auto poly_mvt_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_MVT_BODY2_RAJA; - }; - auto poly_mvt_lam3 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_MVT_BODY3_RAJA; - }; - auto poly_mvt_lam4 = [=] (Real_type &dot) { - POLYBENCH_MVT_BODY4_RAJA; - }; - auto poly_mvt_lam5 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_MVT_BODY5_RAJA; - }; - auto poly_mvt_lam6 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_MVT_BODY6_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::region( [=]() { - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - RAJA::tuple{0.0}, - - poly_mvt_lam1, - poly_mvt_lam2, - poly_mvt_lam3 - - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - RAJA::tuple{0.0}, - - poly_mvt_lam4, - poly_mvt_lam5, - poly_mvt_lam6 - - ); - - }); // end sequential region (for single-source code) - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_MVT : Unknown variant id = " 
<< vid << std::endl; } diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp index 3ac9d680f..5d1313988 100644 --- a/src/polybench/POLYBENCH_MVT.cpp +++ b/src/polybench/POLYBENCH_MVT.cpp @@ -65,7 +65,6 @@ POLYBENCH_MVT::POLYBENCH_MVT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_MVT::~POLYBENCH_MVT() diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index 2131dedbf..0a38d1619 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -72,22 +72,6 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), add_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n ADD : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/ADD.cpp b/src/stream/ADD.cpp index 534deee28..0181888a0 100644 --- a/src/stream/ADD.cpp +++ b/src/stream/ADD.cpp @@ -55,7 +55,6 @@ ADD::ADD(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } ADD::~ADD() diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp index bda9af163..bc25a6a64 100644 --- a/src/stream/COPY-StdPar.cpp +++ b/src/stream/COPY-StdPar.cpp @@ -58,22 +58,6 @@ void COPY::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), copy_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n COPY : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/COPY.cpp b/src/stream/COPY.cpp index 40fae2467..0544c214c 
100644 --- a/src/stream/COPY.cpp +++ b/src/stream/COPY.cpp @@ -55,7 +55,6 @@ COPY::COPY(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } COPY::~COPY() diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp index 43d247f72..23d031d91 100644 --- a/src/stream/DOT-StdPar.cpp +++ b/src/stream/DOT-StdPar.cpp @@ -78,28 +78,6 @@ void DOT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum dot(m_dot_init); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - DOT_BODY; - }); - - m_dot += static_cast(dot.get()); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n DOT : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/DOT.cpp b/src/stream/DOT.cpp index d1f701431..48774b354 100644 --- a/src/stream/DOT.cpp +++ b/src/stream/DOT.cpp @@ -55,7 +55,6 @@ DOT::DOT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } DOT::~DOT() diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index 082265af2..f8c919e8d 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -72,22 +72,6 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), mul_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n MUL : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/MUL.cpp b/src/stream/MUL.cpp index 38bfe4aca..9bdd5969c 100644 --- a/src/stream/MUL.cpp +++ b/src/stream/MUL.cpp @@ -55,7 +55,6 @@ MUL::MUL(const 
RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } MUL::~MUL() diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index 4b3db4a49..de8118d0e 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -71,22 +71,6 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), triad_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n TRIAD : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/TRIAD.cpp b/src/stream/TRIAD.cpp index e4064b9fd..03a6b670d 100644 --- a/src/stream/TRIAD.cpp +++ b/src/stream/TRIAD.cpp @@ -59,7 +59,6 @@ TRIAD::TRIAD(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } TRIAD::~TRIAD() From 8ce9c411832fabd9a6352afe609aecbb99fab3ed Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 16:52:23 +0300 Subject: [PATCH 019/174] fix the issue with running --- src/common/KernelBase.cpp | 8 ++++++++ src/common/KernelBase.hpp | 2 ++ 2 files changed, 10 insertions(+) diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index 2a777971d..d2d7ac141 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -129,6 +129,14 @@ void KernelBase::setVariantDefined(VariantID vid) #endif break; } + + case Base_StdPar : + case Lambda_StdPar : + { + setStdParTuningDefinitions(vid); + break; + } + // Required for running Kokkos case Kokkos_Lambda : { diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 8c72e854e..8cde6bbe6 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -90,6 +90,8 @@ class KernelBase virtual void 
setOpenMPTargetTuningDefinitions(VariantID vid) { addVariantTuningName(vid, getDefaultTuningName()); } #endif + virtual void setStdParTuningDefinitions(VariantID vid) + { addVariantTuningName(vid, getDefaultTuningName()); } #if defined(RUN_KOKKOS) virtual void setKokkosTuningDefinitions(VariantID vid) { addVariantTuningName(vid, getDefaultTuningName()); } From 27f800770261c6ee1d50b247b82d22a8d817190a Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:13:54 +0300 Subject: [PATCH 020/174] NVC note --- README.stdpar | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.stdpar b/README.stdpar index 1cb862c9d..f96dcda66 100644 --- a/README.stdpar +++ b/README.stdpar @@ -2,6 +2,9 @@ cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j8 # NVC++ +cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 + +^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves # Intel cmake .. 
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From e4f16040e2ffd1fad8e021e44690ff3a0764d739 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:14:27 +0300 Subject: [PATCH 021/174] nuke StdPar SORTPAIRS because it needs work --- src/algorithm/SORTPAIRS-StdPar.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index dcb0f3a5c..cad571d4a 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -1,7 +1,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. -// See the RAJAPerf/COPYRIGHT file for details. +// See the RAJAPerf/LICENSE file for details. 
// // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// @@ -30,8 +30,8 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); + //auto begin = counting_iterator(ibegin); + //auto end = counting_iterator(iend); SORTPAIRS_DATA_SETUP; @@ -51,20 +51,16 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]); } - std::sort( std::execution::par_unseq, - vector_of_pairs.begin(), vector_of_pairs.end(), + std::sort(vector_of_pairs.begin(), vector_of_pairs.end(), [](pair_type const& lhs, pair_type const& rhs) { return lhs.first < rhs.first; }); - //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { - std::for_each( //std::execution::par_unseq, - begin, end, - [=](Index_type iemp) { - const pair_type& pair = vector_of_pairs[iemp - ibegin]; + for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + pair_type& pair = vector_of_pairs[iemp - ibegin]; x[iend*irep + iemp] = pair.first; i[iend*irep + iemp] = pair.second; - }); + } } stopTimer(); From 2e3bb2dbe95901617a62043de8879d7230de6196 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:22:07 +0300 Subject: [PATCH 022/174] notes on NVC++ multicore issues --- README.stdpar | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/README.stdpar b/README.stdpar index f96dcda66..94d12d2b7 100644 --- a/README.stdpar +++ b/README.stdpar @@ -1,11 +1,64 @@ # GCC + cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j8 # NVC++ + cmake .. 
-DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 ^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves +Just disable the lambda one I guess... + +------------------------------------------------------- +Basic_MAT_MAT_SHARED +........................................................ +Base_StdPar-default 1136.6199452543779141 0.0000000000000000000 +Lambda_StdPar-default -nan -nan + +Probably just not atomic... + +------------------------------------------------------- +Basic_PI_ATOMIC +........................................................ +Base_StdPar-default 0.55899274342205662602 2.5825999101679185666 +Lambda_StdPar-default 3.1415926535899751926 0.0000000000000000000 + +Check these to make sure no stupid float<->double stuff happening. + +------------------------------------------------------- +Polybench_GEMVER +........................................................ +Base_Seq-default 16695345.016927006001 0.0000000000000000000 +Lambda_Seq-default 16695345.016927005882 1.1914380593225359917e-10 +RAJA_Seq-default 16695345.016927006608 -6.0663296608254313469e-10 +Base_StdPar-default 16695345.016927005745 2.5647750589996576309e-10 +Lambda_StdPar-default 16695345.016927006608 -6.0663296608254313469e-10 + +------------------------------------------------------- +Polybench_MVT +........................................................ +Base_Seq-default 6821556.1519041797419 0.0000000000000000000 +Lambda_Seq-default 6821556.1519041797419 0.0000000000000000000 +RAJA_Seq-default 6821556.1519041792999 4.4201442506164312363e-10 +Base_StdPar-default 6821556.1519041792999 4.4201442506164312363e-10 +Lambda_StdPar-default 6821556.1519041792999 4.4201442506164312363e-10 + +------------------------------------------------------- +Stream_DOT +........................................................ 
+Base_Seq-default 39999973.379841431975 0.0000000000000000000 +Lambda_Seq-default 39999973.379841439426 -7.4505805969238281250e-09 +RAJA_Seq-default 39999973.379841662943 -2.3096799850463867188e-07 +Base_StdPar-default 39999973.379841439426 -7.4505805969238281250e-09 +Lambda_StdPar-default 39999973.379841439426 -7.4505805969238281250e-09 + +------------------------------------------------------- +Algorithm_REDUCE_SUM +........................................................ +RAJA_Seq-default 268294.10758353886195 1.5483237802982330322e-08 + # Intel + cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From a85a5c61438e1037690bf90d1ebab5b4d94618f7 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:31:48 +0300 Subject: [PATCH 023/174] NVC++ GPU fails here --- src/apps/HALOEXCHANGE-StdPar.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index b8564868e..d2cd73794 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -38,7 +38,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, + std::for_each( //std::execution::par_unseq, begin, end, [=](Index_type l) { Real_ptr buffer = buffers[l]; @@ -53,7 +53,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( std::execution::par_unseq, + std::for_each( //std::execution::par_unseq, begin, end, [=](Index_type l) { Real_ptr buffer = buffers[l]; @@ -79,9 +79,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type l) { + 
std::for_each( //std::execution::par_unseq, + begin, end, + [=](Index_type l) { Real_ptr buffer = buffers[l]; Int_ptr list = pack_index_lists[l]; Index_type len = pack_index_list_lengths[l]; @@ -97,9 +97,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type l) { + std::for_each( //std::execution::par_unseq, + begin, end, + [=](Index_type l) { Real_ptr buffer = buffers[l]; Int_ptr list = unpack_index_lists[l]; Index_type len = unpack_index_list_lengths[l]; From fe8f91691a55b5cbd34ef94ff0d681a9d3616b43 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:33:09 +0300 Subject: [PATCH 024/174] more errata --- README.stdpar | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.stdpar b/README.stdpar index 94d12d2b7..a9d058263 100644 --- a/README.stdpar +++ b/README.stdpar @@ -6,6 +6,8 @@ cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAG cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 +## CPU + ^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves Just disable the lambda one I guess... @@ -58,6 +60,24 @@ Algorithm_REDUCE_SUM ........................................................ RAJA_Seq-default 268294.10758353886195 1.5483237802982330322e-08 +## GPU + +Just disable parallel execution here... 
+ +[ 99%] Linking CXX executable ../bin/raja-perf.exe +nvlink error : Undefined reference to '_ZSt28__throw_bad_array_new_lengthv' in '../lib/libapps.a:HALOEXCHANGE-StdPar.cpp.o' +pgacclnk: child process exit status 2: /opt/nvidia/hpc_sdk/Linux_x86_64/22.5/compilers/bin/tools/nvdd +make[2]: *** [src/CMakeFiles/raja-perf.exe.dir/build.make:109: bin/raja-perf.exe] Error 2 +make[1]: *** [CMakeFiles/Makefile2:1393: src/CMakeFiles/raja-perf.exe.dir/all] Error 2 +make[1]: *** Waiting for unfinished jobs.... + +Exclude this one until fixed... + + Running Base_StdPar variant +terminate called after throwing an instance of 'thrust::system::system_error' + what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered +Aborted (core dumped) + # Intel cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From f56c87a219a6e70fd31806d1c50450cdb43987df Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:41:14 +0300 Subject: [PATCH 025/174] all the erratum --- README.stdpar | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.stdpar b/README.stdpar index a9d058263..19abe0f30 100644 --- a/README.stdpar +++ b/README.stdpar @@ -78,6 +78,28 @@ terminate called after throwing an instance of 'thrust::system::system_error' what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered Aborted (core dumped) +Ouch... + +------------------------------------------------------- +Polybench_FLOYD_WARSHALL +........................................................ 
+Base_Seq-default -166623.06893187693646 0.0000000000000000000 +Lambda_Seq-default -166623.06893187693646 0.0000000000000000000 +RAJA_Seq-default -166623.06893187693646 0.0000000000000000000 +Base_StdPar-default -172966.42970694099014 6343.3607750640536835 +Lambda_StdPar-default -170706.96338200639781 4083.8944501294613474 + +Lambda_Seq has the bug too so just disable the Lambda versions... + +------------------------------------------------------- +Basic_MAT_MAT_SHARED +........................................................ +Base_Seq-default 1136.6199452543779141 0.0000000000000000000 +Lambda_Seq-default -6.0464819976872759102e+32 6.0464819976872759102e+32 +RAJA_Seq-default 1136.6199452543779141 0.0000000000000000000 +Base_StdPar-default 1136.6199452543779141 0.0000000000000000000 +Lambda_StdPar-default -6.0464819976872759102e+32 6.0464819976872759102e+32 + # Intel cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From ccbd4f03896314862b01bfc3547e0b16fee66545 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:12:50 +0300 Subject: [PATCH 026/174] pointer to atomic for GPU --- src/basic/PI_ATOMIC-StdPar.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index d73f13814..44925913d 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -48,14 +48,16 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - myAtomic a_pi{m_pi_init}; + //myAtomic a_pi{m_pi_init}; + myAtomic * a_pi = new myAtomic; // i hate this + *a_pi = m_pi_init; std::for_each( std::execution::par_unseq, begin, end, - [=,&a_pi](Index_type i) { + [=](Index_type i) { double x = (double(i) + 0.5) * dx; - a_pi = a_pi + dx / (1.0 + x * x); + *a_pi = *a_pi + dx / (1.0 
+ x * x); }); - *pi = a_pi * 4.0; + *pi = *a_pi * 4.0; } stopTimer(); From 51cbb35fc62622308f7f84c12aea03b5f7330c3a Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:14:41 +0300 Subject: [PATCH 027/174] update PI_ATOMIC --- README.stdpar | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.stdpar b/README.stdpar index 19abe0f30..697e27a22 100644 --- a/README.stdpar +++ b/README.stdpar @@ -71,7 +71,7 @@ make[2]: *** [src/CMakeFiles/raja-perf.exe.dir/build.make:109: bin/raja-perf.exe make[1]: *** [CMakeFiles/Makefile2:1393: src/CMakeFiles/raja-perf.exe.dir/all] Error 2 make[1]: *** Waiting for unfinished jobs.... -Exclude this one until fixed... +PI_ATOMIC is fixed by allocating on the heap... Running Base_StdPar variant terminate called after throwing an instance of 'thrust::system::system_error' From 790d74e3e95ed3fef9f685b19e9e37b22e52953e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:26:13 +0300 Subject: [PATCH 028/174] fix this one - nested way faster on GPU --- .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index 4b2a44daa..0c197f2ca 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -14,7 +14,7 @@ #include -//#define USE_STDPAR_COLLAPSE 1 +#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { @@ -44,25 +44,25 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + for (Index_type k = 0; k < N; ++k) { #ifdef USE_STDPAR_COLLAPSE std::for_each( std::execution::par_unseq, - begin2, end2, [=](Index_type ki) { - const auto k = ki / N; - const auto i = ki % N; + begin2, end2, [=](Index_type ji) { + const auto j = ji / N; + const auto i = ji % N; #else 
std::for_each( std::execution::par_unseq, begin, end, - [=](Index_type k) { - std::for_each(begin, end, - [=](Index_type i) { + [=](Index_type i) { + std::for_each( begin, end, + [=](Index_type j) { #endif - for (Index_type j = 0; j < N; ++j) { POLYBENCH_FLOYD_WARSHALL_BODY; - } + }); #ifndef USE_STDPAR_COLLAPSE }); #endif - }); + } } stopTimer(); @@ -80,25 +80,25 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + for (Index_type k = 0; k < N; ++k) { #ifdef USE_STDPAR_COLLAPSE std::for_each( std::execution::par_unseq, - begin2, end2, [=](Index_type ki) { - const auto k = ki / N; - const auto i = ki % N; + begin2, end2, [=](Index_type ji) { + const auto j = ji / N; + const auto i = ji % N; #else std::for_each( std::execution::par_unseq, begin, end, - [=](Index_type k) { - std::for_each(begin, end, - [=](Index_type i) { + [=](Index_type i) { + std::for_each( begin, end, + [=](Index_type j) { #endif - for (Index_type j = 0; j < N; ++j) { poly_floydwarshall_base_lam(k, i, j); - } -#ifndef USE_STDPAR_COLLAPSE }); -#endif +#ifndef USE_STDPAR_COLLAPSE }); +#endif + } } stopTimer(); From 2cc38e718cdfaa6842b7fc5b40af09711c7880bc Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:26:44 +0300 Subject: [PATCH 029/174] fix this one - nested way faster on GPU --- README.stdpar | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/README.stdpar b/README.stdpar index 697e27a22..a5b81beca 100644 --- a/README.stdpar +++ b/README.stdpar @@ -78,17 +78,6 @@ terminate called after throwing an instance of 'thrust::system::system_error' what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered Aborted (core dumped) -Ouch... - -------------------------------------------------------- -Polybench_FLOYD_WARSHALL -........................................................ 
-Base_Seq-default -166623.06893187693646 0.0000000000000000000 -Lambda_Seq-default -166623.06893187693646 0.0000000000000000000 -RAJA_Seq-default -166623.06893187693646 0.0000000000000000000 -Base_StdPar-default -172966.42970694099014 6343.3607750640536835 -Lambda_StdPar-default -170706.96338200639781 4083.8944501294613474 - Lambda_Seq has the bug too so just disable the Lambda versions... ------------------------------------------------------- From 1a77c06172b6255102b31b1dbd924985dc93211c Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:36:35 +0300 Subject: [PATCH 030/174] remove RAJA_StdPar --- src/basic/MAT_MAT_SHARED-StdPar.cpp | 94 +---------------------------- 1 file changed, 2 insertions(+), 92 deletions(-) diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp index 6aa32ea3b..cc211b719 100644 --- a/src/basic/MAT_MAT_SHARED-StdPar.cpp +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -36,7 +36,7 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type by = 0; by < Ny; ++by) { for (Index_type bx = 0; bx < Nx; ++bx) { - MAT_MAT_SHARED_BODY_0(TL_SZ) + MAT_MAT_SHARED_BODY_0(TL_SZ) for (Index_type ty = 0; ty < TL_SZ; ++ty) { for (Index_type tx = 0; tx < TL_SZ; ++tx) { @@ -81,6 +81,7 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) auto outer_y = [&](Index_type by) { auto outer_x = [&](Index_type bx) { + MAT_MAT_SHARED_BODY_0(TL_SZ) auto inner_y_1 = [&](Index_type ty) { @@ -152,97 +153,6 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_Sq: { - - using launch_policy = RAJA::expt::LaunchPolicy; - - using outer_x = RAJA::expt::LoopPolicy; - - using outer_y = RAJA::expt::LoopPolicy; - - using inner_x = RAJA::expt::LoopPolicy; - - using inner_y = RAJA::expt::LoopPolicy; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - //Grid is empty as the host does not need a 
compute grid to be specified - RAJA::expt::launch(RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Ny), - [&](Index_type by) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Nx), - [&](Index_type bx) { - - MAT_MAT_SHARED_BODY_0(TL_SZ) - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - - [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type tx) { - MAT_MAT_SHARED_BODY_1(TL_SZ) - } - ); // RAJA::expt::loop - } - ); // RAJA::expt::loop - - for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; k++) { - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type tx) { - MAT_MAT_SHARED_BODY_2(TL_SZ) - } - ); // RAJA::expt::loop - } - ); // RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type tx) { - MAT_MAT_SHARED_BODY_3(TL_SZ) - } - ); // RAJA::expt::loop - } - ); // RAJA::expt::loop - - ctx.teamSync(); - - } // for (k) - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type tx) { - MAT_MAT_SHARED_BODY_4(TL_SZ) - } - ); // RAJA::expt::loop - } - ); // RAJA::expt::loop - - } // lambda (bx) - ); // RAJA::expt::loop - } // lambda (by) - ); // RAJA::expt::loop - - } // outer lambda (ctx) - ); // RAJA::expt::launch - - } // loop over kernel reps - stopTimer(); - - break; - } -#endif - default: { getCout() << "\n MAT_MAT_SHARED : Unknown variant id = " << vid << std::endl; From c986bf94a7de65231b1b3c6bfa517cd2a179d5cc Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 11 Jul 2022 23:22:11 -0700 Subject: [PATCH 031/174] disable Lambda_StdPar; use par not par_unseq --- src/basic/PI_ATOMIC-StdPar.cpp | 18 ++++++++++++------ 1 file changed, 12 
insertions(+), 6 deletions(-) diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 44925913d..fe7eeb599 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -51,7 +51,7 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) //myAtomic a_pi{m_pi_init}; myAtomic * a_pi = new myAtomic; // i hate this *a_pi = m_pi_init; - std::for_each( std::execution::par_unseq, + std::for_each( std::execution::par, begin, end, [=](Index_type i) { double x = (double(i) + 0.5) * dx; @@ -65,9 +65,10 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) break; } +#if 0 case Lambda_StdPar : { - auto piatomic_base_lam = [=](Index_type i, myAtomic &a_pi) { + auto piatomic_base_lam = [=](Index_type i, myAtomic * a_pi) { double x = (double(i) + 0.5) * dx; a_pi = a_pi + dx / (1.0 + x * x); }; @@ -75,17 +76,22 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - myAtomic a_pi{m_pi_init}; - for (Index_type i = ibegin; i < iend; ++i ) { + //myAtomic a_pi{m_pi_init}; + myAtomic * a_pi = new myAtomic; // i hate this + *a_pi = m_pi_init; + std::for_each( std::execution::par, + begin, end, + [=](Index_type i) { piatomic_base_lam(i,a_pi); - } - *pi = a_pi * 4.0; + }); + *pi = *a_pi * 4.0; } stopTimer(); break; } +#endif default : { getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; From 563c969d1c250a32e9852f27a30aec51a432ab11 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 11 Jul 2022 23:22:39 -0700 Subject: [PATCH 032/174] enable par_unseq even though it is slower --- src/apps/HALOEXCHANGE-StdPar.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index d2cd73794..8a7f36032 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -38,9 +38,9 @@ void 
HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( //std::execution::par_unseq, - begin, end, - [=](Index_type l) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type l) { Real_ptr buffer = buffers[l]; Int_ptr list = pack_index_lists[l]; Index_type len = pack_index_list_lengths[l]; @@ -53,9 +53,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( //std::execution::par_unseq, - begin, end, - [=](Index_type l) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type l) { Real_ptr buffer = buffers[l]; Int_ptr list = unpack_index_lists[l]; Index_type len = unpack_index_list_lengths[l]; @@ -79,7 +79,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( //std::execution::par_unseq, + std::for_each( std::execution::par_unseq, begin, end, [=](Index_type l) { Real_ptr buffer = buffers[l]; @@ -97,7 +97,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( //std::execution::par_unseq, + std::for_each( std::execution::par_unseq, begin, end, [=](Index_type l) { Real_ptr buffer = buffers[l]; From ea4bccb1292e6e045021c85f2570d733376269c7 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 11 Jul 2022 23:22:56 -0700 Subject: [PATCH 033/174] disable Lambda_StdPar --- src/basic/PI_ATOMIC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 6a15d4784..35ee2d502 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -55,7 +55,7 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( Lambda_StdPar ); + //setVariantDefined( Lambda_StdPar ); setVariantDefined( Kokkos_Lambda ); } From 
2d54a4ba94957c1eeea856c2a2e3c695085be633 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 11 Jul 2022 23:23:10 -0700 Subject: [PATCH 034/174] move collapse choice here --- src/common/StdParUtils.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index 26c65c84b..7775360bc 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -28,6 +28,10 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) +#define USE_STDPAR_COLLAPSE +#endif + // This implementation was authored by David Olsen #include From d9a94c21161e502717d92eec3b8f89bf500345fb Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 00:34:43 -0700 Subject: [PATCH 035/174] partially implement SORTPAIRS with StdPar (GPU issues) --- src/algorithm/SORTPAIRS-StdPar.cpp | 39 ++++++++++++++++++++---------- src/common/StdParUtils.hpp | 1 + 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index cad571d4a..c01a5f093 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -30,8 +30,8 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - //auto begin = counting_iterator(ibegin); - //auto end = counting_iterator(iend); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); SORTPAIRS_DATA_SETUP; @@ -47,20 +47,33 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) std::vector vector_of_pairs; vector_of_pairs.reserve(iend-ibegin); - for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + std::for_each( +#ifndef NVCXX_GPU_ENABLED + 
std::execution::par_unseq, +#endif + begin,end, + [=,&vector_of_pairs](Index_type iemp) noexcept { vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]); - } - - std::sort(vector_of_pairs.begin(), vector_of_pairs.end(), - [](pair_type const& lhs, pair_type const& rhs) { - return lhs.first < rhs.first; - }); - - for (Index_type iemp = ibegin; iemp < iend; ++iemp) { - pair_type& pair = vector_of_pairs[iemp - ibegin]; + }); + + std::sort( std::execution::par_unseq, + vector_of_pairs.begin(), vector_of_pairs.end(), + [](pair_type const& lhs, pair_type const& rhs) { + return lhs.first < rhs.first; + }); + + //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + std::for_each( +#ifndef NVCXX_GPU_ENABLED + std::execution::par_unseq, +#endif + begin,end, + [=](Index_type iemp) { + const pair_type &pair = vector_of_pairs[iemp - ibegin]; x[iend*irep + iemp] = pair.first; i[iend*irep + iemp] = pair.second; - } + }); } stopTimer(); diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index 7775360bc..7f207e011 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -30,6 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) #define USE_STDPAR_COLLAPSE +#define NVCXX_GPU_ENABLED #endif // This implementation was authored by David Olsen From 6f1fbbdc7fc9f8644d74beef2e27628c15f95085 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 00:35:03 -0700 Subject: [PATCH 036/174] partially implement SORTPAIRS with StdPar (GPU issues) --- README.stdpar | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.stdpar b/README.stdpar index a5b81beca..9d9067e90 100644 --- a/README.stdpar +++ b/README.stdpar @@ -89,6 +89,18 @@ RAJA_Seq-default 1136.6199452543779141 0.0000000000000000000 Base_StdPar-default 1136.6199452543779141 0.0000000000000000000 Lambda_StdPar-default -6.0464819976872759102e+32 6.0464819976872759102e+32 +SORTPAIRS emplace_back not supported on GPU... + +nvlink error : Undefined reference to '_ZSt20__throw_length_errorPKc' in '../lib/libalgorithm.a:SORTPAIRS-StdPar.cpp.o' +nvlink error : Undefined reference to '_ZSt20__throw_length_errorPKc' in '../lib/libalgorithm.a:SORTPAIRS-StdPar.cpp.o' + +SORTPAIRS write out to {x,i} bad... + + Running Base_StdPar variant +terminate called after throwing an instance of 'thrust::system::system_error' + what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered +Aborted (core dumped) + # Intel cmake .. 
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From 3f39c8b32d0d61fd4d3528d1bb4bacc6cc85f621 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 00:36:12 -0700 Subject: [PATCH 037/174] bring USE_STDPAR_COLLAPSE into common header --- src/polybench/POLYBENCH_2MM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_3MM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 2 +- src/polybench/POLYBENCH_GEMM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index 20ad50043..e067a9842 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index ba0df5bb0..00f431291 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index 0c197f2ca..c3d1e8b15 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index 5d644def0..58a7f11ec 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 
1 namespace rajaperf { diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 7a51496f9..ed94308d8 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { From 6548d9c52c8c718d3973f2973d3743e63928e613 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:01:01 -0700 Subject: [PATCH 038/174] add StdPar impl using std::reduce --- src/algorithm/REDUCE_SUM-StdPar.cpp | 14 ++++++-------- src/algorithm/REDUCE_SUM.cpp | 3 +++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp index 08cbd206b..c2605250a 100644 --- a/src/algorithm/REDUCE_SUM-StdPar.cpp +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -41,10 +41,9 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune Real_type sum = m_sum_init; -#warning needs parallel reduce - for (Index_type i = ibegin; i < iend; ++i ) { - REDUCE_SUM_BODY; - } + sum += std::reduce( std::execution::par_unseq, + x+ibegin, x+iend, + Real_type(0), std::plus() ); m_sum = sum; @@ -65,10 +64,9 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune Real_type sum = m_sum_init; -#warning needs parallel reduce - for (Index_type i = ibegin; i < iend; ++i ) { - sum += reduce_sum_base_lam(i); - } + sum += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), reduce_sum_base_lam); m_sum = sum; diff --git a/src/algorithm/REDUCE_SUM.cpp b/src/algorithm/REDUCE_SUM.cpp index f85b982f6..24cc657e6 100644 --- a/src/algorithm/REDUCE_SUM.cpp +++ b/src/algorithm/REDUCE_SUM.cpp @@ -51,6 +51,9 @@ REDUCE_SUM::REDUCE_SUM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + //setVariantDefined( 
Lambda_StdPar ); // exists but is not interesting } REDUCE_SUM::~REDUCE_SUM() From 33bb44ebf1bf5d96db05d45712dff3d9f4c70df6 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:11:28 -0700 Subject: [PATCH 039/174] add comment why GPU disabled --- src/algorithm/SORTPAIRS-StdPar.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index c01a5f093..6650aa2f6 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -50,6 +50,7 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { std::for_each( #ifndef NVCXX_GPU_ENABLED +// GPU implementation crashes std::execution::par_unseq, #endif begin,end, @@ -66,6 +67,7 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { std::for_each( #ifndef NVCXX_GPU_ENABLED +// GPU implementation crashes std::execution::par_unseq, #endif begin,end, From 0cd37a38547c81a8a29ccda4344b31f619e02430 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:11:47 -0700 Subject: [PATCH 040/174] add SCAN StdPar but wrong on GPU??? 
--- src/algorithm/SCAN-StdPar.cpp | 11 ++++++----- src/algorithm/SCAN.cpp | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index 5e6638e4b..c421c8a65 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -36,11 +36,12 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { -#warning needs parallel scan - SCAN_PROLOGUE; - for (Index_type i = ibegin; i < iend; ++i ) { - SCAN_BODY; - } + std::exclusive_scan( +#ifndef NVCXX_GPU_ENABLED +// GPU implementation is wrong + std::execution::par_unseq, +#endif + x+ibegin, x+iend, y, (Real_type)0 ); } stopTimer(); diff --git a/src/algorithm/SCAN.cpp b/src/algorithm/SCAN.cpp index 7a4d9091c..d9ae2044d 100644 --- a/src/algorithm/SCAN.cpp +++ b/src/algorithm/SCAN.cpp @@ -55,6 +55,8 @@ SCAN::SCAN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); } SCAN::~SCAN() From 4f8e7aba09ae9fa972b913a245d8fce8eec5ce24 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:18:15 -0700 Subject: [PATCH 041/174] start working on INDEXLIST StdPar --- src/basic/INDEXLIST-StdPar.cpp | 7 ++++++- src/basic/INDEXLIST.cpp | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index f2b8cb828..51f29f220 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -38,10 +38,15 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ Index_type count = 0; +#if 0 #warning needs parallel inscan for (Index_type i = ibegin; i < iend; ++i ) { - INDEXLIST_BODY; + if ( x[i] < 0.0 ) { + list[count++] = i; + } } +#else +#endif m_len = count; diff --git a/src/basic/INDEXLIST.cpp b/src/basic/INDEXLIST.cpp index df523fbf6..c369f5ad8 100644 --- 
a/src/basic/INDEXLIST.cpp +++ b/src/basic/INDEXLIST.cpp @@ -49,6 +49,8 @@ INDEXLIST::INDEXLIST(const RunParams& params) setVariantDefined( Base_CUDA ); setVariantDefined( Base_HIP ); + + //setVariantDefined( Base_StdPar ); } INDEXLIST::~INDEXLIST() From c7c292b4bc2c364099ee13e9062fe3af6c875648 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:48:03 -0700 Subject: [PATCH 042/174] implement DAXPY_ATOMIC StdPar using a variety of atomics, because C++20 atomic_ref is not widely available --- src/basic/DAXPY_ATOMIC-StdPar.cpp | 35 ++++++++++++++++++++++++------- src/basic/DAXPY_ATOMIC.cpp | 3 +++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index 911e8de6e..ba7421545 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -13,6 +13,12 @@ #include "common/StdParUtils.hpp" #include +#include + +#if defined(NVCXX_GPU_ENABLED) +// this is required to get NVC++ to compile CUDA atomics in StdPar +#include +#endif namespace rajaperf { @@ -41,9 +47,23 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu for (RepIndex_type irep = 0; irep < run_reps; ++irep) { std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - DAXPY_ATOMIC_BODY; + begin, end, + [=](Index_type i) { +#if __cpp_lib_atomic_ref + auto px = std::atomic_ref(&x[i]); + auto py = std::atomic_ref(&y[i]); + py += a * px; +#elif defined(_OPENMP) + #pragma omp atomic + y[i] += a * x[i]; +#elif defined(_OPENACC) + #pragma acc atomic + y[i] += a * x[i]; +#elif defined(NVCXX_GPU_ENABLED) + atomicaddd(&y[i],a * x[i]); +#else +#error No atomic +#endif }); } @@ -55,15 +75,16 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu case Lambda_StdPar : { auto daxpy_atomic_lam = [=](Index_type i) { - DAXPY_ATOMIC_BODY; - }; + #pragma omp atomic + y[i] += a * x[i] ; + }; startTimer(); for (RepIndex_type irep = 
0; irep < run_reps; ++irep) { std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + begin, end, + [=](Index_type i) { daxpy_atomic_lam(i); }); diff --git a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp index 200df93db..111010c36 100644 --- a/src/basic/DAXPY_ATOMIC.cpp +++ b/src/basic/DAXPY_ATOMIC.cpp @@ -52,6 +52,9 @@ DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } From d4f47a5cbfbfebd4fc655167152e757f44cae531 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:07:14 -0700 Subject: [PATCH 043/174] use std:: not RAJA min/max --- src/basic/REDUCE3_INT-StdPar.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp index b2ada68e7..c1de02c72 100644 --- a/src/basic/REDUCE3_INT-StdPar.cpp +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -44,8 +44,8 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) typedef std::array Reduce_type; Reduce_type result = std::transform_reduce( std::execution::par_unseq, - begin, end, - Reduce_type{m_vsum_init,m_vmin_init,m_vmax_init}, + begin, end, + Reduce_type{m_vsum_init,m_vmin_init,m_vmax_init}, [=](Reduce_type a, Reduce_type b) -> Reduce_type { auto plus = a[0] + b[0]; auto min = std::min(a[1],b[1]); @@ -61,8 +61,8 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) ); m_vsum += result[0]; - m_vmin = RAJA_MIN(m_vmin, result[1]); - m_vmax = RAJA_MAX(m_vmax, result[2]); + m_vmin = std::min(m_vmin, result[1]); + m_vmax = std::max(m_vmax, result[2]); } stopTimer(); @@ -85,13 +85,13 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type i = ibegin; i < iend; ++i ) { vsum += init3_base_lam(i); - vmin = RAJA_MIN(vmin, init3_base_lam(i)); - vmax = 
RAJA_MAX(vmax, init3_base_lam(i)); + vmin = std::min(vmin, init3_base_lam(i)); + vmax = std::max(vmax, init3_base_lam(i)); } m_vsum += vsum; - m_vmin = RAJA_MIN(m_vmin, vmin); - m_vmax = RAJA_MAX(m_vmax, vmax); + m_vmin = std::min(m_vmin, vmin); + m_vmax = std::max(m_vmax, vmax); } stopTimer(); From 16c209946a6501bc6a34ad0fc1605641d4847401 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:07:29 -0700 Subject: [PATCH 044/174] implement REDUCE_STRUCT Base_StdPar --- src/basic/REDUCE_STRUCT-StdPar.cpp | 52 ++++++++++++++++++++++++------ src/basic/REDUCE_STRUCT.cpp | 3 ++ 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp index ee94e542c..2be2360ad 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -45,15 +45,47 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t Real_type xmin = m_init_min; Real_type ymin = m_init_min; Real_type xmax = m_init_max; Real_type ymax = m_init_max; +#if 0 #warning needs parallel for (Index_type i = ibegin; i < iend; ++i ) { - xsum += points.x[i] ; \ - xmin = RAJA_MIN(xmin, points.x[i]) ; \ - xmax = RAJA_MAX(xmax, points.x[i]) ; \ - ysum += points.y[i] ; \ - ymin = RAJA_MIN(ymin, points.y[i]) ; \ - ymax = RAJA_MAX(ymax, points.y[i]) ; + xsum += points.x[i] ; + xmin = std::min(xmin, points.x[i]) ; + xmax = std::max(xmax, points.x[i]) ; + ysum += points.y[i] ; + ymin = std::min(ymin, points.y[i]) ; + ymax = std::max(ymax, points.y[i]) ; } +#else + using Reduce_type = std::array; + Reduce_type result = + std::transform_reduce( std::execution::par_unseq, + begin, end, + Reduce_type{ m_init_sum, m_init_min, m_init_max, // x + m_init_sum, m_init_min, m_init_max }, // y + [=](Reduce_type a, Reduce_type b) -> Reduce_type { + auto xsum = a[0] + b[0]; + auto xmin = std::min(a[1],b[1]); + auto xmax = std::max(a[2],b[2]); + auto ysum = a[3] + b[3]; + auto ymin = 
std::min(a[4],b[4]); + auto ymax = std::max(a[5],b[5]); + Reduce_type red{ xsum, xmin, xmax, ysum, ymin, ymax }; + return red; + }, + [=](Index_type i) -> Reduce_type { + Reduce_type val{ points.x[i], points.x[i], points.x[i], + points.y[i], points.y[i], points.y[i] }; + return val; + + } + ); +#endif + xsum = result[0]; + xmin = result[1]; + xmax = result[2]; + ysum = result[3]; + ymin = result[4]; + ymax = result[5]; points.SetCenter(xsum/(points.N), ysum/(points.N)); points.SetXMin(xmin); @@ -88,11 +120,11 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t #warning needs parallel for (Index_type i = ibegin; i < iend; ++i ) { xsum += reduce_struct_x_base_lam(i); - xmin = RAJA_MIN(xmin, reduce_struct_x_base_lam(i)); - xmax = RAJA_MAX(xmax, reduce_struct_x_base_lam(i)); + xmin = std::min(xmin, reduce_struct_x_base_lam(i)); + xmax = std::max(xmax, reduce_struct_x_base_lam(i)); ysum += reduce_struct_y_base_lam(i); - ymin = RAJA_MIN(ymin, reduce_struct_y_base_lam(i)); - ymax = RAJA_MAX(ymax, reduce_struct_y_base_lam(i)); + ymin = std::min(ymin, reduce_struct_y_base_lam(i)); + ymax = std::max(ymax, reduce_struct_y_base_lam(i)); } points.SetCenter(xsum/(points.N), ysum/(points.N)); diff --git a/src/basic/REDUCE_STRUCT.cpp b/src/basic/REDUCE_STRUCT.cpp index d5c33f906..0be2df509 100644 --- a/src/basic/REDUCE_STRUCT.cpp +++ b/src/basic/REDUCE_STRUCT.cpp @@ -56,6 +56,9 @@ REDUCE_STRUCT::REDUCE_STRUCT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); } REDUCE_STRUCT::~REDUCE_STRUCT() From 7ece3bf4e76aa861bd22a42740fec725e098b13e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:08:57 -0700 Subject: [PATCH 045/174] remove warning --- src/basic/REDUCE_STRUCT-StdPar.cpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp 
b/src/basic/REDUCE_STRUCT-StdPar.cpp index 2be2360ad..b8e15d033 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -45,17 +45,6 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t Real_type xmin = m_init_min; Real_type ymin = m_init_min; Real_type xmax = m_init_max; Real_type ymax = m_init_max; -#if 0 -#warning needs parallel - for (Index_type i = ibegin; i < iend; ++i ) { - xsum += points.x[i] ; - xmin = std::min(xmin, points.x[i]) ; - xmax = std::max(xmax, points.x[i]) ; - ysum += points.y[i] ; - ymin = std::min(ymin, points.y[i]) ; - ymax = std::max(ymax, points.y[i]) ; - } -#else using Reduce_type = std::array; Reduce_type result = std::transform_reduce( std::execution::par_unseq, @@ -79,7 +68,7 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t } ); -#endif + xsum = result[0]; xmin = result[1]; xmax = result[2]; From 7a58180a9d775472595bf993b2c6d1ff2ec043c3 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:44:43 -0700 Subject: [PATCH 046/174] FIRST_MIN Lambda_StdPar unimplemented --- src/lcals/FIRST_MIN-OMP.cpp | 8 ++++---- src/lcals/FIRST_MIN.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lcals/FIRST_MIN-OMP.cpp b/src/lcals/FIRST_MIN-OMP.cpp index ef7791739..2b95528c1 100644 --- a/src/lcals/FIRST_MIN-OMP.cpp +++ b/src/lcals/FIRST_MIN-OMP.cpp @@ -36,12 +36,12 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - #pragma omp declare reduction(minloc : MyMinLoc : \ + //#pragma omp declare reduction(minloc : MyMinLoc : \ omp_out = MinLoc_compare(omp_out, omp_in)) FIRST_MIN_MINLOC_INIT; - #pragma omp parallel for reduction(minloc:mymin) + //#pragma omp parallel for reduction(minloc:mymin) for (Index_type i = ibegin; i < iend; ++i ) { FIRST_MIN_BODY; } @@ -63,12 +63,12 @@ void 
FIRST_MIN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - #pragma omp declare reduction(minloc : MyMinLoc : \ + //#pragma omp declare reduction(minloc : MyMinLoc : \ omp_out = MinLoc_compare(omp_out, omp_in)) FIRST_MIN_MINLOC_INIT; - #pragma omp parallel for reduction(minloc:mymin) + //#pragma omp parallel for reduction(minloc:mymin) for (Index_type i = ibegin; i < iend; ++i ) { if ( firstmin_base_lam(i) < mymin.val ) { mymin.val = x[i]; diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp index e8825dd17..69778e263 100644 --- a/src/lcals/FIRST_MIN.cpp +++ b/src/lcals/FIRST_MIN.cpp @@ -59,7 +59,7 @@ FIRST_MIN::FIRST_MIN(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( Lambda_StdPar ); + //setVariantDefined( Lambda_StdPar ); } FIRST_MIN::~FIRST_MIN() From 5fa774366ea5af0ec6e331e00546ce6f9475e730 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:46:47 -0700 Subject: [PATCH 047/174] s/RAJA_MAX/std::max/g --- src/lcals/FIRST_MIN-StdPar.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index ef6a11c93..10e579197 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -42,7 +42,7 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) &x[ibegin], &x[iend]); auto loc = std::distance(&x[ibegin], result); - m_minloc = RAJA_MAX(m_minloc, loc); + m_minloc = std::max(m_minloc, loc); } stopTimer(); @@ -63,12 +63,12 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type i = ibegin; i < iend; ++i ) { if ( firstmin_base_lam(i) < mymin.val ) { \ - mymin.val = x[i]; \ - mymin.loc = i; \ + mymin.val = x[i]; + mymin.loc = i; } } - m_minloc = RAJA_MAX(m_minloc, mymin.loc); + m_minloc = std::max(m_minloc, mymin.loc); } stopTimer(); From 
7ea72bc436945f9108b69d2de9d33a6dbefac316 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 13:20:10 +0300 Subject: [PATCH 048/174] CPU StdPar --- README.stdpar | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.stdpar b/README.stdpar index a5b81beca..0897cedcc 100644 --- a/README.stdpar +++ b/README.stdpar @@ -6,6 +6,8 @@ cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAG cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 +cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_supress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8 + ## CPU ^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves From 013c0ad4b7368892adcadb593e0b417c575d2df3 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 13:40:58 +0300 Subject: [PATCH 049/174] disable atomic_ref --- src/basic/DAXPY_ATOMIC-StdPar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index ba7421545..a2ba4fe28 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -49,7 +49,7 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { -#if __cpp_lib_atomic_ref +#if 0 //__cpp_lib_atomic_ref auto px = std::atomic_ref(&x[i]); auto py = std::atomic_ref(&y[i]); py += a * px; From e99f8c342ffff392ede75d69cc637073356760bb Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 13:41:09 +0300 Subject: [PATCH 050/174] CPU info --- README.stdpar | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.stdpar b/README.stdpar index 5efe4a24d..a1996134c 100644 --- 
a/README.stdpar +++ b/README.stdpar @@ -4,10 +4,11 @@ cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAG # NVC++ -cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 +cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_supress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8 + ## CPU ^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves From 7213acc5207cf8454cd88ccb17e49820c0578b6e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 04:49:23 -0700 Subject: [PATCH 051/174] fix no GPU StdPar in SCAN --- src/algorithm/SCAN-StdPar.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index c421c8a65..0c99ae9d9 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -37,8 +37,10 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { std::exclusive_scan( -#ifndef NVCXX_GPU_ENABLED +#ifdef NVCXX_GPU_ENABLED // GPU implementation is wrong + std::execution::seq, +#else std::execution::par_unseq, #endif x+ibegin, x+iend, y, (Real_type)0 ); From e1791c71b1034ac936d957205ac0f28b71865036 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 04:49:43 -0700 Subject: [PATCH 052/174] Lambda_StdPar HEAT_3D added --- src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 36 ++++++++++++++-------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp 
b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index 8f9e1bc54..24d0b9afd 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -93,21 +93,33 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { - for (Index_type i = 1; i < N-1; ++i ) { - for (Index_type j = 1; j < N-1; ++j ) { - for (Index_type k = 1; k < N-1; ++k ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type k) { poly_heat3d_base_lam1(i, j, k); - } - } - } + }); + }); + }); - for (Index_type i = 1; i < N-1; ++i ) { - for (Index_type j = 1; j < N-1; ++j ) { - for (Index_type k = 1; k < N-1; ++k ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type k) { poly_heat3d_base_lam2(i, j, k); - } - } - } + }); + }); + }); } From d06f0ced7a63f6f43d026a7d271afa5065052f6d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 7 Jul 2022 17:25:27 +0300 Subject: [PATCH 053/174] starting over with StdPar because git submodules are trash --- CMakeLists.txt | 6 ++++ src/algorithm/CMakeLists.txt | 2 ++ src/algorithm/MEMCPY.hpp | 1 + src/algorithm/MEMSET.hpp | 1 + src/algorithm/REDUCE_SUM.hpp | 1 + src/algorithm/SCAN.hpp | 1 + src/algorithm/SORT.cpp | 3 ++ src/algorithm/SORT.hpp | 1 + src/algorithm/SORTPAIRS.cpp | 3 ++ src/algorithm/SORTPAIRS.hpp | 1 + src/apps/CMakeLists.txt | 11 +++++++ src/apps/CONVECTION3DPA.hpp | 1 + src/apps/DEL_DOT_VEC_2D.cpp | 4 +++ src/apps/DEL_DOT_VEC_2D.hpp | 1 + src/apps/DIFFUSION3DPA.cpp | 2 ++ src/apps/DIFFUSION3DPA.hpp | 1 + src/apps/ENERGY.cpp | 4 +++ src/apps/ENERGY.hpp | 1 + src/apps/FIR.cpp | 4 +++ src/apps/FIR.hpp | 1 + 
src/apps/HALOEXCHANGE.cpp | 4 +++ src/apps/HALOEXCHANGE.hpp | 1 + src/apps/HALOEXCHANGE_FUSED.cpp | 4 +++ src/apps/HALOEXCHANGE_FUSED.hpp | 1 + src/apps/LTIMES.cpp | 4 +++ src/apps/LTIMES.hpp | 1 + src/apps/LTIMES_NOVIEW.cpp | 4 +++ src/apps/LTIMES_NOVIEW.hpp | 1 + src/apps/MASS3DPA.cpp | 2 ++ src/apps/MASS3DPA.hpp | 1 + src/apps/NODAL_ACCUMULATION_3D.hpp | 1 + src/apps/PRESSURE.cpp | 4 +++ src/apps/PRESSURE.hpp | 1 + src/apps/VOL3D.cpp | 4 +++ src/apps/VOL3D.hpp | 1 + src/apps/WIP-COUPLE.hpp | 1 + src/basic/CMakeLists.txt | 12 ++++++++ src/basic/DAXPY.cpp | 4 +++ src/basic/DAXPY.hpp | 1 + src/basic/DAXPY_ATOMIC.hpp | 1 + src/basic/IF_QUAD.cpp | 4 +++ src/basic/IF_QUAD.hpp | 1 + src/basic/INDEXLIST.hpp | 1 + src/basic/INDEXLIST_3LOOP.hpp | 1 + src/basic/INIT3.cpp | 4 +++ src/basic/INIT3.hpp | 1 + src/basic/INIT_VIEW1D.cpp | 4 +++ src/basic/INIT_VIEW1D.hpp | 1 + src/basic/INIT_VIEW1D_OFFSET.cpp | 4 +++ src/basic/INIT_VIEW1D_OFFSET.hpp | 1 + src/basic/MAT_MAT_SHARED.cpp | 4 +++ src/basic/MAT_MAT_SHARED.hpp | 1 + src/basic/MULADDSUB.cpp | 4 +++ src/basic/MULADDSUB.hpp | 1 + src/basic/NESTED_INIT-Seq.cpp | 1 + src/basic/NESTED_INIT.cpp | 4 +++ src/basic/NESTED_INIT.hpp | 1 + src/basic/PI_ATOMIC.cpp | 4 +++ src/basic/PI_ATOMIC.hpp | 1 + src/basic/PI_REDUCE.cpp | 4 +++ src/basic/PI_REDUCE.hpp | 1 + src/basic/REDUCE3_INT.cpp | 4 +++ src/basic/REDUCE3_INT.hpp | 1 + src/basic/REDUCE_STRUCT.hpp | 1 + src/basic/TRAP_INT.cpp | 4 +++ src/basic/TRAP_INT.hpp | 1 + src/common/KernelBase.cpp | 16 ++++++++++ src/common/KernelBase.hpp | 1 + src/common/RAJAPerfSuite.cpp | 24 +++++++++++++++ src/common/RAJAPerfSuite.hpp | 4 +++ src/lcals/CMakeLists.txt | 11 +++++++ src/lcals/DIFF_PREDICT.cpp | 4 +++ src/lcals/DIFF_PREDICT.hpp | 1 + src/lcals/EOS.cpp | 4 +++ src/lcals/EOS.hpp | 1 + src/lcals/FIRST_DIFF.cpp | 6 +++- src/lcals/FIRST_DIFF.hpp | 1 + src/lcals/FIRST_MIN.cpp | 4 +++ src/lcals/FIRST_MIN.hpp | 1 + src/lcals/FIRST_SUM.cpp | 4 +++ src/lcals/FIRST_SUM.hpp | 1 + 
src/lcals/GEN_LIN_RECUR.cpp | 4 +++ src/lcals/GEN_LIN_RECUR.hpp | 1 + src/lcals/HYDRO_1D.cpp | 4 +++ src/lcals/HYDRO_1D.hpp | 1 + src/lcals/HYDRO_2D.cpp | 4 +++ src/lcals/HYDRO_2D.hpp | 1 + src/lcals/INT_PREDICT.cpp | 4 +++ src/lcals/INT_PREDICT.hpp | 1 + src/lcals/PLANCKIAN.cpp | 4 +++ src/lcals/PLANCKIAN.hpp | 1 + src/lcals/TRIDIAG_ELIM.cpp | 4 +++ src/lcals/TRIDIAG_ELIM.hpp | 1 + src/polybench/CMakeLists.txt | 13 ++++++++ src/polybench/POLYBENCH_2MM.cpp | 4 +++ src/polybench/POLYBENCH_2MM.hpp | 1 + src/polybench/POLYBENCH_3MM-Seq.cpp | 1 - src/polybench/POLYBENCH_3MM.cpp | 4 +++ src/polybench/POLYBENCH_3MM.hpp | 1 + src/polybench/POLYBENCH_ADI.cpp | 4 +++ src/polybench/POLYBENCH_ADI.hpp | 1 + src/polybench/POLYBENCH_ATAX.cpp | 4 +++ src/polybench/POLYBENCH_ATAX.hpp | 1 + src/polybench/POLYBENCH_FDTD_2D.cpp | 4 +++ src/polybench/POLYBENCH_FDTD_2D.hpp | 1 + .../POLYBENCH_FLOYD_WARSHALL-Seq.cpp | 1 - src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp | 4 +++ src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp | 1 + src/polybench/POLYBENCH_GEMM.cpp | 4 +++ src/polybench/POLYBENCH_GEMM.hpp | 1 + src/polybench/POLYBENCH_GEMVER-Seq.cpp | 21 +++++++++---- src/polybench/POLYBENCH_GEMVER.cpp | 4 +++ src/polybench/POLYBENCH_GEMVER.hpp | 7 ++--- src/polybench/POLYBENCH_GESUMMV-Seq.cpp | 4 +-- src/polybench/POLYBENCH_GESUMMV.cpp | 4 +++ src/polybench/POLYBENCH_GESUMMV.hpp | 1 + src/polybench/POLYBENCH_HEAT_3D-Seq.cpp | 30 ++++++++++--------- src/polybench/POLYBENCH_HEAT_3D.cpp | 4 +++ src/polybench/POLYBENCH_HEAT_3D.hpp | 3 ++ src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp | 1 - src/polybench/POLYBENCH_JACOBI_1D.cpp | 4 +++ src/polybench/POLYBENCH_JACOBI_1D.hpp | 1 + src/polybench/POLYBENCH_JACOBI_2D.cpp | 4 +++ src/polybench/POLYBENCH_JACOBI_2D.hpp | 1 + src/polybench/POLYBENCH_MVT.cpp | 4 +++ src/polybench/POLYBENCH_MVT.hpp | 1 + src/stream/ADD-Seq.cpp | 1 - src/stream/ADD.cpp | 4 +++ src/stream/ADD.hpp | 1 + src/stream/CMakeLists.txt | 5 ++++ src/stream/COPY.cpp | 4 +++ src/stream/COPY.hpp | 1 
+ src/stream/DOT.cpp | 4 +++ src/stream/DOT.hpp | 1 + src/stream/MUL.cpp | 4 +++ src/stream/MUL.hpp | 1 + src/stream/TRIAD.cpp | 4 +++ src/stream/TRIAD.hpp | 1 + 138 files changed, 420 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 83a574af2..50670be17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,9 @@ endif() if(ENABLE_KOKKOS) set(CMAKE_CXX_STANDARD 17) set(BLT_CXX_STD c++17) +elseif (ENABLE_STDPAR) + set(CMAKE_CXX_STANDARD 20) + set(BLT_CXX_STD c++14) else() set(CMAKE_CXX_STANDARD 14) set(BLT_CXX_STD c++14) @@ -90,6 +93,9 @@ endif () if (ENABLE_OPENMP) add_definitions(-DRUN_OPENMP) endif () +if (ENABLE_STDPAR) + add_definitions(-DRUN_STDPAR) +endif () set(RAJA_PERFSUITE_VERSION_MAJOR 0) set(RAJA_PERFSUITE_VERSION_MINOR 11) diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index 73bea70d2..7c0fcd39f 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -15,11 +15,13 @@ blt_add_library( SCAN-OMP.cpp SORT.cpp SORT-Seq.cpp + SORT-StdPar.cpp SORT-Hip.cpp SORT-Cuda.cpp SORT-OMP.cpp SORTPAIRS.cpp SORTPAIRS-Seq.cpp + SORTPAIRS-StdPar.cpp SORTPAIRS-Hip.cpp SORTPAIRS-Cuda.cpp SORTPAIRS-OMP.cpp diff --git a/src/algorithm/MEMCPY.hpp b/src/algorithm/MEMCPY.hpp index 67fff5255..2477115ce 100644 --- a/src/algorithm/MEMCPY.hpp +++ b/src/algorithm/MEMCPY.hpp @@ -54,6 +54,7 @@ class MEMCPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/algorithm/MEMSET.hpp b/src/algorithm/MEMSET.hpp index 8edc5b611..0e9630fa7 100644 --- a/src/algorithm/MEMSET.hpp +++ b/src/algorithm/MEMSET.hpp @@ -54,6 +54,7 @@ class MEMSET : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void 
runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/algorithm/REDUCE_SUM.hpp b/src/algorithm/REDUCE_SUM.hpp index f6dba52db..4d6f2f9b9 100644 --- a/src/algorithm/REDUCE_SUM.hpp +++ b/src/algorithm/REDUCE_SUM.hpp @@ -58,6 +58,7 @@ class REDUCE_SUM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/algorithm/SCAN.hpp b/src/algorithm/SCAN.hpp index 519789a55..51cc13325 100644 --- a/src/algorithm/SCAN.hpp +++ b/src/algorithm/SCAN.hpp @@ -61,6 +61,7 @@ class SCAN : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); private: static const size_t default_gpu_block_size = 0; diff --git a/src/algorithm/SORT.cpp b/src/algorithm/SORT.cpp index b9722c4d7..15192f500 100644 --- a/src/algorithm/SORT.cpp +++ b/src/algorithm/SORT.cpp @@ -41,6 +41,9 @@ SORT::SORT(const RunParams& params) setVariantDefined( RAJA_CUDA ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( RAJA_StdPar ); } SORT::~SORT() diff --git a/src/algorithm/SORT.hpp b/src/algorithm/SORT.hpp index 0670c9dd0..3331dacf8 100644 --- a/src/algorithm/SORT.hpp +++ b/src/algorithm/SORT.hpp @@ -54,6 +54,7 @@ class SORT : public KernelBase { getCout() << "\n SORT : Unknown OMP Target variant id = " << vid << std::endl; } + void runStdParVariant(VariantID vid, size_t tune_idx); private: 
static const size_t default_gpu_block_size = 0; diff --git a/src/algorithm/SORTPAIRS.cpp b/src/algorithm/SORTPAIRS.cpp index df175844e..882527eb1 100644 --- a/src/algorithm/SORTPAIRS.cpp +++ b/src/algorithm/SORTPAIRS.cpp @@ -41,6 +41,9 @@ SORTPAIRS::SORTPAIRS(const RunParams& params) setVariantDefined( RAJA_CUDA ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( RAJA_StdPar ); } SORTPAIRS::~SORTPAIRS() diff --git a/src/algorithm/SORTPAIRS.hpp b/src/algorithm/SORTPAIRS.hpp index 658d3ad4b..9a2365957 100644 --- a/src/algorithm/SORTPAIRS.hpp +++ b/src/algorithm/SORTPAIRS.hpp @@ -53,6 +53,7 @@ class SORTPAIRS : public KernelBase { getCout() << "\n SORTPAIRS : Unknown OMP Target variant id = " << vid << std::endl; } + void runStdParVariant(VariantID vid, size_t tune_idx); private: static const size_t default_gpu_block_size = 0; diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index dbb0637fa..e79db7717 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -88,5 +88,16 @@ blt_add_library( VOL3D-OMP.cpp VOL3D-OMPTarget.cpp WIP-COUPLE.cpp + DEL_DOT_VEC_2D-StdPar.cpp + ENERGY-StdPar.cpp + FIR-StdPar.cpp + HALOEXCHANGE-StdPar.cpp + HALOEXCHANGE_FUSED-StdPar.cpp + LTIMES-StdPar.cpp + LTIMES_NOVIEW-StdPar.cpp + MASS3DPA-StdPar.cpp + PRESSURE-StdPar.cpp + VOL3D-StdPar.cpp + DIFFUSION3DPA-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/apps/CONVECTION3DPA.hpp b/src/apps/CONVECTION3DPA.hpp index 810aaefe3..47ffbe4e4 100644 --- a/src/apps/CONVECTION3DPA.hpp +++ b/src/apps/CONVECTION3DPA.hpp @@ -378,6 +378,7 @@ class CONVECTION3DPA : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git 
a/src/apps/DEL_DOT_VEC_2D.cpp b/src/apps/DEL_DOT_VEC_2D.cpp index 24121e157..7e3bf7579 100644 --- a/src/apps/DEL_DOT_VEC_2D.cpp +++ b/src/apps/DEL_DOT_VEC_2D.cpp @@ -62,6 +62,10 @@ DEL_DOT_VEC_2D::DEL_DOT_VEC_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } DEL_DOT_VEC_2D::~DEL_DOT_VEC_2D() diff --git a/src/apps/DEL_DOT_VEC_2D.hpp b/src/apps/DEL_DOT_VEC_2D.hpp index 60d577a05..65b073de2 100644 --- a/src/apps/DEL_DOT_VEC_2D.hpp +++ b/src/apps/DEL_DOT_VEC_2D.hpp @@ -113,6 +113,7 @@ class DEL_DOT_VEC_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/DIFFUSION3DPA.cpp b/src/apps/DIFFUSION3DPA.cpp index 3844668c6..69ee1aa3a 100644 --- a/src/apps/DIFFUSION3DPA.cpp +++ b/src/apps/DIFFUSION3DPA.cpp @@ -65,6 +65,8 @@ DIFFUSION3DPA::DIFFUSION3DPA(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( RAJA_StdPar ); } DIFFUSION3DPA::~DIFFUSION3DPA() diff --git a/src/apps/DIFFUSION3DPA.hpp b/src/apps/DIFFUSION3DPA.hpp index b0ba7c977..a811769f0 100644 --- a/src/apps/DIFFUSION3DPA.hpp +++ b/src/apps/DIFFUSION3DPA.hpp @@ -481,6 +481,7 @@ class DIFFUSION3DPA : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git 
a/src/apps/ENERGY.cpp b/src/apps/ENERGY.cpp index 9ed11381a..66f796db1 100644 --- a/src/apps/ENERGY.cpp +++ b/src/apps/ENERGY.cpp @@ -62,6 +62,10 @@ ENERGY::ENERGY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } ENERGY::~ENERGY() diff --git a/src/apps/ENERGY.hpp b/src/apps/ENERGY.hpp index 6461fdd5f..2848fd3b4 100644 --- a/src/apps/ENERGY.hpp +++ b/src/apps/ENERGY.hpp @@ -203,6 +203,7 @@ class ENERGY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/FIR.cpp b/src/apps/FIR.cpp index 8dd25358e..90871a160 100644 --- a/src/apps/FIR.cpp +++ b/src/apps/FIR.cpp @@ -56,6 +56,10 @@ FIR::FIR(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } FIR::~FIR() diff --git a/src/apps/FIR.hpp b/src/apps/FIR.hpp index dd46d9934..6ce82907a 100644 --- a/src/apps/FIR.hpp +++ b/src/apps/FIR.hpp @@ -78,6 +78,7 @@ class FIR : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/HALOEXCHANGE.cpp b/src/apps/HALOEXCHANGE.cpp index 890fcf0a9..35c9839b1 100644 --- a/src/apps/HALOEXCHANGE.cpp +++ b/src/apps/HALOEXCHANGE.cpp @@ -98,6 +98,10 @@ HALOEXCHANGE::HALOEXCHANGE(const RunParams& params) 
setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } HALOEXCHANGE::~HALOEXCHANGE() diff --git a/src/apps/HALOEXCHANGE.hpp b/src/apps/HALOEXCHANGE.hpp index 5d653762a..de1398210 100644 --- a/src/apps/HALOEXCHANGE.hpp +++ b/src/apps/HALOEXCHANGE.hpp @@ -93,6 +93,7 @@ class HALOEXCHANGE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/HALOEXCHANGE_FUSED.cpp b/src/apps/HALOEXCHANGE_FUSED.cpp index 406cc654b..272d66de4 100644 --- a/src/apps/HALOEXCHANGE_FUSED.cpp +++ b/src/apps/HALOEXCHANGE_FUSED.cpp @@ -98,6 +98,10 @@ HALOEXCHANGE_FUSED::HALOEXCHANGE_FUSED(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } HALOEXCHANGE_FUSED::~HALOEXCHANGE_FUSED() diff --git a/src/apps/HALOEXCHANGE_FUSED.hpp b/src/apps/HALOEXCHANGE_FUSED.hpp index e47c1e14e..b83129571 100644 --- a/src/apps/HALOEXCHANGE_FUSED.hpp +++ b/src/apps/HALOEXCHANGE_FUSED.hpp @@ -137,6 +137,7 @@ class HALOEXCHANGE_FUSED : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp index ede451a0a..9d170071b 100644 --- a/src/apps/LTIMES.cpp +++ b/src/apps/LTIMES.cpp @@ -77,6 +77,10 @@ LTIMES::LTIMES(const RunParams& params) 
setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } LTIMES::~LTIMES() diff --git a/src/apps/LTIMES.hpp b/src/apps/LTIMES.hpp index 31eae0f83..64e773c5c 100644 --- a/src/apps/LTIMES.hpp +++ b/src/apps/LTIMES.hpp @@ -116,6 +116,7 @@ class LTIMES : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/LTIMES_NOVIEW.cpp b/src/apps/LTIMES_NOVIEW.cpp index c0c0f7413..a4f53d360 100644 --- a/src/apps/LTIMES_NOVIEW.cpp +++ b/src/apps/LTIMES_NOVIEW.cpp @@ -76,6 +76,10 @@ LTIMES_NOVIEW::LTIMES_NOVIEW(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } LTIMES_NOVIEW::~LTIMES_NOVIEW() diff --git a/src/apps/LTIMES_NOVIEW.hpp b/src/apps/LTIMES_NOVIEW.hpp index 1385864fb..09fa881cc 100644 --- a/src/apps/LTIMES_NOVIEW.hpp +++ b/src/apps/LTIMES_NOVIEW.hpp @@ -66,6 +66,7 @@ class LTIMES_NOVIEW : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/MASS3DPA.cpp b/src/apps/MASS3DPA.cpp index 288e7ff82..c951336ad 100644 --- a/src/apps/MASS3DPA.cpp +++ b/src/apps/MASS3DPA.cpp @@ -61,6 +61,8 @@ MASS3DPA::MASS3DPA(const RunParams& params) setVariantDefined( Base_HIP 
); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( RAJA_StdPar ); } MASS3DPA::~MASS3DPA() diff --git a/src/apps/MASS3DPA.hpp b/src/apps/MASS3DPA.hpp index 0d1c3a42d..de29544d2 100644 --- a/src/apps/MASS3DPA.hpp +++ b/src/apps/MASS3DPA.hpp @@ -363,6 +363,7 @@ class MASS3DPA : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/NODAL_ACCUMULATION_3D.hpp b/src/apps/NODAL_ACCUMULATION_3D.hpp index a574f331a..52b8faa7a 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.hpp +++ b/src/apps/NODAL_ACCUMULATION_3D.hpp @@ -95,6 +95,7 @@ class NODAL_ACCUMULATION_3D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/PRESSURE.cpp b/src/apps/PRESSURE.cpp index df2cb744f..29fc72adc 100644 --- a/src/apps/PRESSURE.cpp +++ b/src/apps/PRESSURE.cpp @@ -52,6 +52,10 @@ PRESSURE::PRESSURE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } PRESSURE::~PRESSURE() diff --git a/src/apps/PRESSURE.hpp b/src/apps/PRESSURE.hpp index 6421ce6b0..16bcb2b1f 100644 --- a/src/apps/PRESSURE.hpp +++ b/src/apps/PRESSURE.hpp @@ -72,6 +72,7 @@ class PRESSURE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t 
tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/VOL3D.cpp b/src/apps/VOL3D.cpp index fd2ebb5aa..b05511f99 100644 --- a/src/apps/VOL3D.cpp +++ b/src/apps/VOL3D.cpp @@ -64,6 +64,10 @@ VOL3D::VOL3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } VOL3D::~VOL3D() diff --git a/src/apps/VOL3D.hpp b/src/apps/VOL3D.hpp index 9ddedbd19..289b07b83 100644 --- a/src/apps/VOL3D.hpp +++ b/src/apps/VOL3D.hpp @@ -169,6 +169,7 @@ class VOL3D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/WIP-COUPLE.hpp b/src/apps/WIP-COUPLE.hpp index cdafcd5eb..a37875418 100644 --- a/src/apps/WIP-COUPLE.hpp +++ b/src/apps/WIP-COUPLE.hpp @@ -171,6 +171,7 @@ class COUPLE : public KernelBase void runCudaVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} void runHipVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} void runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} + void runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} private: Complex_ptr m_t0; diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index ceeb1a502..cca6bf286 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -10,6 +10,7 @@ blt_add_library( NAME basic SOURCES DAXPY.cpp DAXPY-Seq.cpp + DAXPY-StdPar.cpp DAXPY-Hip.cpp DAXPY-Cuda.cpp DAXPY-OMP.cpp @@ -22,6 +23,7 @@ blt_add_library( 
DAXPY_ATOMIC-OMPTarget.cpp IF_QUAD.cpp IF_QUAD-Seq.cpp + IF_QUAD-StdPar.cpp IF_QUAD-Hip.cpp IF_QUAD-Cuda.cpp IF_QUAD-OMP.cpp @@ -46,48 +48,56 @@ blt_add_library( INIT3-OMPTarget.cpp INIT_VIEW1D.cpp INIT_VIEW1D-Seq.cpp + INIT_VIEW1D-StdPar.cpp INIT_VIEW1D-Hip.cpp INIT_VIEW1D-Cuda.cpp INIT_VIEW1D-OMP.cpp INIT_VIEW1D-OMPTarget.cpp INIT_VIEW1D_OFFSET.cpp INIT_VIEW1D_OFFSET-Seq.cpp + INIT_VIEW1D_OFFSET-StdPar.cpp INIT_VIEW1D_OFFSET-Hip.cpp INIT_VIEW1D_OFFSET-Cuda.cpp INIT_VIEW1D_OFFSET-OMP.cpp INIT_VIEW1D_OFFSET-OMPTarget.cpp MAT_MAT_SHARED.cpp MAT_MAT_SHARED-Seq.cpp + MAT_MAT_SHARED-StdPar.cpp MAT_MAT_SHARED-Hip.cpp MAT_MAT_SHARED-Cuda.cpp MAT_MAT_SHARED-OMP.cpp MAT_MAT_SHARED-OMPTarget.cpp MULADDSUB.cpp MULADDSUB-Seq.cpp + MULADDSUB-StdPar.cpp MULADDSUB-Hip.cpp MULADDSUB-Cuda.cpp MULADDSUB-OMP.cpp MULADDSUB-OMPTarget.cpp NESTED_INIT.cpp NESTED_INIT-Seq.cpp + NESTED_INIT-StdPar.cpp NESTED_INIT-Hip.cpp NESTED_INIT-Cuda.cpp NESTED_INIT-OMP.cpp NESTED_INIT-OMPTarget.cpp PI_ATOMIC.cpp PI_ATOMIC-Seq.cpp + PI_ATOMIC-StdPar.cpp PI_ATOMIC-Hip.cpp PI_ATOMIC-Cuda.cpp PI_ATOMIC-OMP.cpp PI_ATOMIC-OMPTarget.cpp PI_REDUCE.cpp PI_REDUCE-Seq.cpp + PI_REDUCE-StdPar.cpp PI_REDUCE-Hip.cpp PI_REDUCE-Cuda.cpp PI_REDUCE-OMP.cpp PI_REDUCE-OMPTarget.cpp REDUCE3_INT.cpp REDUCE3_INT-Seq.cpp + REDUCE3_INT-StdPar.cpp REDUCE3_INT-Hip.cpp REDUCE3_INT-Cuda.cpp REDUCE3_INT-OMP.cpp @@ -100,9 +110,11 @@ blt_add_library( REDUCE_STRUCT-OMPTarget.cpp TRAP_INT.cpp TRAP_INT-Seq.cpp + TRAP_INT-StdPar.cpp TRAP_INT-Hip.cpp TRAP_INT-Cuda.cpp TRAP_INT-OMPTarget.cpp TRAP_INT-OMP.cpp + INIT3-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index 69a5a152e..a0cd60977 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -52,6 +52,10 @@ DAXPY::DAXPY(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + 
setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/DAXPY.hpp b/src/basic/DAXPY.hpp index 82a6fd9ff..840371fc1 100644 --- a/src/basic/DAXPY.hpp +++ b/src/basic/DAXPY.hpp @@ -52,6 +52,7 @@ class DAXPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/DAXPY_ATOMIC.hpp b/src/basic/DAXPY_ATOMIC.hpp index dd52d777c..0a702deec 100644 --- a/src/basic/DAXPY_ATOMIC.hpp +++ b/src/basic/DAXPY_ATOMIC.hpp @@ -55,6 +55,7 @@ class DAXPY_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index 4a8d60035..799c02865 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -56,6 +56,10 @@ IF_QUAD::IF_QUAD(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/IF_QUAD.hpp b/src/basic/IF_QUAD.hpp index a03727a6c..4aac072fc 100644 --- a/src/basic/IF_QUAD.hpp +++ b/src/basic/IF_QUAD.hpp @@ -69,6 +69,7 @@ class IF_QUAD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t 
tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST.hpp b/src/basic/INDEXLIST.hpp index 0836d8197..0bd51f947 100644 --- a/src/basic/INDEXLIST.hpp +++ b/src/basic/INDEXLIST.hpp @@ -60,6 +60,7 @@ class INDEXLIST : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST_3LOOP.hpp b/src/basic/INDEXLIST_3LOOP.hpp index e19ee5508..408c6483c 100644 --- a/src/basic/INDEXLIST_3LOOP.hpp +++ b/src/basic/INDEXLIST_3LOOP.hpp @@ -71,6 +71,7 @@ class INDEXLIST_3LOOP : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index a504fa914..990278e36 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -52,6 +52,10 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp index 0f89b7c54..3ad27a2b2 100644 --- a/src/basic/INIT3.hpp +++ b/src/basic/INIT3.hpp @@ -55,6 +55,7 @@ class INIT3 : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, 
size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index 2cb2b2376..ea68d0951 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -53,6 +53,10 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D.hpp b/src/basic/INIT_VIEW1D.hpp index b5dfbf097..54c0f54d6 100644 --- a/src/basic/INIT_VIEW1D.hpp +++ b/src/basic/INIT_VIEW1D.hpp @@ -66,6 +66,7 @@ class INIT_VIEW1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index f31395b07..1c482cec7 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -53,6 +53,10 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D_OFFSET.hpp b/src/basic/INIT_VIEW1D_OFFSET.hpp index 4cc3548c7..b9cd47ee8 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.hpp +++ b/src/basic/INIT_VIEW1D_OFFSET.hpp @@ -65,6 +65,7 @@ class INIT_VIEW1D_OFFSET : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void 
runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 98cd878ce..747aa8413 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -60,6 +60,10 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams &params) setVariantDefined(Base_HIP); setVariantDefined(Lambda_HIP); setVariantDefined(RAJA_HIP); + + //setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); + //setVariantDefined( RAJA_StdPar ); } MAT_MAT_SHARED::~MAT_MAT_SHARED() {} diff --git a/src/basic/MAT_MAT_SHARED.hpp b/src/basic/MAT_MAT_SHARED.hpp index 095721c27..c18682960 100644 --- a/src/basic/MAT_MAT_SHARED.hpp +++ b/src/basic/MAT_MAT_SHARED.hpp @@ -139,6 +139,7 @@ class MAT_MAT_SHARED : public KernelBase { void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index 1d4981ca2..8e6b76b5d 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -52,6 +52,10 @@ MULADDSUB::MULADDSUB(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/MULADDSUB.hpp b/src/basic/MULADDSUB.hpp index 63d637073..ccec343e2 100644 --- a/src/basic/MULADDSUB.hpp +++ b/src/basic/MULADDSUB.hpp @@ -58,6 +58,7 @@ class MULADDSUB : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid,
size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/NESTED_INIT-Seq.cpp b/src/basic/NESTED_INIT-Seq.cpp index 48da1b37a..6f0e2265a 100644 --- a/src/basic/NESTED_INIT-Seq.cpp +++ b/src/basic/NESTED_INIT-Seq.cpp @@ -39,6 +39,7 @@ void NESTED_INIT::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i for (Index_type j = 0; j < nj; ++j ) { for (Index_type i = 0; i < ni; ++i ) { NESTED_INIT_BODY; + //std::cout << i << "," << j << "," << k << ";" << k*nj*ni+j*ni+i << " SEQ\n"; } } } diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index 30cbd0254..3c01fe350 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -63,6 +63,10 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/NESTED_INIT.hpp b/src/basic/NESTED_INIT.hpp index 6849c9a73..54099a840 100644 --- a/src/basic/NESTED_INIT.hpp +++ b/src/basic/NESTED_INIT.hpp @@ -58,6 +58,7 @@ class NESTED_INIT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 607ad1312..0633887a0 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -54,6 +54,10 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + 
setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/PI_ATOMIC.hpp b/src/basic/PI_ATOMIC.hpp index e69cbdb56..4b2353a38 100644 --- a/src/basic/PI_ATOMIC.hpp +++ b/src/basic/PI_ATOMIC.hpp @@ -54,6 +54,7 @@ class PI_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index 16d0770ba..5af375f56 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -51,6 +51,10 @@ PI_REDUCE::PI_REDUCE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } PI_REDUCE::~PI_REDUCE() diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index c7cc3258a..f3655d503 100644 --- a/src/basic/PI_REDUCE.hpp +++ b/src/basic/PI_REDUCE.hpp @@ -56,6 +56,7 @@ class PI_REDUCE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index 941d85ac1..e39f0c031 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -57,6 +57,10 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); 
} diff --git a/src/basic/REDUCE3_INT.hpp b/src/basic/REDUCE3_INT.hpp index c84fa84b2..7a0a1f2c8 100644 --- a/src/basic/REDUCE3_INT.hpp +++ b/src/basic/REDUCE3_INT.hpp @@ -70,6 +70,7 @@ class REDUCE3_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE_STRUCT.hpp b/src/basic/REDUCE_STRUCT.hpp index b1d188ca1..0bd168914 100644 --- a/src/basic/REDUCE_STRUCT.hpp +++ b/src/basic/REDUCE_STRUCT.hpp @@ -86,6 +86,7 @@ class REDUCE_STRUCT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index 63da29799..7ddc1991b 100644 --- a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -52,6 +52,10 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/TRAP_INT.hpp b/src/basic/TRAP_INT.hpp index eff85b90e..d23e34164 100644 --- a/src/basic/TRAP_INT.hpp +++ b/src/basic/TRAP_INT.hpp @@ -67,6 +67,7 @@ class TRAP_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t 
tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index 36efa5170..0b526afd0 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -246,6 +246,22 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx) #endif break; } + + case Base_StdPar : + case Lambda_StdPar : + { + runStdParVariant(vid, tune_idx); + break; + } + + case RAJA_StdPar : + { +#if defined(RUN_RAJA_STDPAR) + runStdParVariant(vid, tune_idx); +#endif + break; + } + case Kokkos_Lambda : { #if defined(RUN_KOKKOS) diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index ed3429643..8c72e854e 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -234,6 +234,7 @@ class KernelBase #if defined(RAJA_ENABLE_TARGET_OPENMP) virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0; #endif + virtual void runStdParVariant(VariantID vid, size_t tune_idx) = 0; #if defined(RUN_KOKKOS) virtual void runKokkosVariant(VariantID vid, size_t tune_idx) { diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 7578f5afd..5c1144ef3 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -278,6 +278,10 @@ static const std::string VariantNames [] = std::string("Lambda_HIP"), std::string("RAJA_HIP"), + std::string("Base_StdPar"), + std::string("Lambda_StdPar"), + std::string("RAJA_StdPar"), + std::string("Kokkos_Lambda"), std::string("Unknown Variant") // Keep this at the end and DO NOT remove.... 
@@ -424,6 +428,16 @@ bool isVariantAvailable(VariantID vid) } #endif + if ( vid == Base_StdPar || + vid == Lambda_StdPar) { + ret_val = true; + } +#if defined(RUN_RAJA_STDPAR) + if ( vid == RAJA_StdPar ) { + ret_val = true; + } +#endif + #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { ret_val = true; @@ -485,6 +499,16 @@ bool isVariantGPU(VariantID vid) } #endif + if ( vid == Base_StdPar || + vid == Lambda_StdPar) { + ret_val = true; + } +#if defined(RUN_RAJA_STDPAR) + if ( vid == RAJA_StdPar ) { + ret_val = true; + } +#endif + #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { ret_val = true; diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 07feeede9..ab93280f1 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -194,6 +194,10 @@ enum VariantID { Lambda_HIP, RAJA_HIP, + Base_StdPar, + Lambda_StdPar, + RAJA_StdPar, + Kokkos_Lambda, NumVariants // Keep this one last and NEVER comment out (!!) diff --git a/src/lcals/CMakeLists.txt b/src/lcals/CMakeLists.txt index 5f88c8c69..6e0325bcc 100644 --- a/src/lcals/CMakeLists.txt +++ b/src/lcals/CMakeLists.txt @@ -74,5 +74,16 @@ blt_add_library( TRIDIAG_ELIM-Cuda.cpp TRIDIAG_ELIM-OMP.cpp TRIDIAG_ELIM-OMPTarget.cpp + DIFF_PREDICT-StdPar.cpp + EOS-StdPar.cpp + FIRST_DIFF-StdPar.cpp + FIRST_MIN-StdPar.cpp + FIRST_SUM-StdPar.cpp + GEN_LIN_RECUR-StdPar.cpp + HYDRO_1D-StdPar.cpp + HYDRO_2D-StdPar.cpp + INT_PREDICT-StdPar.cpp + PLANCKIAN-StdPar.cpp + TRIDIAG_ELIM-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/lcals/DIFF_PREDICT.cpp b/src/lcals/DIFF_PREDICT.cpp index 338ba7d0d..e60d636e1 100644 --- a/src/lcals/DIFF_PREDICT.cpp +++ b/src/lcals/DIFF_PREDICT.cpp @@ -49,6 +49,10 @@ DIFF_PREDICT::DIFF_PREDICT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } DIFF_PREDICT::~DIFF_PREDICT() diff --git 
a/src/lcals/DIFF_PREDICT.hpp b/src/lcals/DIFF_PREDICT.hpp index 130071412..d7631953d 100644 --- a/src/lcals/DIFF_PREDICT.hpp +++ b/src/lcals/DIFF_PREDICT.hpp @@ -93,6 +93,7 @@ class DIFF_PREDICT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp index 27bc43d06..b7b3813b3 100644 --- a/src/lcals/EOS.cpp +++ b/src/lcals/EOS.cpp @@ -57,6 +57,10 @@ EOS::EOS(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } EOS::~EOS() diff --git a/src/lcals/EOS.hpp b/src/lcals/EOS.hpp index f2d38b5e9..6715ce857 100644 --- a/src/lcals/EOS.hpp +++ b/src/lcals/EOS.hpp @@ -62,6 +62,7 @@ class EOS : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_DIFF.cpp b/src/lcals/FIRST_DIFF.cpp index 9272b20d4..54a7c0326 100644 --- a/src/lcals/FIRST_DIFF.cpp +++ b/src/lcals/FIRST_DIFF.cpp @@ -29,7 +29,7 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) m_N = getActualProblemSize()+1; setItsPerRep( getActualProblemSize() ); - setItsPerRep( getActualProblemSize() ); + setItsPerRep( getActualProblemSize() ); // why twice? 
setKernelsPerRep(1); setBytesPerRep( (1*sizeof(Real_type) + 0*sizeof(Real_type)) * getActualProblemSize() + (0*sizeof(Real_type) + 1*sizeof(Real_type)) * m_N ); @@ -53,6 +53,10 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } FIRST_DIFF::~FIRST_DIFF() diff --git a/src/lcals/FIRST_DIFF.hpp b/src/lcals/FIRST_DIFF.hpp index 51de73049..655596c3b 100644 --- a/src/lcals/FIRST_DIFF.hpp +++ b/src/lcals/FIRST_DIFF.hpp @@ -52,6 +52,7 @@ class FIRST_DIFF : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp index 8fe9a8c93..a1cffc072 100644 --- a/src/lcals/FIRST_MIN.cpp +++ b/src/lcals/FIRST_MIN.cpp @@ -57,6 +57,10 @@ FIRST_MIN::FIRST_MIN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } FIRST_MIN::~FIRST_MIN() diff --git a/src/lcals/FIRST_MIN.hpp b/src/lcals/FIRST_MIN.hpp index c10839ec7..0f6f172be 100644 --- a/src/lcals/FIRST_MIN.hpp +++ b/src/lcals/FIRST_MIN.hpp @@ -81,6 +81,7 @@ class FIRST_MIN : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp index a9d135446..109c6499a 
100644 --- a/src/lcals/FIRST_SUM.cpp +++ b/src/lcals/FIRST_SUM.cpp @@ -52,6 +52,10 @@ FIRST_SUM::FIRST_SUM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } FIRST_SUM::~FIRST_SUM() diff --git a/src/lcals/FIRST_SUM.hpp b/src/lcals/FIRST_SUM.hpp index 5f019c08c..d97b9d264 100644 --- a/src/lcals/FIRST_SUM.hpp +++ b/src/lcals/FIRST_SUM.hpp @@ -55,6 +55,7 @@ class FIRST_SUM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp index b0598aa8e..eb21f7f5c 100644 --- a/src/lcals/GEN_LIN_RECUR.cpp +++ b/src/lcals/GEN_LIN_RECUR.cpp @@ -57,6 +57,10 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } GEN_LIN_RECUR::~GEN_LIN_RECUR() diff --git a/src/lcals/GEN_LIN_RECUR.hpp b/src/lcals/GEN_LIN_RECUR.hpp index d6d20b43b..21516a0dc 100644 --- a/src/lcals/GEN_LIN_RECUR.hpp +++ b/src/lcals/GEN_LIN_RECUR.hpp @@ -76,6 +76,7 @@ class GEN_LIN_RECUR : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/HYDRO_1D.cpp b/src/lcals/HYDRO_1D.cpp index 5ce1d0700..32d40d978 100644 --- a/src/lcals/HYDRO_1D.cpp +++ 
b/src/lcals/HYDRO_1D.cpp @@ -56,6 +56,10 @@ HYDRO_1D::HYDRO_1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } HYDRO_1D::~HYDRO_1D() diff --git a/src/lcals/HYDRO_1D.hpp b/src/lcals/HYDRO_1D.hpp index 692e40a8e..01d039314 100644 --- a/src/lcals/HYDRO_1D.hpp +++ b/src/lcals/HYDRO_1D.hpp @@ -57,6 +57,7 @@ class HYDRO_1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp index 9b6c2a643..331e6e695 100644 --- a/src/lcals/HYDRO_2D.cpp +++ b/src/lcals/HYDRO_2D.cpp @@ -71,6 +71,10 @@ HYDRO_2D::HYDRO_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } HYDRO_2D::~HYDRO_2D() diff --git a/src/lcals/HYDRO_2D.hpp b/src/lcals/HYDRO_2D.hpp index 4363ea633..93cce3305 100644 --- a/src/lcals/HYDRO_2D.hpp +++ b/src/lcals/HYDRO_2D.hpp @@ -153,6 +153,7 @@ class HYDRO_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/INT_PREDICT.cpp b/src/lcals/INT_PREDICT.cpp index c2062fffa..dd4ff83d8 100644 --- a/src/lcals/INT_PREDICT.cpp +++ b/src/lcals/INT_PREDICT.cpp @@ -49,6 +49,10 @@ INT_PREDICT::INT_PREDICT(const RunParams& params) setVariantDefined( 
Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } INT_PREDICT::~INT_PREDICT() diff --git a/src/lcals/INT_PREDICT.hpp b/src/lcals/INT_PREDICT.hpp index 7a3c6fda6..92d87ab3a 100644 --- a/src/lcals/INT_PREDICT.hpp +++ b/src/lcals/INT_PREDICT.hpp @@ -72,6 +72,7 @@ class INT_PREDICT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/PLANCKIAN.cpp b/src/lcals/PLANCKIAN.cpp index 59de57231..74c65e31b 100644 --- a/src/lcals/PLANCKIAN.cpp +++ b/src/lcals/PLANCKIAN.cpp @@ -49,6 +49,10 @@ PLANCKIAN::PLANCKIAN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } PLANCKIAN::~PLANCKIAN() diff --git a/src/lcals/PLANCKIAN.hpp b/src/lcals/PLANCKIAN.hpp index 46fba63db..0d85614fa 100644 --- a/src/lcals/PLANCKIAN.hpp +++ b/src/lcals/PLANCKIAN.hpp @@ -57,6 +57,7 @@ class PLANCKIAN : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp index 05d0100a8..d606e39be 100644 --- a/src/lcals/TRIDIAG_ELIM.cpp +++ b/src/lcals/TRIDIAG_ELIM.cpp @@ -51,6 +51,10 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( 
Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } TRIDIAG_ELIM::~TRIDIAG_ELIM() diff --git a/src/lcals/TRIDIAG_ELIM.hpp b/src/lcals/TRIDIAG_ELIM.hpp index f593985a5..336fb2219 100644 --- a/src/lcals/TRIDIAG_ELIM.hpp +++ b/src/lcals/TRIDIAG_ELIM.hpp @@ -57,6 +57,7 @@ class TRIDIAG_ELIM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/CMakeLists.txt b/src/polybench/CMakeLists.txt index 5805926f3..67343059e 100644 --- a/src/polybench/CMakeLists.txt +++ b/src/polybench/CMakeLists.txt @@ -86,5 +86,18 @@ blt_add_library( POLYBENCH_MVT-Cuda.cpp POLYBENCH_MVT-OMP.cpp POLYBENCH_MVT-OMPTarget.cpp + POLYBENCH_2MM-StdPar.cpp + POLYBENCH_3MM-StdPar.cpp + POLYBENCH_ADI-StdPar.cpp + POLYBENCH_ATAX-StdPar.cpp + POLYBENCH_FDTD_2D-StdPar.cpp + POLYBENCH_FLOYD_WARSHALL-StdPar.cpp + POLYBENCH_GEMM-StdPar.cpp + POLYBENCH_GEMVER-StdPar.cpp + POLYBENCH_GESUMMV-StdPar.cpp + POLYBENCH_HEAT_3D-StdPar.cpp + POLYBENCH_JACOBI_1D-StdPar.cpp + POLYBENCH_JACOBI_2D-StdPar.cpp + POLYBENCH_MVT-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/polybench/POLYBENCH_2MM.cpp b/src/polybench/POLYBENCH_2MM.cpp index 03119a863..c1284791f 100644 --- a/src/polybench/POLYBENCH_2MM.cpp +++ b/src/polybench/POLYBENCH_2MM.cpp @@ -78,6 +78,10 @@ POLYBENCH_2MM::POLYBENCH_2MM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_2MM::~POLYBENCH_2MM() diff --git a/src/polybench/POLYBENCH_2MM.hpp b/src/polybench/POLYBENCH_2MM.hpp index 0624257f7..00c2de6d5 100644 
--- a/src/polybench/POLYBENCH_2MM.hpp +++ b/src/polybench/POLYBENCH_2MM.hpp @@ -127,6 +127,7 @@ class POLYBENCH_2MM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_3MM-Seq.cpp b/src/polybench/POLYBENCH_3MM-Seq.cpp index c1ca8c56d..9b99fb889 100644 --- a/src/polybench/POLYBENCH_3MM-Seq.cpp +++ b/src/polybench/POLYBENCH_3MM-Seq.cpp @@ -19,7 +19,6 @@ namespace rajaperf namespace polybench { - void POLYBENCH_3MM::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); diff --git a/src/polybench/POLYBENCH_3MM.cpp b/src/polybench/POLYBENCH_3MM.cpp index 75990394c..3cc6fce66 100644 --- a/src/polybench/POLYBENCH_3MM.cpp +++ b/src/polybench/POLYBENCH_3MM.cpp @@ -86,6 +86,10 @@ POLYBENCH_3MM::POLYBENCH_3MM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_3MM::~POLYBENCH_3MM() diff --git a/src/polybench/POLYBENCH_3MM.hpp b/src/polybench/POLYBENCH_3MM.hpp index 0cf9aabff..35c6407ba 100644 --- a/src/polybench/POLYBENCH_3MM.hpp +++ b/src/polybench/POLYBENCH_3MM.hpp @@ -153,6 +153,7 @@ class POLYBENCH_3MM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp index 
7d0844e69..5ad7544dd 100644 --- a/src/polybench/POLYBENCH_ADI.cpp +++ b/src/polybench/POLYBENCH_ADI.cpp @@ -63,6 +63,10 @@ POLYBENCH_ADI::POLYBENCH_ADI(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_ADI::~POLYBENCH_ADI() diff --git a/src/polybench/POLYBENCH_ADI.hpp b/src/polybench/POLYBENCH_ADI.hpp index 7cd579964..0941506ec 100644 --- a/src/polybench/POLYBENCH_ADI.hpp +++ b/src/polybench/POLYBENCH_ADI.hpp @@ -195,6 +195,7 @@ class POLYBENCH_ADI : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp index 44a805518..440586561 100644 --- a/src/polybench/POLYBENCH_ATAX.cpp +++ b/src/polybench/POLYBENCH_ATAX.cpp @@ -65,6 +65,10 @@ POLYBENCH_ATAX::POLYBENCH_ATAX(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_ATAX::~POLYBENCH_ATAX() diff --git a/src/polybench/POLYBENCH_ATAX.hpp b/src/polybench/POLYBENCH_ATAX.hpp index 8f28a1470..baf2d24f9 100644 --- a/src/polybench/POLYBENCH_ATAX.hpp +++ b/src/polybench/POLYBENCH_ATAX.hpp @@ -115,6 +115,7 @@ class POLYBENCH_ATAX : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void 
setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_FDTD_2D.cpp b/src/polybench/POLYBENCH_FDTD_2D.cpp index dce05e76a..47bb79ce2 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D.cpp @@ -84,6 +84,10 @@ POLYBENCH_FDTD_2D::POLYBENCH_FDTD_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_FDTD_2D::~POLYBENCH_FDTD_2D() diff --git a/src/polybench/POLYBENCH_FDTD_2D.hpp b/src/polybench/POLYBENCH_FDTD_2D.hpp index 7d3696293..1e8a0f68b 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.hpp +++ b/src/polybench/POLYBENCH_FDTD_2D.hpp @@ -113,6 +113,7 @@ class POLYBENCH_FDTD_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp index b9f42b0ed..749da2c3b 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp @@ -17,7 +17,6 @@ namespace rajaperf namespace polybench { - void POLYBENCH_FLOYD_WARSHALL::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps= getRunReps(); diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp index 1022ffe4f..d48f141f0 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp @@ -60,6 +60,10 @@ POLYBENCH_FLOYD_WARSHALL::POLYBENCH_FLOYD_WARSHALL(const RunParams& params) 
setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_FLOYD_WARSHALL::~POLYBENCH_FLOYD_WARSHALL() diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp index 283231d29..e543a188f 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp @@ -76,6 +76,7 @@ class POLYBENCH_FLOYD_WARSHALL : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp index 0ee1f41be..a7dec71b4 100644 --- a/src/polybench/POLYBENCH_GEMM.cpp +++ b/src/polybench/POLYBENCH_GEMM.cpp @@ -70,6 +70,10 @@ POLYBENCH_GEMM::POLYBENCH_GEMM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_GEMM::~POLYBENCH_GEMM() diff --git a/src/polybench/POLYBENCH_GEMM.hpp b/src/polybench/POLYBENCH_GEMM.hpp index ae218397d..72d653eb0 100644 --- a/src/polybench/POLYBENCH_GEMM.hpp +++ b/src/polybench/POLYBENCH_GEMM.hpp @@ -99,6 +99,7 @@ class POLYBENCH_GEMM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git 
a/src/polybench/POLYBENCH_GEMVER-Seq.cpp b/src/polybench/POLYBENCH_GEMVER-Seq.cpp index eeee6f0ec..7ea35e871 100644 --- a/src/polybench/POLYBENCH_GEMVER-Seq.cpp +++ b/src/polybench/POLYBENCH_GEMVER-Seq.cpp @@ -131,7 +131,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t auto poly_gemver_lam1 = [=] (Index_type i, Index_type j) { POLYBENCH_GEMVER_BODY1_RAJA; }; - auto poly_gemver_lam2 = [=] (Index_type /* i */, Real_type &dot) { + auto poly_gemver_lam2 = [=] (Real_type &dot) { POLYBENCH_GEMVER_BODY2_RAJA; }; auto poly_gemver_lam3 = [=] (Index_type i, Index_type j, Real_type &dot) { @@ -162,10 +162,10 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t > >; - using EXEC_POL24 = + using EXEC_POL2 = RAJA::KernelPolicy< RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::Lambda<0, RAJA::Params<0>>, RAJA::statement::For<1, RAJA::loop_exec, RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> >, @@ -175,6 +175,17 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t using EXEC_POL3 = RAJA::loop_exec; + using EXEC_POL4 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -183,7 +194,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t poly_gemver_lam1 ); - RAJA::kernel_param( + RAJA::kernel_param( RAJA::make_tuple(RAJA::RangeSegment{0, n}, RAJA::RangeSegment{0, n}), RAJA::tuple{0.0}, @@ -197,7 +208,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t poly_gemver_lam5 ); - RAJA::kernel_param( + 
RAJA::kernel_param( RAJA::make_tuple(RAJA::RangeSegment{0, n}, RAJA::RangeSegment{0, n}), RAJA::tuple{0.0}, diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp index 24a3f3d1b..22a4837af 100644 --- a/src/polybench/POLYBENCH_GEMVER.cpp +++ b/src/polybench/POLYBENCH_GEMVER.cpp @@ -79,6 +79,10 @@ POLYBENCH_GEMVER::POLYBENCH_GEMVER(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_GEMVER::~POLYBENCH_GEMVER() diff --git a/src/polybench/POLYBENCH_GEMVER.hpp b/src/polybench/POLYBENCH_GEMVER.hpp index 80c96fa94..dd308fccf 100644 --- a/src/polybench/POLYBENCH_GEMVER.hpp +++ b/src/polybench/POLYBENCH_GEMVER.hpp @@ -18,11 +18,9 @@ /// Note: this part of the kernel is modified to avoid /// excessively large checksums /// for (Index_type i = 0; i < N; i++) { -/// Real_type dot = 0.0; /// for (Index_type j = 0; j < N; j++) { -/// dot += beta * A[j][i] * y[j]; +/// x[i] = x[i] + beta * A[j][i] * y[j]; /// } -/// x[i] = dot; /// } /// /// for (Index_type i = 0; i < N; i++) { @@ -98,7 +96,7 @@ xview(i) += zview(i); #define POLYBENCH_GEMVER_BODY6_RAJA \ - dot = wview(i); + dot = w[i]; #define POLYBENCH_GEMVER_BODY7_RAJA \ dot += alpha * Aview(i,j) * xview(j); @@ -152,6 +150,7 @@ class POLYBENCH_GEMVER : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp index c65897e5d..14a86a0fe 100644 --- a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp @@ -93,9 +93,9 
@@ void POLYBENCH_GESUMMV::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( using EXEC_POL = RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, // i + RAJA::statement::For<0, RAJA::loop_exec, RAJA::statement::Lambda<0, RAJA::Params<0,1>>, - RAJA::statement::For<1, RAJA::loop_exec, // j + RAJA::statement::For<1, RAJA::loop_exec, RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Params<0,1>> >, RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0,1>> diff --git a/src/polybench/POLYBENCH_GESUMMV.cpp b/src/polybench/POLYBENCH_GESUMMV.cpp index eb527af27..f1f10c645 100644 --- a/src/polybench/POLYBENCH_GESUMMV.cpp +++ b/src/polybench/POLYBENCH_GESUMMV.cpp @@ -59,6 +59,10 @@ POLYBENCH_GESUMMV::POLYBENCH_GESUMMV(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_GESUMMV::~POLYBENCH_GESUMMV() diff --git a/src/polybench/POLYBENCH_GESUMMV.hpp b/src/polybench/POLYBENCH_GESUMMV.hpp index c8f71ee84..00361d757 100644 --- a/src/polybench/POLYBENCH_GESUMMV.hpp +++ b/src/polybench/POLYBENCH_GESUMMV.hpp @@ -98,6 +98,7 @@ class POLYBENCH_GESUMMV : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp index 4afb06d21..be1feed9c 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp @@ -107,6 +107,13 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( POLYBENCH_HEAT_3D_VIEWS_RAJA; + auto poly_heat3d_lam1 = [=](Index_type i, Index_type j, Index_type k) { + 
POLYBENCH_HEAT_3D_BODY1_RAJA; + }; + auto poly_heat3d_lam2 = [=](Index_type i, Index_type j, Index_type k) { + POLYBENCH_HEAT_3D_BODY2_RAJA; + }; + using EXEC_POL = RAJA::KernelPolicy< RAJA::statement::For<0, RAJA::loop_exec, @@ -115,6 +122,13 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( RAJA::statement::Lambda<0> > > + >, + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1> + > + > > >; @@ -127,20 +141,8 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( RAJA::RangeSegment{1, N-1}, RAJA::RangeSegment{1, N-1}), - [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY1_RAJA; - } - - ); - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}), - - [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY2_RAJA; - } - + poly_heat3d_lam1, + poly_heat3d_lam2 ); } diff --git a/src/polybench/POLYBENCH_HEAT_3D.cpp b/src/polybench/POLYBENCH_HEAT_3D.cpp index 567192b9a..ec86de900 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D.cpp @@ -70,6 +70,10 @@ POLYBENCH_HEAT_3D::POLYBENCH_HEAT_3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_HEAT_3D::~POLYBENCH_HEAT_3D() diff --git a/src/polybench/POLYBENCH_HEAT_3D.hpp b/src/polybench/POLYBENCH_HEAT_3D.hpp index 81ab06e0e..850efee83 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.hpp +++ b/src/polybench/POLYBENCH_HEAT_3D.hpp @@ -124,6 +124,7 @@ class POLYBENCH_HEAT_3D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, 
size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); @@ -140,6 +141,8 @@ class POLYBENCH_HEAT_3D : public KernelBase Index_type m_N; Index_type m_tsteps; + Real_type m_factor; + Real_ptr m_A; Real_ptr m_B; Real_ptr m_Ainit; diff --git a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp index f23ccdf06..0dec5ba7b 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp @@ -18,7 +18,6 @@ namespace rajaperf namespace polybench { - void POLYBENCH_JACOBI_1D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps= getRunReps(); diff --git a/src/polybench/POLYBENCH_JACOBI_1D.cpp b/src/polybench/POLYBENCH_JACOBI_1D.cpp index f86bb5956..a8aa3e089 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.cpp @@ -67,6 +67,10 @@ POLYBENCH_JACOBI_1D::POLYBENCH_JACOBI_1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_JACOBI_1D::~POLYBENCH_JACOBI_1D() diff --git a/src/polybench/POLYBENCH_JACOBI_1D.hpp b/src/polybench/POLYBENCH_JACOBI_1D.hpp index cb3131490..7974d9b47 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.hpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.hpp @@ -70,6 +70,7 @@ class POLYBENCH_JACOBI_1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_JACOBI_2D.cpp b/src/polybench/POLYBENCH_JACOBI_2D.cpp index 1b4f9378a..a8d54e751 100644 
--- a/src/polybench/POLYBENCH_JACOBI_2D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.cpp @@ -69,6 +69,10 @@ POLYBENCH_JACOBI_2D::POLYBENCH_JACOBI_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_JACOBI_2D::~POLYBENCH_JACOBI_2D() diff --git a/src/polybench/POLYBENCH_JACOBI_2D.hpp b/src/polybench/POLYBENCH_JACOBI_2D.hpp index a2ba63181..8adb8bef5 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.hpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.hpp @@ -90,6 +90,7 @@ class POLYBENCH_JACOBI_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp index 3354ca97d..3ac9d680f 100644 --- a/src/polybench/POLYBENCH_MVT.cpp +++ b/src/polybench/POLYBENCH_MVT.cpp @@ -62,6 +62,10 @@ POLYBENCH_MVT::POLYBENCH_MVT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } POLYBENCH_MVT::~POLYBENCH_MVT() diff --git a/src/polybench/POLYBENCH_MVT.hpp b/src/polybench/POLYBENCH_MVT.hpp index dce40baf2..63c8b4287 100644 --- a/src/polybench/POLYBENCH_MVT.hpp +++ b/src/polybench/POLYBENCH_MVT.hpp @@ -112,6 +112,7 @@ class POLYBENCH_MVT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID 
vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/ADD-Seq.cpp b/src/stream/ADD-Seq.cpp index f421d44c2..224c92e7a 100644 --- a/src/stream/ADD-Seq.cpp +++ b/src/stream/ADD-Seq.cpp @@ -17,7 +17,6 @@ namespace rajaperf namespace stream { - void ADD::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); diff --git a/src/stream/ADD.cpp b/src/stream/ADD.cpp index 904c0804b..534deee28 100644 --- a/src/stream/ADD.cpp +++ b/src/stream/ADD.cpp @@ -52,6 +52,10 @@ ADD::ADD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } ADD::~ADD() diff --git a/src/stream/ADD.hpp b/src/stream/ADD.hpp index 07d0dea79..5b144ff1b 100644 --- a/src/stream/ADD.hpp +++ b/src/stream/ADD.hpp @@ -52,6 +52,7 @@ class ADD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/CMakeLists.txt b/src/stream/CMakeLists.txt index 2122b7867..5b475dc91 100644 --- a/src/stream/CMakeLists.txt +++ b/src/stream/CMakeLists.txt @@ -10,30 +10,35 @@ blt_add_library( NAME stream SOURCES ADD.cpp ADD-Seq.cpp + ADD-StdPar.cpp ADD-Hip.cpp ADD-Cuda.cpp ADD-OMP.cpp ADD-OMPTarget.cpp COPY.cpp COPY-Seq.cpp + COPY-StdPar.cpp COPY-Hip.cpp COPY-Cuda.cpp COPY-OMP.cpp COPY-OMPTarget.cpp DOT.cpp DOT-Seq.cpp + DOT-StdPar.cpp DOT-Hip.cpp DOT-Cuda.cpp DOT-OMP.cpp DOT-OMPTarget.cpp MUL.cpp MUL-Seq.cpp + MUL-StdPar.cpp MUL-Hip.cpp MUL-Cuda.cpp MUL-OMP.cpp MUL-OMPTarget.cpp TRIAD.cpp TRIAD-Seq.cpp + TRIAD-StdPar.cpp TRIAD-Hip.cpp TRIAD-Cuda.cpp TRIAD-OMPTarget.cpp diff --git 
a/src/stream/COPY.cpp b/src/stream/COPY.cpp index 251208a4d..40fae2467 100644 --- a/src/stream/COPY.cpp +++ b/src/stream/COPY.cpp @@ -52,6 +52,10 @@ COPY::COPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } COPY::~COPY() diff --git a/src/stream/COPY.hpp b/src/stream/COPY.hpp index 0f23bfa68..ced0ad20a 100644 --- a/src/stream/COPY.hpp +++ b/src/stream/COPY.hpp @@ -51,6 +51,7 @@ class COPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/DOT.cpp b/src/stream/DOT.cpp index 0d9657a8a..d1f701431 100644 --- a/src/stream/DOT.cpp +++ b/src/stream/DOT.cpp @@ -52,6 +52,10 @@ DOT::DOT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } DOT::~DOT() diff --git a/src/stream/DOT.hpp b/src/stream/DOT.hpp index 64d70c630..1a846e747 100644 --- a/src/stream/DOT.hpp +++ b/src/stream/DOT.hpp @@ -51,6 +51,7 @@ class DOT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/MUL.cpp b/src/stream/MUL.cpp index 55eced2b0..38bfe4aca 100644 --- a/src/stream/MUL.cpp +++ b/src/stream/MUL.cpp @@ -52,6 +52,10 @@ MUL::MUL(const RunParams& params) setVariantDefined( 
Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } MUL::~MUL() diff --git a/src/stream/MUL.hpp b/src/stream/MUL.hpp index 1e79e17f9..c8fbbda30 100644 --- a/src/stream/MUL.hpp +++ b/src/stream/MUL.hpp @@ -52,6 +52,7 @@ class MUL : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/TRIAD.cpp b/src/stream/TRIAD.cpp index 543b19642..e4064b9fd 100644 --- a/src/stream/TRIAD.cpp +++ b/src/stream/TRIAD.cpp @@ -56,6 +56,10 @@ TRIAD::TRIAD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } TRIAD::~TRIAD() diff --git a/src/stream/TRIAD.hpp b/src/stream/TRIAD.hpp index 80685ce3c..4426560a1 100644 --- a/src/stream/TRIAD.hpp +++ b/src/stream/TRIAD.hpp @@ -53,6 +53,7 @@ class TRIAD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); From 4b6d7c9bac12c08ed8d5345609dc2ccd90c58aa6 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 7 Jul 2022 17:26:10 +0300 Subject: [PATCH 054/174] starting over with StdPar because git submodules are trash --- src/algorithm/SORT-StdPar.cpp | 75 ++++ src/algorithm/SORTPAIRS-StdPar.cpp | 101 ++++++ src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 123 +++++++ 
src/apps/DIFFUSION3DPA-StdPar.cpp | 334 ++++++++++++++++++ src/apps/ENERGY-StdPar.cpp | 198 +++++++++++ src/apps/FIR-StdPar.cpp | 109 ++++++ src/apps/HALOEXCHANGE-StdPar.cpp | 182 ++++++++++ src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 265 ++++++++++++++ src/apps/LTIMES-StdPar.cpp | 137 +++++++ src/apps/LTIMES_NOVIEW-StdPar.cpp | 131 +++++++ src/apps/MASS3DPA-StdPar.cpp | 231 ++++++++++++ src/apps/PRESSURE-StdPar.cpp | 126 +++++++ src/apps/VOL3D-StdPar.cpp | 111 ++++++ src/basic/DAXPY-StdPar.cpp | 103 ++++++ src/basic/IF_QUAD-StdPar.cpp | 104 ++++++ src/basic/INIT3-StdPar.cpp | 104 ++++++ src/basic/INIT_VIEW1D-StdPar.cpp | 110 ++++++ src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 110 ++++++ src/basic/MAT_MAT_SHARED-StdPar.cpp | 254 +++++++++++++ src/basic/MULADDSUB-StdPar.cpp | 103 ++++++ src/basic/NESTED_INIT-StdPar.cpp | 150 ++++++++ src/basic/PI_ATOMIC-StdPar.cpp | 121 +++++++ src/basic/PI_REDUCE-StdPar.cpp | 118 +++++++ src/basic/REDUCE3_INT-StdPar.cpp | 141 ++++++++ src/basic/TRAP_INT-StdPar.cpp | 132 +++++++ src/lcals/DIFF_PREDICT-StdPar.cpp | 103 ++++++ src/lcals/EOS-StdPar.cpp | 103 ++++++ src/lcals/FIRST_DIFF-StdPar.cpp | 103 ++++++ src/lcals/FIRST_MIN-StdPar.cpp | 114 ++++++ src/lcals/FIRST_SUM-StdPar.cpp | 103 ++++++ src/lcals/GEN_LIN_RECUR-StdPar.cpp | 126 +++++++ src/lcals/HYDRO_1D-StdPar.cpp | 104 ++++++ src/lcals/HYDRO_2D-StdPar.cpp | 195 ++++++++++ src/lcals/INT_PREDICT-StdPar.cpp | 104 ++++++ src/lcals/PLANCKIAN-StdPar.cpp | 105 ++++++ src/lcals/TRIDIAG_ELIM-StdPar.cpp | 104 ++++++ src/polybench/POLYBENCH_2MM-StdPar.cpp | 255 +++++++++++++ src/polybench/POLYBENCH_3MM-StdPar.cpp | 331 +++++++++++++++++ src/polybench/POLYBENCH_ADI-StdPar.cpp | 236 +++++++++++++ src/polybench/POLYBENCH_ATAX-StdPar.cpp | 213 +++++++++++ src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 226 ++++++++++++ .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 158 +++++++++ src/polybench/POLYBENCH_GEMM-StdPar.cpp | 192 ++++++++++ src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 255 +++++++++++++ 
src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 145 ++++++++ src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 188 ++++++++++ src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 137 +++++++ src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 176 +++++++++ src/polybench/POLYBENCH_MVT-StdPar.cpp | 204 +++++++++++ src/stream/ADD-StdPar.cpp | 103 ++++++ src/stream/COPY-StdPar.cpp | 88 +++++ src/stream/DOT-StdPar.cpp | 116 ++++++ src/stream/MUL-StdPar.cpp | 103 ++++++ src/stream/TRIAD-StdPar.cpp | 103 ++++++ 54 files changed, 8166 insertions(+) create mode 100644 src/algorithm/SORT-StdPar.cpp create mode 100644 src/algorithm/SORTPAIRS-StdPar.cpp create mode 100644 src/apps/DEL_DOT_VEC_2D-StdPar.cpp create mode 100644 src/apps/DIFFUSION3DPA-StdPar.cpp create mode 100644 src/apps/ENERGY-StdPar.cpp create mode 100644 src/apps/FIR-StdPar.cpp create mode 100644 src/apps/HALOEXCHANGE-StdPar.cpp create mode 100644 src/apps/HALOEXCHANGE_FUSED-StdPar.cpp create mode 100644 src/apps/LTIMES-StdPar.cpp create mode 100644 src/apps/LTIMES_NOVIEW-StdPar.cpp create mode 100644 src/apps/MASS3DPA-StdPar.cpp create mode 100644 src/apps/PRESSURE-StdPar.cpp create mode 100644 src/apps/VOL3D-StdPar.cpp create mode 100644 src/basic/DAXPY-StdPar.cpp create mode 100644 src/basic/IF_QUAD-StdPar.cpp create mode 100644 src/basic/INIT3-StdPar.cpp create mode 100644 src/basic/INIT_VIEW1D-StdPar.cpp create mode 100644 src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp create mode 100644 src/basic/MAT_MAT_SHARED-StdPar.cpp create mode 100644 src/basic/MULADDSUB-StdPar.cpp create mode 100644 src/basic/NESTED_INIT-StdPar.cpp create mode 100644 src/basic/PI_ATOMIC-StdPar.cpp create mode 100644 src/basic/PI_REDUCE-StdPar.cpp create mode 100644 src/basic/REDUCE3_INT-StdPar.cpp create mode 100644 src/basic/TRAP_INT-StdPar.cpp create mode 100644 src/lcals/DIFF_PREDICT-StdPar.cpp create mode 100644 src/lcals/EOS-StdPar.cpp create mode 100644 src/lcals/FIRST_DIFF-StdPar.cpp create mode 100644 src/lcals/FIRST_MIN-StdPar.cpp create mode 
100644 src/lcals/FIRST_SUM-StdPar.cpp create mode 100644 src/lcals/GEN_LIN_RECUR-StdPar.cpp create mode 100644 src/lcals/HYDRO_1D-StdPar.cpp create mode 100644 src/lcals/HYDRO_2D-StdPar.cpp create mode 100644 src/lcals/INT_PREDICT-StdPar.cpp create mode 100644 src/lcals/PLANCKIAN-StdPar.cpp create mode 100644 src/lcals/TRIDIAG_ELIM-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_2MM-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_3MM-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_ADI-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_ATAX-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_GEMM-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_GEMVER-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_GESUMMV-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp create mode 100644 src/polybench/POLYBENCH_MVT-StdPar.cpp create mode 100644 src/stream/ADD-StdPar.cpp create mode 100644 src/stream/COPY-StdPar.cpp create mode 100644 src/stream/DOT-StdPar.cpp create mode 100644 src/stream/MUL-StdPar.cpp create mode 100644 src/stream/TRIAD-StdPar.cpp diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp new file mode 100644 index 000000000..ba808313e --- /dev/null +++ b/src/algorithm/SORT-StdPar.cpp @@ -0,0 +1,75 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SORT.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SORT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + SORT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::sort( std::execution::par_unseq, + STD_SORT_ARGS); + + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::sort(RAJA_SORT_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { + std::cout << "\n SORT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp new file mode 100644 index 000000000..f82b260e5 --- /dev/null +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -0,0 +1,101 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SORTPAIRS.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include +#include +#include +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + SORTPAIRS_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + using pair_type = std::pair; + + std::vector vector_of_pairs; + vector_of_pairs.reserve(iend-ibegin); + + for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]); + } + + std::sort( std::execution::par_unseq, + vector_of_pairs.begin(), vector_of_pairs.end(), + [](pair_type const& lhs, pair_type const& rhs) { + return lhs.first < rhs.first; + }); + + //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + std::for_each( //std::execution::par_unseq, + begin, end, + [=](Index_type iemp) { + const pair_type& pair = vector_of_pairs[iemp - ibegin]; + x[iend*irep + iemp] = pair.first; + i[iend*irep + iemp] = pair.second; + }); + + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::sort_pairs(RAJA_SORTPAIRS_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { + std::cout << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp new 
file mode 100644
index 000000000..93fde5151
--- /dev/null
+++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp
@@ -0,0 +1,123 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DEL_DOT_VEC_2D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include
+#include
+
+#include "AppsData.hpp"
+
+#include "camp/resource.hpp"
+
+#include
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) // Times the DEL_DOT_VEC_2D kernel for the StdPar variant selected by vid; tune_idx is not read in this body.
+{
+#if defined(RUN_STDPAR) // the whole body is compiled out when RUN_STDPAR is not defined
+
+  const Index_type run_reps = getRunReps(); // number of timed repetitions
+  const Index_type ibegin = 0;
+  const Index_type iend = m_domain->n_real_zones; // one iteration per real zone of the domain
+
+  auto begin = counting_iterator(ibegin); // index range [ibegin, iend) for std::for_each
+  auto end = counting_iterator(iend);
+
+  DEL_DOT_VEC_2D_DATA_SETUP; // macro defined outside this file; presumably declares x, y, xdot, ydot and the other kernel data - confirm in DEL_DOT_VEC_2D.hpp
+
+  NDSET2D(m_domain->jp, x,x1,x2,x3,x4) ; // NDSET2D is defined elsewhere; presumably derives the four corner-node pointers from the base array - confirm in AppsData.hpp
+  NDSET2D(m_domain->jp, y,y1,y2,y3,y4) ;
+  NDSET2D(m_domain->jp, xdot,fx1,fx2,fx3,fx4) ;
+  NDSET2D(m_domain->jp, ydot,fy1,fy2,fy3,fy4) ;
+
+  switch ( vid ) {
+
+    case Base_StdPar : { // kernel body written inline in the parallel for_each
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type ii) {
+          DEL_DOT_VEC_2D_BODY_INDEX; // presumably maps the loop counter ii to a zone index i - confirm in the header
+          DEL_DOT_VEC_2D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : { // same work, but routed through a named lambda
+
+      auto deldotvec2d_base_lam = [=](Index_type ii) {
+                                    DEL_DOT_VEC_2D_BODY_INDEX;
+                                    DEL_DOT_VEC_2D_BODY;
+                                  };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type ii) {
+          deldotvec2d_base_lam(ii);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : { // RAJA::forall over a list segment of the real zones
+
+      camp::resources::Resource working_res{camp::resources::Host()};
+      RAJA::TypedListSegment zones(m_domain->real_zones,
+                                   m_domain->n_real_zones,
+                                   working_res); // built before startTimer(), so segment construction is not timed
+
+      auto deldotvec2d_lam = [=](Index_type i) { // receives the zone index directly, so no BODY_INDEX step here
+                               DEL_DOT_VEC_2D_BODY;
+                             };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall(zones, deldotvec2d_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : { // any other variant id is only reported, nothing is timed
+      std::cout << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp
new file mode 100644
index 000000000..38ee4da02
--- /dev/null
+++ b/src/apps/DIFFUSION3DPA-StdPar.cpp
@@ -0,0 +1,334 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +// Uncomment to add compiler directives for loop unrolling +//#define USE_RAJAPERF_UNROLL + +#include "DIFFUSION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf { +namespace apps { + +void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { + +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + DIFFUSION3DPA_DATA_SETUP; + + auto begin = counting_iterator(0); + auto end = counting_iterator(NE); + + switch (vid) { + + case Base_StdPar: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](int e) { + + DIFFUSION3DPA_0_CPU; + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_1; + } + } + } + + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_2; + } + } + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_3; + } + } + } + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(qy, y, DPA_Q1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_4; + } + } + } + + CPU_FOREACH(qz, z, DPA_Q1D) { + CPU_FOREACH(qy, y, DPA_Q1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_5; + } + } + } + + CPU_FOREACH(d, y, DPA_D1D) { + CPU_FOREACH(q, x, DPA_Q1D) { + DIFFUSION3DPA_6; + } + } + + CPU_FOREACH(qz, z, DPA_Q1D) { + CPU_FOREACH(qy, y, DPA_Q1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_7; + } + } + } + + CPU_FOREACH(qz, z, DPA_Q1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_8; + } + } + } + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_9; + } + } + } + + }); // element loop + } + stopTimer(); + + 
break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_Seq: { + + // Currently Teams requires two policies if compiled with a device + using launch_policy = RAJA::expt::LaunchPolicy; + + using outer_x = RAJA::expt::LoopPolicy; + + using inner_x = RAJA::expt::LoopPolicy; + + using inner_y = RAJA::expt::LoopPolicy; + + using inner_z = RAJA::expt::LoopPolicy; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Grid is empty as the host does not need a compute grid to be specified + RAJA::expt::launch( + RAJA::expt::Grid(), + [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + [&](int e) { + + DIFFUSION3DPA_0_CPU; + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dx) { + + DIFFUSION3DPA_1; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + [&](int RAJA_UNUSED_ARG(dz)) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qx) { + + DIFFUSION3DPA_2; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qx) { + + DIFFUSION3DPA_3; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, 
RAJA::RangeSegment(0, DPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qx) { + + DIFFUSION3DPA_4; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qx) { + + DIFFUSION3DPA_5; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), + [&](int RAJA_UNUSED_ARG(dz)) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int d) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int q) { + + DIFFUSION3DPA_6; + + } // lambda (q) + ); // RAJA::expt::loop + } // lambda (d) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dx) { + + DIFFUSION3DPA_7; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), + [&](int qz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dx) { + + DIFFUSION3DPA_8; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (qz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + 
[&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), + [&](int dx) { + + DIFFUSION3DPA_9; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + } // lambda (e) + ); // RAJA::expt::loop + + } // outer lambda (ctx) + ); // RAJA::expt::launch + } // loop over kernel reps + stopTimer(); + + return; + } +#endif // RUN_RAJA_STDPAR + + default: + std::cout << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid + << std::endl; + } + +#else + RAJA_UNUSED_VAR(vid); +#endif +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp new file mode 100644 index 000000000..fceadd05e --- /dev/null +++ b/src/apps/ENERGY-StdPar.cpp @@ -0,0 +1,198 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "ENERGY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include
+#include
+
+#include
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) // Times the six-stage ENERGY kernel for the StdPar variant selected by vid; tune_idx is not read in this body.
+{
+#if defined(RUN_STDPAR) // the whole body is compiled out when RUN_STDPAR is not defined
+
+  const Index_type run_reps = getRunReps(); // number of timed repetitions
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator(ibegin); // index range [ibegin, iend) shared by every stage
+  auto end = counting_iterator(iend);
+
+  ENERGY_DATA_SETUP; // macro defined outside this file; presumably declares the arrays the ENERGY_BODYn macros use - confirm in ENERGY.hpp
+
+  auto energy_lam1 = [=](Index_type i) { // named per-stage lambdas, used by the Lambda_StdPar and RAJA_StdPar cases
+                       ENERGY_BODY1;
+                     };
+  auto energy_lam2 = [=](Index_type i) {
+                       ENERGY_BODY2;
+                     };
+  auto energy_lam3 = [=](Index_type i) {
+                       ENERGY_BODY3;
+                     };
+  auto energy_lam4 = [=](Index_type i) {
+                       ENERGY_BODY4;
+                     };
+  auto energy_lam5 = [=](Index_type i) {
+                       ENERGY_BODY5;
+                     };
+  auto energy_lam6 = [=](Index_type i) {
+                       ENERGY_BODY6;
+                     };
+
+  switch ( vid ) {
+
+    case Base_StdPar : { // six back-to-back parallel for_each calls, stage bodies written inline
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY1;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY2;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY3;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY4;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY5;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ENERGY_BODY6;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : { // same six stages, each forwarding to its named lambda
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam1(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam2(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam3(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam4(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam5(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          energy_lam6(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : { // six RAJA::forall calls wrapped in one RAJA::region
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::region( [=]() {
+
+          RAJA::forall(
+            RAJA::RangeSegment(ibegin, iend), energy_lam1);
+
+          RAJA::forall(
+            RAJA::RangeSegment(ibegin, iend), energy_lam2);
+
+          RAJA::forall(
+            RAJA::RangeSegment(ibegin, iend), energy_lam3);
+
+          RAJA::forall(
+            RAJA::RangeSegment(ibegin, iend), energy_lam4);
+
+          RAJA::forall(
+            RAJA::RangeSegment(ibegin, iend), energy_lam5);
+
+          RAJA::forall(
+            RAJA::RangeSegment(ibegin, iend), energy_lam6);
+
+        }); // end sequential region (for single-source code)
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : { // any other variant id is only reported, nothing is timed
+      std::cout << "\n ENERGY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp
new file mode 100644
index 000000000..782a36321
--- /dev/null
+++ b/src/apps/FIR-StdPar.cpp
@@ -0,0 +1,109 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIR.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include
+#include
+
+#include
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void FIR::runStdParVariant(VariantID vid, size_t tune_idx) // Times the FIR kernel for the StdPar variant selected by vid; tune_idx is not read in this body.
+{
+#if defined(RUN_STDPAR) // the whole body is compiled out when RUN_STDPAR is not defined
+
+  const Index_type run_reps = getRunReps(); // number of timed repetitions
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize() - m_coefflen; // iteration count is the problem size reduced by the coefficient length
+
+  auto begin = counting_iterator(ibegin); // index range [ibegin, iend) for std::for_each
+  auto end = counting_iterator(iend);
+
+  FIR_COEFF; // macro defined outside this file; presumably declares coeff_array, which is copied below - confirm in FIR.hpp
+
+  FIR_DATA_SETUP; // macro defined outside this file; presumably declares the input/output pointers used by FIR_BODY - confirm in FIR.hpp
+
+  Real_type coeff[FIR_COEFFLEN]; // local copy of the coefficients; the [=] lambdas below capture this array by value
+  std::copy(std::begin(coeff_array), std::end(coeff_array), std::begin(coeff));
+
+  auto fir_lam = [=](Index_type i) { // named lambda used by the Lambda_StdPar and RAJA_StdPar cases
+                   FIR_BODY;
+                 };
+
+  switch ( vid ) {
+
+    case Base_StdPar : { // kernel body written inline in the parallel for_each
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          FIR_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : { // same work, forwarded to fir_lam
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          fir_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : { // RAJA::forall over the same index range
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall(
+          RAJA::RangeSegment(ibegin, iend), fir_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : { // any other variant id is only reported, nothing is timed
+      std::cout << "\n FIR : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp
new file mode 100644
index 000000000..11e551fda
--- /dev/null
+++ b/src/apps/HALOEXCHANGE-StdPar.cpp
@@ -0,0 +1,182 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HALOEXCHANGE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include
+#include
+
+#include
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) // Times the pack-then-unpack HALOEXCHANGE kernel for the StdPar variant selected by vid; tune_idx is not read in this body.
+{
+#if defined(RUN_STDPAR) // the whole body is compiled out when RUN_STDPAR is not defined
+
+  const Index_type run_reps = getRunReps(); // number of timed repetitions
+
+  HALOEXCHANGE_DATA_SETUP; // macro defined outside this file; presumably declares buffers, vars, the index lists and their lengths - confirm in HALOEXCHANGE.hpp
+
+  auto begin = counting_iterator(0); // neighbor range [0, num_neighbors) for std::for_each
+  auto end = counting_iterator(num_neighbors);
+
+  switch ( vid ) {
+
+    case Base_StdPar : { // parallel over neighbors; variables and list entries are walked serially inside each iteration
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type len = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            for (Index_type i = 0; i < len; i++) {
+              HALOEXCHANGE_PACK_BODY;
+            }
+            buffer += len; // each variable occupies the next len entries of this neighbor's buffer
+          }
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type len = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            for (Index_type i = 0; i < len; i++) {
+              HALOEXCHANGE_UNPACK_BODY;
+            }
+            buffer += len;
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : { // same traversal, with the innermost body wrapped in a lambda built per variable
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type len = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_pack_base_lam = [=](Index_type i) { // captures the current buffer/list/var by value
+                  HALOEXCHANGE_PACK_BODY;
+                };
+            for (Index_type i = 0; i < len; i++) {
+              haloexchange_pack_base_lam(i);
+            }
+            buffer += len;
+          }
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type len = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_unpack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_UNPACK_BODY;
+                };
+            for (Index_type i = 0; i < len; i++) {
+              haloexchange_unpack_base_lam(i);
+            }
+            buffer += len;
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : { // serial loops over neighbors and variables; only the innermost index loop goes through RAJA::forall
+
+      using EXEC_POL = RAJA::loop_exec;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type len = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_pack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_PACK_BODY;
+                };
+            RAJA::forall(
+                RAJA::TypedRangeSegment(0, len),
+                haloexchange_pack_base_lam );
+            buffer += len;
+          }
+        }
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type len = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_unpack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_UNPACK_BODY;
+                };
+            RAJA::forall(
+                RAJA::TypedRangeSegment(0, len),
+                haloexchange_unpack_base_lam );
+            buffer += len;
+          }
+        }
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : { // any other variant id is only reported, nothing is timed
+      std::cout << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp
new file mode 100644
index 000000000..86967eac6
--- /dev/null
+++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp
@@ -0,0 +1,265 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HALOEXCHANGE_FUSED.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include
+#include
+
+#include
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) // Times the fused HALOEXCHANGE kernel: work items are collected serially, then executed in one parallel pass per phase; tune_idx is not read in this body.
+{
+#if defined(RUN_STDPAR) // the whole body is compiled out when RUN_STDPAR is not defined
+
+  const Index_type run_reps = getRunReps(); // number of timed repetitions
+
+  HALOEXCHANGE_FUSED_DATA_SETUP; // macro defined outside this file; presumably declares buffers, vars, the index lists and their lengths - confirm in HALOEXCHANGE_FUSED.hpp
+
+  switch ( vid ) {
+
+    case Base_StdPar : { // work items are (buffer, list, var) pointer triples plus a length
+
+      HALOEXCHANGE_FUSED_MANUAL_FUSER_SETUP; // presumably allocates pack/unpack_ptr_holders and pack/unpack_lens - confirm in the header
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Index_type pack_index = 0; // number of pack work items recorded so far
+
+        for (Index_type l = 0; l < num_neighbors; ++l) { // serial collection pass (inside the timed region)
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type len = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            pack_ptr_holders[pack_index] = ptr_holder{buffer, list, var};
+            pack_lens[pack_index] = len;
+            pack_index += 1;
+            buffer += len;
+          }
+        }
+
+        auto begin = counting_iterator(0);
+        auto end = counting_iterator(pack_index);
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type j) { // one parallel iteration per recorded work item
+          Real_ptr buffer = pack_ptr_holders[j].buffer;
+          Int_ptr list = pack_ptr_holders[j].list;
+          Real_ptr var = pack_ptr_holders[j].var;
+          Index_type len = pack_lens[j];
+          for (Index_type i = 0; i < len; i++) {
+            HALOEXCHANGE_FUSED_PACK_BODY;
+          }
+        });
+
+        Index_type unpack_index = 0; // number of unpack work items recorded so far
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type len = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            unpack_ptr_holders[unpack_index] = ptr_holder{buffer, list, var};
+            unpack_lens[unpack_index] = len;
+            unpack_index += 1;
+            buffer += len;
+          }
+        }
+
+        auto begin2 = counting_iterator(0);
+        auto end2 = counting_iterator(unpack_index);
+        std::for_each( std::execution::par_unseq,
+                       begin2, end2,
+                       [=](Index_type j) {
+          Real_ptr buffer = unpack_ptr_holders[j].buffer;
+          Int_ptr list = unpack_ptr_holders[j].list;
+          Real_ptr var = unpack_ptr_holders[j].var;
+          Index_type len = unpack_lens[j];
+          for (Index_type i = 0; i < len; i++) {
+            HALOEXCHANGE_FUSED_UNPACK_BODY;
+          }
+        });
+
+      }
+      stopTimer();
+
+      HALOEXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN; // presumably releases what the SETUP macro allocated - confirm in the header
+
+      break;
+    }
+
+    case Lambda_StdPar : { // work items are stored lambdas instead of pointer triples
+
+      HALOEXCHANGE_FUSED_MANUAL_LAMBDA_FUSER_SETUP; // presumably provides pack/unpack_lambdas storage and the make_*_lambda helpers - confirm in the header
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Index_type pack_index = 0;
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type len = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            new(&pack_lambdas[pack_index]) pack_lambda_type(make_pack_lambda(buffer, list, var)); // placement-new: constructs the lambda into existing storage
+            pack_lens[pack_index] = len;
+            pack_index += 1;
+            buffer += len;
+          }
+        }
+        auto begin = counting_iterator(0);
+        auto end = counting_iterator(pack_index);
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type j) {
+          auto pack_lambda = pack_lambdas[j]; // local copy of the stored lambda
+          Index_type len = pack_lens[j];
+          for (Index_type i = 0; i < len; i++) {
+            pack_lambda(i);
+          }
+        });
+
+        Index_type unpack_index = 0;
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type len = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            new(&unpack_lambdas[unpack_index]) unpack_lambda_type(make_unpack_lambda(buffer, list, var));
+            unpack_lens[unpack_index] = len;
+            unpack_index += 1;
+            buffer += len;
+          }
+        }
+        auto begin2 = counting_iterator(0);
+        auto end2 = counting_iterator(unpack_index);
+        std::for_each( std::execution::par_unseq,
+                       begin2, end2,
+                       [=](Index_type j) {
+        //for (Index_type j = 0; j < unpack_index; j++) {
+          auto unpack_lambda = unpack_lambdas[j];
+          Index_type len = unpack_lens[j];
+          for (Index_type i = 0; i < len; i++) {
+            unpack_lambda(i);
+          }
+        });
+
+      }
+      stopTimer();
+
+      HALOEXCHANGE_FUSED_MANUAL_LAMBDA_FUSER_TEARDOWN;
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : { // RAJA WorkPool/WorkGroup: enqueue every loop, then run each phase as one group
+
+      using AllocatorHolder = RAJAPoolAllocatorHolder<
+        RAJA::basic_mempool::MemPool>;
+      using Allocator = AllocatorHolder::Allocator;
+
+      AllocatorHolder allocatorHolder;
+
+      using workgroup_policy = RAJA::WorkGroupPolicy <
+                                   RAJA::loop_work,
+                                   RAJA::ordered,
+                                   RAJA::constant_stride_array_of_objects >;
+
+      using workpool = RAJA::WorkPool< workgroup_policy,
+                                       Index_type,
+                                       RAJA::xargs<>,
+                                       Allocator >;
+
+      using workgroup = RAJA::WorkGroup< workgroup_policy,
+                                         Index_type,
+                                         RAJA::xargs<>,
+                                         Allocator >;
+
+      using worksite = RAJA::WorkSite< workgroup_policy,
+                                       Index_type,
+                                       RAJA::xargs<>,
+                                       Allocator >;
+
+      workpool pool_pack (allocatorHolder.template getAllocator());
+      workpool pool_unpack(allocatorHolder.template getAllocator());
+      pool_pack.reserve(num_neighbors * num_vars, 1024ull*1024ull); // reserved before startTimer(), so this is not timed
+      pool_unpack.reserve(num_neighbors * num_vars, 1024ull*1024ull);
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = pack_index_lists[l];
+          Index_type len = pack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_fused_pack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_FUSED_PACK_BODY;
+                };
+            pool_pack.enqueue(
+                RAJA::TypedRangeSegment(0, len),
+                haloexchange_fused_pack_base_lam );
+            buffer += len;
+          }
+        }
+        workgroup group_pack = pool_pack.instantiate();
+        worksite site_pack = group_pack.run(); // executes every enqueued pack loop
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          Real_ptr buffer = buffers[l];
+          Int_ptr list = unpack_index_lists[l];
+          Index_type len = unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            Real_ptr var = vars[v];
+            auto haloexchange_fused_unpack_base_lam = [=](Index_type i) {
+                  HALOEXCHANGE_FUSED_UNPACK_BODY;
+                };
+            pool_unpack.enqueue(
+                RAJA::TypedRangeSegment(0, len),
+                haloexchange_fused_unpack_base_lam );
+            buffer += len;
+          }
+        }
+        workgroup group_unpack = pool_unpack.instantiate();
+        worksite site_unpack = group_unpack.run(); // executes every enqueued unpack loop
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : { // any other variant id is only reported, nothing is timed
+      std::cout << "\n HALOEXCHANGE_FUSED : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp
new file mode 100644
index 000000000..59422d859
--- /dev/null
+++ b/src/apps/LTIMES-StdPar.cpp
@@ -0,0 +1,137 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "LTIMES.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include
+#include
+
+#include
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) // Times the LTIMES kernel for the StdPar variant selected by vid; tune_idx is not read in this body.
+{
+#if defined(RUN_STDPAR) // the whole body is compiled out when RUN_STDPAR is not defined
+
+  const Index_type run_reps = getRunReps(); // number of timed repetitions
+
+  LTIMES_DATA_SETUP; // macro defined outside this file; presumably declares num_d/num_z/num_g/num_m and the kernel arrays - confirm in LTIMES.hpp
+
+  auto begin = counting_iterator(0); // only the z dimension [0, num_z) is parallelized
+  auto end = counting_iterator(num_z);
+
+  switch ( vid ) {
+
+    case Base_StdPar : { // parallel over z; g, m, d run as serial nested loops inside each iteration
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g ) {
+            for (Index_type m = 0; m < num_m; ++m ) {
+              for (Index_type d = 0; d < num_d; ++d ) {
+                LTIMES_BODY;
+              }
+            }
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : { // same loop nest, body routed through a named lambda
+
+      auto ltimes_base_lam = [=](Index_type d, Index_type z,
+                                 Index_type g, Index_type m) { // argument order is (d, z, g, m), unlike the z/g/m/d loop order
+                               LTIMES_BODY;
+                             };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g ) {
+            for (Index_type m = 0; m < num_m; ++m ) {
+              for (Index_type d = 0; d < num_d; ++d ) {
+                ltimes_base_lam(d, z, g, m);
+              }
+            }
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : { // RAJA::kernel over typed index ranges
+
+      LTIMES_VIEWS_RANGES_RAJA; // macro defined outside this file; presumably declares the ID/IZ/IG/IM types, their ranges and the views - confirm in LTIMES.hpp
+
+      auto ltimes_lam = [=](ID d, IZ z, IG g, IM m) {
+                          LTIMES_BODY_RAJA;
+                        };
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<1, RAJA::loop_exec,       // z
+            RAJA::statement::For<2, RAJA::loop_exec,     // g
+              RAJA::statement::For<3, RAJA::loop_exec,   // m
+                RAJA::statement::For<0, RAJA::loop_exec, // d
+                  RAJA::statement::Lambda<0>
+                >
+              >
+            >
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::kernel( RAJA::make_tuple(IDRange(0, num_d),
+                                       IZRange(0, num_z),
+                                       IGRange(0, num_g),
+                                       IMRange(0, num_m)),
+                      ltimes_lam
+                    );
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : { // any other variant id is only reported, nothing is timed
+      std::cout << "\n LTIMES : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp
new file mode 100644
index 000000000..4039f4ffc
--- /dev/null
+++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp
@@ -0,0 +1,131 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "LTIMES_NOVIEW.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include
+#include
+
+#include
+
+namespace rajaperf
+{
+namespace apps
+{
+
+
+void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) // Times the LTIMES_NOVIEW kernel for the StdPar variant selected by vid; tune_idx is not read in this body.
+{
+#if defined(RUN_STDPAR) // the whole body is compiled out when RUN_STDPAR is not defined
+
+  const Index_type run_reps = getRunReps(); // number of timed repetitions
+
+  LTIMES_NOVIEW_DATA_SETUP; // macro defined outside this file; presumably declares num_d/num_z/num_g/num_m and the kernel arrays - confirm in LTIMES_NOVIEW.hpp
+
+  auto begin = counting_iterator(0); // only the z dimension [0, num_z) is parallelized
+  auto end = counting_iterator(num_z);
+
+  auto ltimesnoview_lam = [=](Index_type d, Index_type z,
+                              Index_type g, Index_type m) { // shared by the Lambda_StdPar and RAJA_StdPar cases
+                            LTIMES_NOVIEW_BODY;
+                          };
+
+  switch ( vid ) {
+
+    case Base_StdPar : { // parallel over z; g, m, d run as serial nested loops inside each iteration
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g ) {
+            for (Index_type m = 0; m < num_m; ++m ) {
+              for (Index_type d = 0; d < num_d; ++d ) {
+                LTIMES_NOVIEW_BODY;
+              }
+            }
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : { // same loop nest, body routed through ltimesnoview_lam
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type z) {
+          for (Index_type g = 0; g < num_g; ++g ) {
+            for (Index_type m = 0; m < num_m; ++m ) {
+              for (Index_type d = 0; d < num_d; ++d ) {
+                ltimesnoview_lam(d, z, g, m);
+              }
+            }
+          }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : { // RAJA::kernel over plain RangeSegments
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<1, RAJA::loop_exec,       // z
+            RAJA::statement::For<2, RAJA::loop_exec,     // g
+              RAJA::statement::For<3, RAJA::loop_exec,   // m
+                RAJA::statement::For<0, RAJA::loop_exec, // d
+                  RAJA::statement::Lambda<0>
+                >
+              >
+            >
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment(0, num_d),
+                                       RAJA::RangeSegment(0, num_z),
+                                       RAJA::RangeSegment(0, num_g),
+                                       RAJA::RangeSegment(0, num_m)),
+                      ltimesnoview_lam
+                    );
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : { // any other variant id is only reported, nothing is timed
+      std::cout << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp
new file mode 100644
index 000000000..8c8a6a328
--- /dev/null
+++ b/src/apps/MASS3DPA-StdPar.cpp
@@ -0,0 +1,231 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MASS3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf { +namespace apps { + +//#define USE_RAJA_UNROLL +#define RAJA_DIRECT_PRAGMA(X) _Pragma(#X) +#if defined(USE_RAJA_UNROLL) +#define RAJA_UNROLL(N) RAJA_DIRECT_PRAGMA(unroll(N)) +#else +#define RAJA_UNROLL(N) +#endif +#define CPU_FOREACH(i, k, N) for (int i = 0; i < N; i++) + +void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + MASS3DPA_DATA_SETUP; + + switch (vid) { + + case Base_StdPar: { + + auto begin = counting_iterator(0); + auto end = counting_iterator((int)NE); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](int e) { + + MASS3DPA_0_CPU + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(dx, x, MPA_D1D){ + MASS3DPA_1 + } + CPU_FOREACH(dx, x, MPA_Q1D) { + MASS3DPA_2 + } + } + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(qx, x, MPA_Q1D) { + MASS3DPA_3 + } + } + + CPU_FOREACH(qy, y, MPA_Q1D) { + CPU_FOREACH(qx, x, MPA_Q1D) { + MASS3DPA_4 + } + } + + CPU_FOREACH(qy, y, MPA_Q1D) { + CPU_FOREACH(qx, x, MPA_Q1D) { + MASS3DPA_5 + } + } + + CPU_FOREACH(d, y, MPA_D1D) { + CPU_FOREACH(q, x, MPA_Q1D) { + MASS3DPA_6 + } + } + + CPU_FOREACH(qy, y, MPA_Q1D) { + CPU_FOREACH(dx, x, MPA_D1D) { + MASS3DPA_7 + } + } + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(dx, x, MPA_D1D) { + MASS3DPA_8 + } + } + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(dx, x, MPA_D1D) { + MASS3DPA_9 + } + } + + }); // element loop + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar: { + + //Currently Teams requires two policies if compiled with a device + using launch_policy = RAJA::expt::LaunchPolicy; + + using outer_x = 
RAJA::expt::LoopPolicy; + + using inner_x = RAJA::expt::LoopPolicy; + + using inner_y = RAJA::expt::LoopPolicy; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::expt::launch( + RAJA::expt::HOST, RAJA::expt::Resources(), + [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { + + MASS3DPA_0_CPU + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { + MASS3DPA_1 + }); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int dx) { + MASS3DPA_2 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { + MASS3DPA_3 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { + MASS3DPA_4 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { + MASS3DPA_5 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int d) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int q) { + MASS3DPA_6 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { + MASS3DPA_7 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { + MASS3DPA_8 + }); + }); + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { + MASS3DPA_9 + }); + }); + }); + }); + } + stopTimer(); + + 
return; + } +#endif // RUN_RAJA_STDPAR + + default: + std::cout << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; + } +#endif +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp new file mode 100644 index 000000000..cc1eb2c0a --- /dev/null +++ b/src/apps/PRESSURE-StdPar.cpp @@ -0,0 +1,126 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PRESSURE.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + PRESSURE_DATA_SETUP; + + auto pressure_lam1 = [=](Index_type i) { + PRESSURE_BODY1; + }; + auto pressure_lam2 = [=](Index_type i) { + PRESSURE_BODY2; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + PRESSURE_BODY1; + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + PRESSURE_BODY2; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + pressure_lam1(i); + }); + + 
std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + pressure_lam2(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::region( [=]() { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), pressure_lam1); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), pressure_lam2); + + }); // end sequential region (for single-source code) + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n PRESSURE : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp new file mode 100644 index 000000000..1997e95cf --- /dev/null +++ b/src/apps/VOL3D-StdPar.cpp @@ -0,0 +1,111 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "VOL3D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "AppsData.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>   // std::for_each
+#include <execution>   // std::execution::par_unseq
+
+#include <iostream>    // std::cout
+
+namespace rajaperf
+{
+namespace apps
+{
+// Times run_reps repetitions of the VOL3D kernel for the variant named by vid
+// (Base_StdPar, Lambda_StdPar or RAJA_StdPar); an unknown id only prints a message.
+void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = m_domain->fpz;
+  const Index_type iend = m_domain->lpz+1;   // half-open range [fpz, lpz+1)
+
+  auto begin = counting_iterator(ibegin);
+  auto end = counting_iterator(iend);
+
+  VOL3D_DATA_SETUP;
+
+  NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, y,y0,y1,y2,y3,y4,y5,y6,y7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, z,z0,z1,z2,z3,z4,z5,z6,z7) ;
+
+  auto vol3d_lam = [=](Index_type i) {
+                     VOL3D_BODY;
+                   };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body expanded directly inside the parallel algorithm.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          VOL3D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the body is reached through the pre-built lambda.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          vol3d_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall(   // NOTE(review): no execution-policy template argument is visible here - confirm
+          RAJA::RangeSegment(ibegin, iend), vol3d_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n VOL3D : Unknown variant id = " << vid << std::endl;
+    }
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp
new file mode 100644
index 000000000..61ed338ec
---
/dev/null
+++ b/src/basic/DAXPY-StdPar.cpp
@@ -0,0 +1,103 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DAXPY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>   // std::for_each
+#include <execution>   // std::execution::par_unseq
+
+#include <iostream>    // std::cout
+
+namespace rajaperf
+{
+namespace basic
+{
+// Times run_reps repetitions of the DAXPY kernel for the variant named by vid
+// (Base_StdPar, Lambda_StdPar or RAJA_StdPar); an unknown id only prints a message.
+void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator(ibegin);
+  auto end = counting_iterator(iend);
+
+  DAXPY_DATA_SETUP;
+
+  auto daxpy_lam = [=](Index_type i) {
+                     DAXPY_BODY;
+                   };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body expanded directly inside the parallel algorithm.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          DAXPY_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the body is reached through the pre-built lambda.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          daxpy_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall(   // NOTE(review): no execution-policy template argument is visible here - confirm
+          RAJA::RangeSegment(ibegin, iend), daxpy_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp
new file mode 100644
index
000000000..3c86353ef
--- /dev/null
+++ b/src/basic/IF_QUAD-StdPar.cpp
@@ -0,0 +1,104 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "IF_QUAD.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>   // std::for_each
+#include <execution>   // std::execution::par_unseq
+
+#include <iostream>    // std::cout
+
+namespace rajaperf
+{
+namespace basic
+{
+// Times run_reps repetitions of the IF_QUAD kernel for the variant named by vid
+// (Base_StdPar, Lambda_StdPar or RAJA_StdPar); an unknown id only prints a message.
+void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator(ibegin);
+  auto end = counting_iterator(iend);
+
+  IF_QUAD_DATA_SETUP;
+
+  auto ifquad_lam = [=](Index_type i) {
+                      IF_QUAD_BODY;
+                    };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body expanded directly inside the parallel algorithm.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          IF_QUAD_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the body is reached through the pre-built lambda.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          ifquad_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall(   // NOTE(review): no execution-policy template argument is visible here - confirm
+          RAJA::RangeSegment(ibegin, iend), ifquad_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git
a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp
new file mode 100644
index 000000000..7105fc9d3
--- /dev/null
+++ b/src/basic/INIT3-StdPar.cpp
@@ -0,0 +1,104 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT3.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>   // std::for_each
+#include <execution>   // std::execution::par_unseq
+
+#include <iostream>    // std::cout
+
+namespace rajaperf
+{
+namespace basic
+{
+// Times run_reps repetitions of the INIT3 kernel for the variant named by vid
+// (Base_StdPar, Lambda_StdPar or RAJA_StdPar); an unknown id only prints a message.
+void INIT3::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator(ibegin);
+  auto end = counting_iterator(iend);
+
+  INIT3_DATA_SETUP;
+
+  auto init3_lam = [=](Index_type i) {
+                     INIT3_BODY;
+                   };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body expanded directly inside the parallel algorithm.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          INIT3_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the body is reached through the pre-built lambda.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          init3_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall(   // NOTE(review): no execution-policy template argument is visible here - confirm
+          RAJA::RangeSegment(ibegin, iend), init3_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace
basic
+} // end namespace rajaperf
diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp
new file mode 100644
index 000000000..c79d29b97
--- /dev/null
+++ b/src/basic/INIT_VIEW1D-StdPar.cpp
@@ -0,0 +1,110 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT_VIEW1D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>   // std::for_each
+#include <execution>   // std::execution::par_unseq
+
+#include <iostream>    // std::cout
+
+namespace rajaperf
+{
+namespace basic
+{
+// Times run_reps repetitions of the INIT_VIEW1D kernel for the variant named by
+// vid (Base_StdPar, Lambda_StdPar or RAJA_StdPar); an unknown id only prints a message.
+void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator(ibegin);
+  auto end = counting_iterator(iend);
+
+  INIT_VIEW1D_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body expanded directly inside the parallel algorithm.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          INIT_VIEW1D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // The lambda is built outside the timed region.
+      auto initview1d_base_lam = [=](Index_type i) {
+                                   INIT_VIEW1D_BODY;
+                                 };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          initview1d_base_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      INIT_VIEW1D_VIEW_RAJA;
+
+      auto initview1d_lam = [=](Index_type i) {
+                              INIT_VIEW1D_BODY_RAJA;
+                            };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall(   // NOTE(review): no execution-policy template argument is visible here - confirm
+          RAJA::RangeSegment(ibegin, iend), initview1d_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp
new file mode 100644
index 000000000..4014ccacd
--- /dev/null
+++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp
@@ -0,0 +1,110 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT_VIEW1D_OFFSET.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>   // std::for_each
+#include <execution>   // std::execution::par_unseq
+
+#include <iostream>    // std::cout
+
+namespace rajaperf
+{
+namespace basic
+{
+// Times run_reps repetitions of the INIT_VIEW1D_OFFSET kernel for the variant named
+// by vid; the index range is [1, problem size + 1). Unknown ids only print a message.
+void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize()+1;
+
+  auto begin = counting_iterator(ibegin);
+  auto end = counting_iterator(iend);
+
+  INIT_VIEW1D_OFFSET_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body expanded directly inside the parallel algorithm.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          INIT_VIEW1D_OFFSET_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // The lambda is built outside the timed region.
+      auto initview1doffset_base_lam = [=](Index_type i) {
+                                         INIT_VIEW1D_OFFSET_BODY;
+                                       };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          initview1doffset_base_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      INIT_VIEW1D_OFFSET_VIEW_RAJA;
+
+      auto initview1doffset_lam = [=](Index_type i) {
+                                    INIT_VIEW1D_OFFSET_BODY_RAJA;
+                                  };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall(   // NOTE(review): no execution-policy template argument is visible here - confirm
+          RAJA::RangeSegment(ibegin, iend), initview1doffset_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp
new file mode 100644
index 000000000..02cb8622d
--- /dev/null
+++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp
@@ -0,0 +1,254 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MAT_MAT_SHARED.hpp" + +#include + +namespace rajaperf { +namespace basic { + +void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { + + const Index_type run_reps = getRunReps(); + const Index_type N = m_N; + + MAT_MAT_SHARED_DATA_SETUP; + const Index_type Nx = RAJA_DIVIDE_CEILING_INT(N, TL_SZ); + const Index_type Ny = RAJA_DIVIDE_CEILING_INT(N, TL_SZ); + + switch (vid) { + + case Base_StdPar: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type by = 0; by < Ny; ++by) { + for (Index_type bx = 0; bx < Nx; ++bx) { + + //Work around for when compiling with CLANG and HIP + //See notes in MAT_MAT_SHARED.hpp + MAT_MAT_SHARED_BODY_0_CLANG_HIP_CPU(TL_SZ) + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + MAT_MAT_SHARED_BODY_1(TL_SZ) + } + } + + for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; ++k) { + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + MAT_MAT_SHARED_BODY_2(TL_SZ) + } + } + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + MAT_MAT_SHARED_BODY_3(TL_SZ) + } + } + + } // Sequential loop + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + MAT_MAT_SHARED_BODY_4(TL_SZ) + } + } + } + } + + } // number of iterations + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar: { + + + startTimer(); + for (Index_type irep = 0; irep < run_reps; ++irep) { + + auto outer_y = [&](Index_type by) { + auto outer_x = [&](Index_type bx) { + + MAT_MAT_SHARED_BODY_0_CLANG_HIP_CPU(TL_SZ) + + auto inner_y_1 = [&](Index_type ty) { + auto inner_x_1 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_1(TL_SZ) }; + + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + if (tx < TL_SZ) + inner_x_1(tx); 
+ } + }; + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + if (ty < TL_SZ) + inner_y_1(ty); + } + + for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; ++k) { + + auto inner_y_2 = [&](Index_type ty) { + auto inner_x_2 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_2(TL_SZ) }; + + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + inner_x_2(tx); + } + }; + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + inner_y_2(ty); + } + + auto inner_y_3 = [&](Index_type ty) { + auto inner_x_3 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_3(TL_SZ) }; + + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + inner_x_3(tx); + } + }; + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + inner_y_3(ty); + } + } + + auto inner_y_4 = [&](Index_type ty) { + auto inner_x_4 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_4(TL_SZ) }; + + for (Index_type tx = 0; tx < TL_SZ; ++tx) { + inner_x_4(tx); + } + }; + + for (Index_type ty = 0; ty < TL_SZ; ++ty) { + inner_y_4(ty); + } + }; // outer_x + + for (Index_type bx = 0; bx < Nx; ++bx) { + outer_x(bx); + } + }; + + for (Index_type by = 0; by < Ny; ++by) { + outer_y(by); + } + + } // irep + stopTimer(); + + break; + } + + case RAJA_Sq: { + + using launch_policy = RAJA::expt::LaunchPolicy; + + using outer_x = RAJA::expt::LoopPolicy; + + using outer_y = RAJA::expt::LoopPolicy; + + using inner_x = RAJA::expt::LoopPolicy; + + using inner_y = RAJA::expt::LoopPolicy; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + //Grid is empty as the host does not need a compute grid to be specified + RAJA::expt::launch(RAJA::expt::Grid(), + [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Ny), + [&](Index_type by) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Nx), + [&](Index_type bx) { + + MAT_MAT_SHARED_BODY_0(TL_SZ) + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + + [&](Index_type ty) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type tx) { + MAT_MAT_SHARED_BODY_1(TL_SZ) + 
} + ); // RAJA::expt::loop + } + ); // RAJA::expt::loop + + for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; k++) { + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type ty) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type tx) { + MAT_MAT_SHARED_BODY_2(TL_SZ) + } + ); // RAJA::expt::loop + } + ); // RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type ty) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type tx) { + MAT_MAT_SHARED_BODY_3(TL_SZ) + } + ); // RAJA::expt::loop + } + ); // RAJA::expt::loop + + ctx.teamSync(); + + } // for (k) + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type ty) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), + [&](Index_type tx) { + MAT_MAT_SHARED_BODY_4(TL_SZ) + } + ); // RAJA::expt::loop + } + ); // RAJA::expt::loop + + } // lambda (bx) + ); // RAJA::expt::loop + } // lambda (by) + ); // RAJA::expt::loop + + } // outer lambda (ctx) + ); // RAJA::expt::launch + + } // loop over kernel reps + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default: { + std::cout << "\n MAT_MAT_SHARED : Unknown variant id = " << vid + << std::endl; + } + } +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp new file mode 100644 index 000000000..e86287d75 --- /dev/null +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MULADDSUB.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>   // std::for_each
+#include <execution>   // std::execution::par_unseq
+
+#include <iostream>    // std::cout
+
+namespace rajaperf
+{
+namespace basic
+{
+// Times run_reps repetitions of the MULADDSUB kernel for the variant named by vid
+// (Base_StdPar, Lambda_StdPar or RAJA_StdPar); an unknown id only prints a message.
+void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator(ibegin);
+  auto end = counting_iterator(iend);
+
+  MULADDSUB_DATA_SETUP;
+
+  auto mas_lam = [=](Index_type i) {
+                   MULADDSUB_BODY;
+                 };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // Kernel body expanded directly inside the parallel algorithm.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          MULADDSUB_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same loop, but the body is reached through the pre-built lambda.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          mas_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall(   // NOTE(review): no execution-policy template argument is visible here - confirm
+          RAJA::RangeSegment(ibegin, iend), mas_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp
new file mode 100644
index 000000000..a37a88dda
--- /dev/null
+++ b/src/basic/NESTED_INIT-StdPar.cpp
@@ -0,0 +1,150 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance
Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "NESTED_INIT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>   // std::for_each
+#include <execution>   // std::execution::par_unseq
+
+#include <iostream>    // std::cout
+
+namespace rajaperf
+{
+namespace basic
+{
+// Times run_reps repetitions of the NESTED_INIT kernel for the variant named by vid.
+// USE_STDPAR_COLLAPSE picks one flat ni*nj*nk loop; otherwise only k is parallel.
+void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+
+  NESTED_INIT_DATA_SETUP;
+
+#ifdef USE_STDPAR_COLLAPSE
+  auto begin = counting_iterator(0);
+  auto end = counting_iterator(ni*nj*nk);   // one index per (i,j,k) triple
+#else
+  auto begin = counting_iterator(0);
+  auto end = counting_iterator(nk);         // one index per k plane
+#endif
+
+  auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) {
+                          NESTED_INIT_BODY;
+                        };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type idx) {
+            const auto k  = idx / (nj*ni);   // slowest-varying index
+            const auto ij = idx % (nj*ni);
+            const auto j  = ij / ni;
+            const auto i  = ij % ni;         // fastest-varying index
+#else
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type k) {
+            for (Index_type j = 0; j < nj; ++j )
+            for (Index_type i = 0; i < ni; ++i )
+#endif
+            {
+              NESTED_INIT_BODY;
+              // (i,j,k) come from idx when collapsed, from the loops above otherwise.
+            }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type idx) {
+            const auto k  = idx / (nj*ni);
+            const auto ij = idx % (nj*ni);
+            const auto j  = ij / ni;
+            const auto i  = ij % ni;
+#else
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type k) {
+            for (Index_type j = 0; j < nj; ++j )
+            for (Index_type i = 0; i < ni; ++i )
+#endif
+            {
+              nestedinit_lam(i, j, k);
+            }
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<2, RAJA::loop_exec,    // k
+            RAJA::statement::For<1, RAJA::loop_exec,  // j
+              RAJA::statement::For<0, RAJA::loop_exec,// i
+                RAJA::statement::Lambda<0>
+              >
+            >
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::kernel<EXEC_POL>( RAJA::make_tuple(RAJA::RangeSegment(0, ni),
+                                                 RAJA::RangeSegment(0, nj),
+                                                 RAJA::RangeSegment(0, nk)),
+             nestedinit_lam
+        );
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp
new file mode 100644
index 000000000..27b7557bf
--- /dev/null
+++ b/src/basic/PI_ATOMIC-StdPar.cpp
@@ -0,0 +1,121 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PI_ATOMIC.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA)
+#include <cuda/std/atomic>
+typedef cuda::std::atomic<double> myAtomic;
+#else
+#include <atomic>
+typedef std::atomic<double> myAtomic;
+#endif
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+//
+// Atomically adds val to target using a compare-exchange loop.
+//
+// "a = a + val" on an atomic is an atomic load followed by an atomic store,
+// so two threads can read the same old value and one contribution is lost.
+// The loop below retries until the sum is stored against the value it was
+// computed from. It only needs load/compare_exchange_weak, which both
+// std::atomic<double> and cuda::std::atomic<double> provide in C++17.
+//
+static inline void pi_atomic_add(myAtomic& target, double val)
+{
+  double seen = target.load();
+  while ( !target.compare_exchange_weak(seen, seen + val) ) {
+    // compare_exchange_weak refreshed 'seen' with the current value; retry.
+  }
+}
+
+//
+// Runs the PI_ATOMIC kernel for the requested StdPar variant, timing
+// run_reps repetitions between startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+// dx and pi come from PI_ATOMIC_DATA_SETUP (defined outside this file).
+//
+void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  PI_ATOMIC_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        myAtomic a_pi{m_pi_init};
+        // 'par' rather than 'par_unseq': synchronizing atomic operations
+        // are not permitted inside unsequenced (vectorized) execution.
+        std::for_each( std::execution::par,
+                       begin, end,
+                       [=,&a_pi](Index_type i) {
+          double x = (double(i) + 0.5) * dx;
+          pi_atomic_add(a_pi, dx / (1.0 + x * x));
+        });
+        *pi = a_pi * 4.0;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto piatomic_base_lam = [=](Index_type i, myAtomic &a_pi) {
+                                 double x = (double(i) + 0.5) * dx;
+                                 pi_atomic_add(a_pi, dx / (1.0 + x * x));
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        myAtomic a_pi{m_pi_init};
+        // This variant accumulates with a plain sequential loop.
+        for (Index_type i = ibegin; i < iend; ++i ) {
+          piatomic_base_lam(i,a_pi);
+        }
+        *pi = a_pi * 4.0;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        *pi = m_pi_init;
+        // NOTE(review): loop_exec / seq_atomic are restored guesses.
+        RAJA::forall<RAJA::loop_exec>( RAJA::RangeSegment(ibegin, iend),
+          [=](Index_type i) {
+            double x = (double(i) + 0.5) * dx;
+            RAJA::atomicAdd<RAJA::seq_atomic>(pi, dx / (1.0 + x * x));
+        });
+        *pi *= 4.0;
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp
new file mode 100644
index 000000000..b2c075278
--- /dev/null
+++ b/src/basic/PI_REDUCE-StdPar.cpp
@@ -0,0 +1,118 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PI_REDUCE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+#include <functional>
+#include <numeric>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+//
+// Runs the PI_REDUCE kernel for the requested StdPar variant, timing
+// run_reps repetitions between startTimer() and stopTimer().
+// Each repetition sums dx / (1 + x*x) over [ibegin, iend), adds it to
+// m_pi_init and stores four times the total in m_pi.
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+//
+void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  PI_REDUCE_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Real_type pi = m_pi_init;
+        pi += std::transform_reduce( std::execution::par_unseq,
+                                     begin, end,
+                                     Real_type(0), std::plus<Real_type>(),
+                        [=](Index_type i) {
+          Real_type x = (Real_type(i) + 0.5) * dx;
+          return dx / (1.0 + x * x);
+        });
+        m_pi = 4.0 * pi;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto pireduce_base_lam = [=](Index_type i) -> Real_type {
+                                 Real_type x = (Real_type(i) + 0.5) * dx;
+                                 return dx / (1.0 + x * x);
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Real_type pi = m_pi_init;
+
+        pi += std::transform_reduce( std::execution::par_unseq,
+                                     begin, end,
+                                     Real_type(0), std::plus<Real_type>(), pireduce_base_lam);
+
+        m_pi = 4.0 * pi;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): seq_reduce / loop_exec are restored guesses.
+        RAJA::ReduceSum<RAJA::seq_reduce, Real_type> pi(m_pi_init);
+
+        RAJA::forall<RAJA::loop_exec>( RAJA::RangeSegment(ibegin, iend),
+          [=](Index_type i) {
+            PI_REDUCE_BODY;
+        });
+
+        m_pi = 4.0 * pi.get();
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n PI_REDUCE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp
new file mode 100644
index 000000000..b40129c17
--- /dev/null
+++ b/src/basic/REDUCE3_INT-StdPar.cpp
@@ -0,0 +1,141 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "REDUCE3_INT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include <array>
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+#include <numeric>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+//
+// Runs the REDUCE3_INT kernel (sum, min and max of vec over [ibegin, iend))
+// for the requested StdPar variant, timing run_reps repetitions between
+// startTimer() and stopTimer(). Each repetition folds its result into
+// m_vsum, m_vmin and m_vmax.
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+// vec comes from REDUCE3_INT_DATA_SETUP (defined outside this file).
+//
+void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  REDUCE3_INT_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // One triple {sum, min, max} so a single reduction pass yields
+        // all three results.
+        typedef std::array<Int_type,3> Reduce_type;
+        Reduce_type result =
+        std::transform_reduce( std::execution::par_unseq,
+                               begin, end,
+                               Reduce_type{m_vsum_init,m_vmin_init,m_vmax_init},
+                        // combine two partial triples
+                        [=](Reduce_type a, Reduce_type b) -> Reduce_type {
+                             auto plus = a[0] + b[0];
+                             auto min  = std::min(a[1],b[1]);
+                             auto max  = std::max(a[2],b[2]);
+                             Reduce_type red{ plus, min, max };
+                             return red;
+                        },
+                        // map element i to the triple {v, v, v}
+                        [=](Index_type i) -> Reduce_type {
+                             Reduce_type val{ vec[i], vec[i], vec[i] };
+                             return val;
+                        }
+        );
+
+        m_vsum += result[0];
+        m_vmin = RAJA_MIN(m_vmin, result[1]);
+        m_vmax = RAJA_MAX(m_vmax, result[2]);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto init3_base_lam = [=](Index_type i) -> Int_type {
+                              return vec[i];
+                            };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Int_type vsum = m_vsum_init;
+        Int_type vmin = m_vmin_init;
+        Int_type vmax = m_vmax_init;
+
+        // This variant reduces with a plain sequential loop.
+        for (Index_type i = ibegin; i < iend; ++i ) {
+          vsum += init3_base_lam(i);
+          vmin = RAJA_MIN(vmin, init3_base_lam(i));
+          vmax = RAJA_MAX(vmax, init3_base_lam(i));
+        }
+
+        m_vsum += vsum;
+        m_vmin = RAJA_MIN(m_vmin, vmin);
+        m_vmax = RAJA_MAX(m_vmax, vmax);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): seq_reduce / loop_exec are restored guesses.
+        RAJA::ReduceSum<RAJA::seq_reduce, Int_type> vsum(m_vsum_init);
+        RAJA::ReduceMin<RAJA::seq_reduce, Int_type> vmin(m_vmin_init);
+        RAJA::ReduceMax<RAJA::seq_reduce, Int_type> vmax(m_vmax_init);
+
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
+          REDUCE3_INT_BODY_RAJA;
+        });
+
+        m_vsum += static_cast<Int_type>(vsum.get());
+        m_vmin = RAJA_MIN(m_vmin, static_cast<Int_type>(vmin.get()));
+        m_vmax = RAJA_MAX(m_vmax, static_cast<Int_type>(vmax.get()));
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp
new file mode 100644
index 000000000..359ed363a
--- /dev/null
+++ b/src/basic/TRAP_INT-StdPar.cpp
@@ -0,0 +1,132 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "TRAP_INT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names in this file were missing from this copy
+// of the patch; they are restored from how the names are used below
+// (<cmath> and <functional> added for sqrt and std::plus) -- confirm against
+// the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+#include <functional>
+#include <numeric>
+
+#include <cmath>
+#include <iostream>
+
+namespace rajaperf
+{
+namespace basic
+{
+
+//
+// Function used in TRAP_INT loop.
+//
+// Returns 1 / sqrt((x - xp)^2 + (y - yp)^2).
+// There is no guard for (x, y) == (xp, yp), where the divisor is zero.
+//
+RAJA_INLINE
+Real_type trap_int_func(Real_type x,
+                        Real_type y,
+                        Real_type xp,
+                        Real_type yp)
+{
+   Real_type denom = (x - xp)*(x - xp) + (y - yp)*(y - yp);
+   denom = 1.0/sqrt(denom);
+   return denom;
+}
+
+//
+// Runs the TRAP_INT kernel for the requested StdPar variant, timing
+// run_reps repetitions between startTimer() and stopTimer().
+// Each repetition sums trap_int_func(x0 + i*h, y, xp, yp) over
+// [ibegin, iend), adds m_sumx_init, and accumulates the total times h
+// into m_sumx.
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+// x0, h, y, xp, yp come from TRAP_INT_DATA_SETUP (defined outside this file).
+//
+// NOTE(review): the template arguments in this function were missing from
+// this copy of the patch; they are restored from usage -- confirm against
+// the upstream commit.
+//
+void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  TRAP_INT_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Real_type sumx = m_sumx_init;
+
+        sumx += std::transform_reduce( std::execution::par_unseq,
+                                       begin, end,
+                                       Real_type(0), std::plus<Real_type>(),
+                        [=](Index_type i) {
+          Real_type x = x0 + i*h;
+          return trap_int_func(x, y, xp, yp);
+        });
+        m_sumx += sumx * h;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto trapint_base_lam = [=](Index_type i) -> Real_type {
+                                Real_type x = x0 + i*h;
+                                return trap_int_func(x, y, xp, yp);
+                              };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Real_type sumx = m_sumx_init;
+
+        sumx += std::transform_reduce( std::execution::par_unseq,
+                                       begin, end,
+                                       Real_type(0), std::plus<Real_type>(), trapint_base_lam);
+
+        m_sumx += sumx * h;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): seq_reduce / loop_exec are restored guesses.
+        RAJA::ReduceSum<RAJA::seq_reduce, Real_type> sumx(m_sumx_init);
+
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
+          TRAP_INT_BODY;
+        });
+
+        m_sumx += static_cast<Real_type>(sumx.get()) * h;
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp
b/src/lcals/DIFF_PREDICT-StdPar.cpp
new file mode 100644
index 000000000..b86723185
--- /dev/null
+++ b/src/lcals/DIFF_PREDICT-StdPar.cpp
@@ -0,0 +1,103 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DIFF_PREDICT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the DIFF_PREDICT kernel for the requested StdPar variant, applying
+// DIFF_PREDICT_BODY to every index in [ibegin, iend) and timing run_reps
+// repetitions between startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+//
+void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  DIFF_PREDICT_DATA_SETUP;
+
+  auto diffpredict_lam = [=](Index_type i) {
+                           DIFF_PREDICT_BODY;
+                         };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          DIFF_PREDICT_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          diffpredict_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): loop_exec is a restored guess.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), diffpredict_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp
new file mode 100644
index 000000000..a3aa279f2
--- /dev/null
+++ b/src/lcals/EOS-StdPar.cpp
@@ -0,0 +1,103 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "EOS.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the EOS kernel for the requested StdPar variant, applying EOS_BODY
+// to every index in [ibegin, iend) and timing run_reps repetitions between
+// startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+//
+void EOS::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  EOS_DATA_SETUP;
+
+  auto eos_lam = [=](Index_type i) {
+                   EOS_BODY;
+                 };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          EOS_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          eos_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): loop_exec is a restored guess.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), eos_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n EOS : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp
new file mode 100644
index 000000000..1a2d15e6c
--- /dev/null
+++ b/src/lcals/FIRST_DIFF-StdPar.cpp
@@ -0,0 +1,103 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_DIFF.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the FIRST_DIFF kernel for the requested StdPar variant, applying
+// FIRST_DIFF_BODY to every index in [ibegin, iend) and timing run_reps
+// repetitions between startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+//
+void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  FIRST_DIFF_DATA_SETUP;
+
+  auto firstdiff_lam = [=](Index_type i) {
+                         FIRST_DIFF_BODY;
+                       };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          FIRST_DIFF_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          firstdiff_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+// Same guard as the sibling kernels (this file previously tested
+// RAJA_ENABLE_STDPAR while closing with "#endif // RUN_RAJA_STDPAR").
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): loop_exec is a restored guess.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), firstdiff_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp
new file mode 100644
index 000000000..4a019b5b3
--- /dev/null
+++ b/src/lcals/FIRST_MIN-StdPar.cpp
@@ -0,0 +1,114 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_MIN.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below (<iterator> added for std::distance) -- confirm against
+// the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+#include <iterator>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the FIRST_MIN kernel (location of the minimum of x over
+// [ibegin, iend)) for the requested StdPar variant, timing run_reps
+// repetitions between startTimer() and stopTimer(). Each repetition folds
+// the location it found into m_minloc with RAJA_MAX.
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+// x comes from FIRST_MIN_DATA_SETUP (defined outside this file).
+//
+void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_MIN_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        auto result =
+          std::min_element( std::execution::par_unseq,
+                            &x[ibegin], &x[iend]);
+        // Offset of the minimum from the start of the searched range.
+        auto loc = std::distance(&x[ibegin], result);
+
+        m_minloc = RAJA_MAX(m_minloc, loc);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto firstmin_base_lam = [=](Index_type i) -> Real_type {
+                                 return x[i];
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // Declares 'mymin' (macro defined outside this file).
+        FIRST_MIN_MINLOC_INIT;
+
+        // This variant searches with a plain sequential loop.
+        for (Index_type i = ibegin; i < iend; ++i ) {
+          if ( firstmin_base_lam(i) < mymin.val ) {
+            mymin.val = x[i];
+            mymin.loc = i;
+          }
+        }
+
+        m_minloc = RAJA_MAX(m_minloc, mymin.loc);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+// Same guard as the sibling kernels (this file previously tested
+// RAJA_ENABLE_STDPAR while closing with "#endif // RUN_RAJA_STDPAR").
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): seq_reduce / loop_exec and the value/index types
+        // are restored guesses.
+        RAJA::ReduceMinLoc<RAJA::seq_reduce, Real_type, Index_type> loc(
+                                                        m_xmin_init, m_initloc);
+
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
+          FIRST_MIN_BODY_RAJA;
+        });
+
+        m_minloc = RAJA_MAX(m_minloc, loc.getLoc());
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp
new file mode 100644
index 000000000..1f47f9412
--- /dev/null
+++ b/src/lcals/FIRST_SUM-StdPar.cpp
@@ -0,0 +1,103 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_SUM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the FIRST_SUM kernel for the requested StdPar variant, applying
+// FIRST_SUM_BODY to every index in [1, iend) and timing run_reps
+// repetitions between startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+//
+void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  FIRST_SUM_DATA_SETUP;
+
+  auto firstsum_lam = [=](Index_type i) {
+                        FIRST_SUM_BODY;
+                      };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          FIRST_SUM_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          firstsum_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): loop_exec is a restored guess.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), firstsum_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp
new file mode 100644
index 000000000..f1cd69a0d
--- /dev/null
+++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp
@@ -0,0 +1,126 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "GEN_LIN_RECUR.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the GEN_LIN_RECUR kernel for the requested StdPar variant: per
+// repetition, GEN_LIN_RECUR_BODY1 over k in [0, N) followed by
+// GEN_LIN_RECUR_BODY2 over i in [1, N+1). run_reps repetitions are timed
+// between startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+// N comes from GEN_LIN_RECUR_DATA_SETUP (defined outside this file).
+//
+void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+
+  GEN_LIN_RECUR_DATA_SETUP;
+
+  auto beginK = counting_iterator<Index_type>(0);
+  auto endK = counting_iterator<Index_type>(N);
+  auto beginI = counting_iterator<Index_type>(1);
+  auto endI = counting_iterator<Index_type>(N+1);
+
+  auto genlinrecur_lam1 = [=](Index_type k) {
+                            GEN_LIN_RECUR_BODY1;
+                          };
+  auto genlinrecur_lam2 = [=](Index_type i) {
+                            GEN_LIN_RECUR_BODY2;
+                          };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        //for (Index_type k = 0; k < N; ++k ) {
+        std::for_each( std::execution::par_unseq,
+                       beginK, endK,
+                       [=](Index_type k) {
+          GEN_LIN_RECUR_BODY1;
+        });
+
+        //for (Index_type i = 1; i < N+1; ++i ) {
+        std::for_each( std::execution::par_unseq,
+                       beginI, endI,
+                       [=](Index_type i) {
+          GEN_LIN_RECUR_BODY2;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        //for (Index_type k = 0; k < N; ++k ) {
+        std::for_each( std::execution::par_unseq,
+                       beginK, endK,
+                       [=](Index_type k) {
+          genlinrecur_lam1(k);
+        });
+
+        //for (Index_type i = 1; i < N+1; ++i ) {
+        std::for_each( std::execution::par_unseq,
+                       beginI, endI,
+                       [=](Index_type i) {
+          genlinrecur_lam2(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): loop_exec is a restored guess.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(0, N), genlinrecur_lam1);
+
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(1, N+1), genlinrecur_lam2);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp
new file mode 100644
index 000000000..45601b347
--- /dev/null
+++ b/src/lcals/HYDRO_1D-StdPar.cpp
@@ -0,0 +1,104 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_1D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the HYDRO_1D kernel for the requested StdPar variant, applying
+// HYDRO_1D_BODY to every index in [ibegin, iend) and timing run_reps
+// repetitions between startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+//
+void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  HYDRO_1D_DATA_SETUP;
+
+  auto hydro1d_lam = [=](Index_type i) {
+                       HYDRO_1D_BODY;
+                     };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          HYDRO_1D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          hydro1d_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): loop_exec is a restored guess.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), hydro1d_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp
new file mode 100644
index 000000000..1650dffd5
--- /dev/null
+++ b/src/lcals/HYDRO_2D-StdPar.cpp
@@ -0,0 +1,195 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_2D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the HYDRO_2D kernel for the requested StdPar variant: per
+// repetition, three sweeps (BODY1, BODY2, BODY3) over k in [1, m_kn-1)
+// and j in [1, m_jn-1). In the Base and Lambda variants the outer k loop
+// uses the 'par' policy and the inner j loop the 'unseq' policy.
+// run_reps repetitions are timed between startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+//
+void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type kbeg = 1;
+  const Index_type kend = m_kn - 1;
+  const Index_type jbeg = 1;
+  const Index_type jend = m_jn - 1;
+
+  auto beginK = counting_iterator<Index_type>(kbeg);
+  auto endK = counting_iterator<Index_type>(kend);
+  auto beginJ = counting_iterator<Index_type>(jbeg);
+  auto endJ = counting_iterator<Index_type>(jend);
+
+  HYDRO_2D_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par,
+                       beginK, endK,
+                       [=](Index_type k) {
+          std::for_each( std::execution::unseq,
+                         beginJ, endJ,
+                         [=](Index_type j) {
+            HYDRO_2D_BODY1;
+          });
+        });
+
+        std::for_each( std::execution::par,
+                       beginK, endK,
+                       [=](Index_type k) {
+          std::for_each( std::execution::unseq,
+                         beginJ, endJ,
+                         [=](Index_type j) {
+            HYDRO_2D_BODY2;
+          });
+        });
+
+        std::for_each( std::execution::par,
+                       beginK, endK,
+                       [=](Index_type k) {
+          std::for_each( std::execution::unseq,
+                         beginJ, endJ,
+                         [=](Index_type j) {
+            HYDRO_2D_BODY3;
+          });
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto hydro2d_base_lam1 = [=] (Index_type k, Index_type j) {
+                                 HYDRO_2D_BODY1;
+                               };
+      auto hydro2d_base_lam2 = [=] (Index_type k, Index_type j) {
+                                 HYDRO_2D_BODY2;
+                               };
+      auto hydro2d_base_lam3 = [=] (Index_type k, Index_type j) {
+                                 HYDRO_2D_BODY3;
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par,
+                       beginK, endK,
+                       [=](Index_type k) {
+          std::for_each( std::execution::unseq,
+                         beginJ, endJ,
+                         [=](Index_type j) {
+            hydro2d_base_lam1(k, j);
+          });
+        });
+
+        std::for_each( std::execution::par,
+                       beginK, endK,
+                       [=](Index_type k) {
+          std::for_each( std::execution::unseq,
+                         beginJ, endJ,
+                         [=](Index_type j) {
+            hydro2d_base_lam2(k, j);
+          });
+        });
+
+        std::for_each( std::execution::par,
+                       beginK, endK,
+                       [=](Index_type k) {
+          std::for_each( std::execution::unseq,
+                         beginJ, endJ,
+                         [=](Index_type j) {
+            hydro2d_base_lam3(k, j);
+          });
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      HYDRO_2D_VIEWS_RAJA;
+
+      auto hydro2d_lam1 = [=] (Index_type k, Index_type j) {
+                            HYDRO_2D_BODY1_RAJA;
+                          };
+      auto hydro2d_lam2 = [=] (Index_type k, Index_type j) {
+                            HYDRO_2D_BODY2_RAJA;
+                          };
+      auto hydro2d_lam3 = [=] (Index_type k, Index_type j) {
+                            HYDRO_2D_BODY3_RAJA;
+                          };
+
+      using EXECPOL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,  // k
+            RAJA::statement::For<1, RAJA::loop_exec,  // j
+              RAJA::statement::Lambda<0>
+            >
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::kernel<EXECPOL>(
+                     RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend),
+                                       RAJA::RangeSegment(jbeg, jend)),
+                     hydro2d_lam1);
+
+        RAJA::kernel<EXECPOL>(
+                     RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend),
+                                       RAJA::RangeSegment(jbeg, jend)),
+                     hydro2d_lam2);
+
+        RAJA::kernel<EXECPOL>(
+                     RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend),
+                                       RAJA::RangeSegment(jbeg, jend)),
+                     hydro2d_lam3);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp
new file mode 100644
index 000000000..d8139dfbe
--- /dev/null
+++ b/src/lcals/INT_PREDICT-StdPar.cpp
@@ -0,0 +1,104 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INT_PREDICT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below -- confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the INT_PREDICT kernel for the requested StdPar variant, applying
+// INT_PREDICT_BODY to every index in [ibegin, iend) and timing run_reps
+// repetitions between startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+//
+void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  INT_PREDICT_DATA_SETUP;
+
+  auto intpredict_lam = [=](Index_type i) {
+                          INT_PREDICT_BODY;
+                        };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          INT_PREDICT_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          intpredict_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): loop_exec is a restored guess.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), intpredict_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp
new file mode 100644
index 000000000..3d937bb22
--- /dev/null
+++ b/src/lcals/PLANCKIAN-StdPar.cpp
@@ -0,0 +1,105 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PLANCKIAN.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+// NOTE(review): the <header> names and template arguments in this file were
+// missing from this copy of the patch; they are restored from how the names
+// are used below (<cmath> is a guess for the second trailing include) --
+// confirm against the upstream commit.
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+#include <cmath>
+
+namespace rajaperf
+{
+namespace lcals
+{
+
+//
+// Runs the PLANCKIAN kernel for the requested StdPar variant, applying
+// PLANCKIAN_BODY to every index in [ibegin, iend) and timing run_reps
+// repetitions between startTimer() and stopTimer().
+// The body is compiled only when RUN_STDPAR is defined; an unrecognized
+// variant id is reported on std::cout. tune_idx is not used here.
+//
+void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end = counting_iterator<Index_type>(iend);
+
+  PLANCKIAN_DATA_SETUP;
+
+  auto planckian_lam = [=](Index_type i) {
+                         PLANCKIAN_BODY;
+                       };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          PLANCKIAN_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          planckian_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): loop_exec is a restored guess.
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), planckian_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp
new file mode 100644
index 000000000..ff1986bc1
--- /dev/null
+++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp
@@ -0,0 +1,104 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRIDIAG_ELIM.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace lcals +{ + + +void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = m_N; + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + TRIDIAG_ELIM_DATA_SETUP; + + auto tridiag_elim_lam = [=](Index_type i) { + TRIDIAG_ELIM_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + TRIDIAG_ELIM_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + tridiag_elim_lam(i); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), tridiag_elim_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp new file mode 100644 index 000000000..feb441614 --- /dev/null +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -0,0 +1,255 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore 
National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_2MM.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +#define USE_STDPAR_COLLAPSE 1 + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_2MM_DATA_SETUP; + +#ifdef USE_STDPAR_COLLAPSE + counting_iterator beginIJ(0); + counting_iterator endIJ(ni*nj); + counting_iterator beginIL(0); + counting_iterator endIL(ni*nl); +#else + counting_iterator beginI(0); + counting_iterator endI(ni); + counting_iterator beginL(0); + counting_iterator endL(nl); +#endif + counting_iterator beginJ(0); + counting_iterator endJ(nj); + counting_iterator beginK(0); + counting_iterator endK(nk); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_2MM_BODY1; + std::for_each(beginK, endK, [=,&dot](Index_type k) { + POLYBENCH_2MM_BODY2; + }); + POLYBENCH_2MM_BODY3; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIL, endIL, [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + 
POLYBENCH_2MM_BODY4; + std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + POLYBENCH_2MM_BODY5; + }); + POLYBENCH_2MM_BODY6; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_2mm_base_lam2 = [=](Index_type i, Index_type j, + Index_type k, Real_type &dot) { + POLYBENCH_2MM_BODY2; + }; + auto poly_2mm_base_lam3 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_2MM_BODY3; + }; + auto poly_2mm_base_lam5 = [=](Index_type i, Index_type l, + Index_type j, Real_type &dot) { + POLYBENCH_2MM_BODY5; + }; + auto poly_2mm_base_lam6 = [=](Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_2MM_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_2MM_BODY1; + std::for_each(beginK, endK, [=,&dot](Index_type k) { + poly_2mm_base_lam2(i, j, k, dot); + }); + poly_2mm_base_lam3(i, j, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIL, endIL, [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_2MM_BODY4; + std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + poly_2mm_base_lam5(i, l, j, dot); + }); + poly_2mm_base_lam6(i, l, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_2MM_VIEWS_RAJA; + + auto poly_2mm_lam1 = [=](Real_type &dot) { + POLYBENCH_2MM_BODY1_RAJA; + 
}; + auto poly_2mm_lam2 = [=](Index_type i, Index_type j, Index_type k, + Real_type &dot) { + POLYBENCH_2MM_BODY2_RAJA; + }; + auto poly_2mm_lam3 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_2MM_BODY3_RAJA; + }; + auto poly_2mm_lam4 = [=](Real_type &dot) { + POLYBENCH_2MM_BODY4_RAJA; + }; + auto poly_2mm_lam5 = [=](Index_type i, Index_type l, Index_type j, + Real_type &dot) { + POLYBENCH_2MM_BODY5_RAJA; + }; + auto poly_2mm_lam6 = [=](Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_2MM_BODY6_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nk}), + RAJA::tuple{0.0}, + + poly_2mm_lam1, + poly_2mm_lam2, + poly_2mm_lam3 + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nl}, + RAJA::RangeSegment{0, nj}), + RAJA::tuple{0.0}, + + poly_2mm_lam4, + poly_2mm_lam5, + poly_2mm_lam6 + + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp new file mode 100644 index 000000000..189caa032 --- /dev/null +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -0,0 +1,331 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National 
Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_3MM.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +#define USE_STDPAR_COLLAPSE 1 + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_3MM_DATA_SETUP; + +#ifdef USE_STDPAR_COLLAPSE + counting_iterator beginIJ(0); + counting_iterator endIJ(ni*nj); + counting_iterator beginIL(0); + counting_iterator endIL(ni*nl); + counting_iterator beginJL(0); + counting_iterator endJL(nj*nl); +#else + counting_iterator beginI(0); + counting_iterator endI(ni); + counting_iterator beginL(0); + counting_iterator endL(nl); +#endif + counting_iterator beginJ(0); + counting_iterator endJ(nj); + counting_iterator beginK(0); + counting_iterator endK(nk); + counting_iterator beginM(0); + counting_iterator endM(nm); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_3MM_BODY1; + std::for_each(beginK, endK, [=,&dot](Index_type k) { + POLYBENCH_3MM_BODY2; + }); + POLYBENCH_3MM_BODY3; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginJL, endJL, [=](Index_type jl) { + const auto j = jl / nl; + const auto l = jl % nl; +#else + std::for_each( 
std::execution::par_unseq, + beginJ, endJ, [=](Index_type j) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY4; + std::for_each(beginM, endM, [=,&dot](Index_type m) { + POLYBENCH_3MM_BODY5; + }); + POLYBENCH_3MM_BODY6; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIL, endIL, [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY7; + std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + POLYBENCH_3MM_BODY8; + }); + POLYBENCH_3MM_BODY9; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_3mm_base_lam2 = [=] (Index_type i, Index_type j, Index_type k, + Real_type &dot) { + POLYBENCH_3MM_BODY2; + }; + auto poly_3mm_base_lam3 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY3; + }; + auto poly_3mm_base_lam5 = [=] (Index_type j, Index_type l, Index_type m, + Real_type &dot) { + POLYBENCH_3MM_BODY5; + }; + auto poly_3mm_base_lam6 = [=] (Index_type j, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY6; + }; + auto poly_3mm_base_lam8 = [=] (Index_type i, Index_type l, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY8; + }; + auto poly_3mm_base_lam9 = [=] (Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY9; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIJ, endIJ, [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginJ, endJ, [=](Index_type j) { +#endif + POLYBENCH_3MM_BODY1; + std::for_each(beginK, endK, 
[=,&dot](Index_type k) { + poly_3mm_base_lam2(i, j, k, dot); + }); + poly_3mm_base_lam3(i, j, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginJL, endJL, [=](Index_type jl) { + const auto j = jl / nl; + const auto l = jl % nl; +#else + std::for_each( std::execution::par_unseq, + beginJ, endJ, [=](Index_type j) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY4; + std::for_each(beginM, endM, [=,&dot](Index_type m) { + poly_3mm_base_lam5(j, l, m, dot); + }); + poly_3mm_base_lam6(j, l, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + beginIL, endIL, [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each( std::execution::par_unseq, + beginI, endI, [=](Index_type i) { + std::for_each(beginL, endL, [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY7; + std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + poly_3mm_base_lam8(i, l, j, dot); + }); + poly_3mm_base_lam9(i, l, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_3MM_VIEWS_RAJA; + + auto poly_3mm_lam1 = [=] (Real_type &dot) { + POLYBENCH_3MM_BODY1_RAJA; + }; + auto poly_3mm_lam2 = [=] (Index_type i, Index_type j, Index_type k, + Real_type &dot) { + POLYBENCH_3MM_BODY2_RAJA; + }; + auto poly_3mm_lam3 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY3_RAJA; + }; + auto poly_3mm_lam4 = [=] (Real_type &dot) { + POLYBENCH_3MM_BODY4_RAJA; + }; + auto poly_3mm_lam5 = [=] (Index_type j, Index_type l, Index_type m, + Real_type &dot) { + POLYBENCH_3MM_BODY5_RAJA; + }; + auto poly_3mm_lam6 = [=] (Index_type j, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY6_RAJA; + }; + auto poly_3mm_lam7 = [=] (Real_type &dot) { + POLYBENCH_3MM_BODY7_RAJA; + }; + 
auto poly_3mm_lam8 = [=] (Index_type i, Index_type l, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY8_RAJA; + }; + auto poly_3mm_lam9 = [=] (Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY9_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nk}), + RAJA::tuple{0.0}, + + poly_3mm_lam1, + poly_3mm_lam2, + poly_3mm_lam3 + + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nl}, + RAJA::RangeSegment{0, nm}), + RAJA::tuple{0.0}, + + poly_3mm_lam4, + poly_3mm_lam5, + poly_3mm_lam6 + + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nl}, + RAJA::RangeSegment{0, nj}), + RAJA::tuple{0.0}, + + poly_3mm_lam7, + poly_3mm_lam8, + poly_3mm_lam9 + + ); + + } // end run_reps + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp new file mode 100644 index 000000000..6d2a99650 --- /dev/null +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -0,0 +1,236 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. 
+// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_ADI.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_ADI_DATA_SETUP; + + counting_iterator begin(1); + counting_iterator end(n-1); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_ADI_BODY2; + for (Index_type j = 1; j < n-1; ++j) { + POLYBENCH_ADI_BODY3; + } + POLYBENCH_ADI_BODY4; + for (Index_type k = n-2; k >= 1; --k) { + POLYBENCH_ADI_BODY5; + } + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_ADI_BODY6; + for (Index_type j = 1; j < n-1; ++j) { + POLYBENCH_ADI_BODY7; + } + POLYBENCH_ADI_BODY8; + for (Index_type k = n-2; k >= 1; --k) { + POLYBENCH_ADI_BODY9; + } + }); + + } // tstep loop + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_adi_base_lam2 = [=](Index_type i) { + POLYBENCH_ADI_BODY2; + }; + auto poly_adi_base_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY3; + }; + auto poly_adi_base_lam4 = [=](Index_type i) { + POLYBENCH_ADI_BODY4; + }; + auto poly_adi_base_lam5 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY5; + }; + auto poly_adi_base_lam6 = [=](Index_type i) { + POLYBENCH_ADI_BODY6; + }; + auto poly_adi_base_lam7 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY7; + }; + auto poly_adi_base_lam8 = [=](Index_type i) { + POLYBENCH_ADI_BODY8; + }; + auto poly_adi_base_lam9 = 
[=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY9; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + poly_adi_base_lam2(i); + for (Index_type j = 1; j < n-1; ++j) { + poly_adi_base_lam3(i, j); + } + poly_adi_base_lam4(i); + for (Index_type k = n-2; k >= 1; --k) { + poly_adi_base_lam5(i, k); + } + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + poly_adi_base_lam6(i); + for (Index_type j = 1; j < n-1; ++j) { + poly_adi_base_lam7(i, j); + } + poly_adi_base_lam8(i); + for (Index_type k = n-2; k >= 1; --k) { + poly_adi_base_lam9(i, k); + } + }); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_ADI_VIEWS_RAJA; + + auto poly_adi_lam2 = [=](Index_type i) { + POLYBENCH_ADI_BODY2_RAJA; + }; + auto poly_adi_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY3_RAJA; + }; + auto poly_adi_lam4 = [=](Index_type i) { + POLYBENCH_ADI_BODY4_RAJA; + }; + auto poly_adi_lam5 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY5_RAJA; + }; + auto poly_adi_lam6 = [=](Index_type i) { + POLYBENCH_ADI_BODY6_RAJA; + }; + auto poly_adi_lam7 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY7_RAJA; + }; + auto poly_adi_lam8 = [=](Index_type i) { + POLYBENCH_ADI_BODY8_RAJA; + }; + auto poly_adi_lam9 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY9_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<3, RAJA::Segs<0,2>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep 
< run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{1, n-1}, + RAJA::RangeSegment{1, n-1}, + RAJA::RangeStrideSegment{n-2, 0, -1}), + + poly_adi_lam2, + poly_adi_lam3, + poly_adi_lam4, + poly_adi_lam5 + + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{1, n-1}, + RAJA::RangeSegment{1, n-1}, + RAJA::RangeStrideSegment{n-2, 0, -1}), + + poly_adi_lam6, + poly_adi_lam7, + poly_adi_lam8, + poly_adi_lam9 + + ); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp new file mode 100644 index 000000000..1c3d1a3a9 --- /dev/null +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -0,0 +1,213 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_ATAX.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps= getRunReps(); + + POLYBENCH_ATAX_DATA_SETUP; + + counting_iterator begin(0); + counting_iterator end(N); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_ATAX_BODY1; + std::for_each( // in order: lambda updates the by-reference dot + begin, end, + [=,&dot](Index_type j) { + POLYBENCH_ATAX_BODY2; + }); + POLYBENCH_ATAX_BODY3; + }); + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type j) { + POLYBENCH_ATAX_BODY4; + std::for_each( // in order: lambda updates the by-reference dot + begin, end, + [=,&dot](Index_type i) { + POLYBENCH_ATAX_BODY5; + }); + POLYBENCH_ATAX_BODY6; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_atax_base_lam2 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_ATAX_BODY2; + }; + auto poly_atax_base_lam3 = [=] (Index_type i, + Real_type &dot) { + POLYBENCH_ATAX_BODY3; + }; + auto poly_atax_base_lam5 = [=] (Index_type i, Index_type j , + Real_type &dot) { + POLYBENCH_ATAX_BODY5; + }; + auto poly_atax_base_lam6 = [=] (Index_type j, + Real_type &dot) { + POLYBENCH_ATAX_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + POLYBENCH_ATAX_BODY1; + std::for_each( // in order: lambda updates the by-reference dot + begin, end, + [=,&dot](Index_type j) { + poly_atax_base_lam2(i, j, dot); + }); + poly_atax_base_lam3(i, dot); + }); + +
std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type j) { + POLYBENCH_ATAX_BODY4; + std::for_each( // in order: lambda updates the by-reference dot + begin, end, + [=,&dot](Index_type i) { + poly_atax_base_lam5(i, j, dot); + }); + poly_atax_base_lam6(j, dot); + }); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_ATAX_VIEWS_RAJA; + + auto poly_atax_lam1 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_ATAX_BODY1_RAJA; + }; + auto poly_atax_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY2_RAJA; + }; + auto poly_atax_lam3 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_ATAX_BODY3_RAJA; + }; + auto poly_atax_lam4 = [=] (Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY4_RAJA; + }; + auto poly_atax_lam5 = [=] (Index_type i, Index_type j , Real_type &dot) { + POLYBENCH_ATAX_BODY5_RAJA; + }; + auto poly_atax_lam6 = [=] (Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY6_RAJA; + }; + + using EXEC_POL1 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + + using EXEC_POL2 = + RAJA::KernelPolicy< + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<1>, RAJA::Params<0>>, + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<1>, RAJA::Params<0>> + > + >; + + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + RAJA::tuple{0.0}, + + poly_atax_lam1, + poly_atax_lam2, + poly_atax_lam3 + + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}),
+ RAJA::tuple{0.0}, + + poly_atax_lam4, + poly_atax_lam5, + poly_atax_lam6 + + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp new file mode 100644 index 000000000..5bd7435dd --- /dev/null +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -0,0 +1,226 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_FDTD_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_FDTD_2D_DATA_SETUP; + + counting_iterator beginX(0); + counting_iterator endX(nx); + counting_iterator beginY(0); + counting_iterator endY(ny); + counting_iterator begin1X(1); + counting_iterator end1X(nx); + counting_iterator beginXm1(0); + counting_iterator endXm1(nx-1); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + std::for_each( std::execution::par_unseq, + beginY, endY, + [=](Index_type j) { + POLYBENCH_FDTD_2D_BODY1; + }); + std::for_each( std::execution::par_unseq, + begin1X, end1X, + [=](Index_type i) { + for (Index_type j = 0; j < ny; j++) { + POLYBENCH_FDTD_2D_BODY2; + } + }); + 
std::for_each( std::execution::par_unseq, + beginX, endX, + [=](Index_type i) { + for (Index_type j = 1; j < ny; j++) { + POLYBENCH_FDTD_2D_BODY3; + } + }); + std::for_each( std::execution::par_unseq, + beginXm1, endXm1, + [=](Index_type i) { + for (Index_type j = 0; j < ny - 1; j++) { + POLYBENCH_FDTD_2D_BODY4; + } + }); + + } // tstep loop + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + // + // Note: first lambda must use capture by reference so that the + // scalar variable 't' used in it is updated for each + // t-loop iteration. + // + auto poly_fdtd2d_base_lam1 = [&](Index_type j) { + POLYBENCH_FDTD_2D_BODY1; + }; + auto poly_fdtd2d_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY2; + }; + auto poly_fdtd2d_base_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY3; + }; + auto poly_fdtd2d_base_lam4 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY4; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + std::for_each( //std::execution::par_unseq, + beginY, endY, + [=](Index_type j) { + poly_fdtd2d_base_lam1(j); + }); + std::for_each( //std::execution::par_unseq, + begin1X, end1X, + [=](Index_type i) { + for (Index_type j = 0; j < ny; j++) { + poly_fdtd2d_base_lam2(i, j); + } + }); + std::for_each( //std::execution::par_unseq, + beginX, endX, + [=](Index_type i) { + for (Index_type j = 1; j < ny; j++) { + poly_fdtd2d_base_lam3(i, j); + } + }); + std::for_each( //std::execution::par_unseq, + beginXm1, endXm1, + [=](Index_type i) { + for (Index_type j = 0; j < ny - 1; j++) { + poly_fdtd2d_base_lam4(i, j); + } + }); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_FDTD_2D_VIEWS_RAJA; + + // + // Note: first lambda must use capture by reference so that the + // scalar variable 't' used in it is updated for each + // t-loop iteration. 
+ // + auto poly_fdtd2d_lam1 = [&](Index_type j) { + POLYBENCH_FDTD_2D_BODY1_RAJA; + }; + auto poly_fdtd2d_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY2_RAJA; + }; + auto poly_fdtd2d_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY3_RAJA; + }; + auto poly_fdtd2d_lam4 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY4_RAJA; + }; + + using EXEC_POL1 = RAJA::loop_exec; + + using EXEC_POL234 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + RAJA::forall( RAJA::RangeSegment(0, ny), + poly_fdtd2d_lam1 + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{1, nx}, + RAJA::RangeSegment{0, ny}), + poly_fdtd2d_lam2 + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{0, nx}, + RAJA::RangeSegment{1, ny}), + poly_fdtd2d_lam3 + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{0, nx-1}, + RAJA::RangeSegment{0, ny-1}), + poly_fdtd2d_lam4 + ); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp new file mode 100644 index 000000000..023b125d3 --- /dev/null +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -0,0 +1,158 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_FLOYD_WARSHALL.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +//#define USE_STDPAR_COLLAPSE 1 // NOTE(review): collapsed k*i path would run k-steps concurrently; keep off until confirmed safe + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_FLOYD_WARSHALL_DATA_SETUP; + +#ifdef USE_STDPAR_COLLAPSE + counting_iterator begin2(0); + counting_iterator end2(N*N); +#else + counting_iterator begin(0); + counting_iterator end(N); +#endif + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin2, end2, [=](Index_type ki) { + const auto k = ki / N; + const auto i = ki % N; +#else + std::for_each( // k-steps run in order; assumes body writes element (i,j) for every k + begin, end, + [=](Index_type k) { + std::for_each( std::execution::par_unseq, begin, end, + [=](Index_type i) { +#endif + for (Index_type j = 0; j < N; ++j) { + POLYBENCH_FLOYD_WARSHALL_BODY; + } +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_floydwarshall_base_lam = [=](Index_type k, Index_type i, + Index_type j) { + POLYBENCH_FLOYD_WARSHALL_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin2, end2, [=](Index_type ki) { + const auto k = ki / N; + const auto i = ki % N; +#else + std::for_each( // k-steps run in order; assumes body writes element (i,j) for every k + begin, end, + [=](Index_type k) { + std::for_each( std::execution::par_unseq, begin, end, + [=](Index_type i) { +#endif + for (Index_type j = 0; j < N; ++j) { + poly_floydwarshall_base_lam(k, i, j); + } +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + +
break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + POLYBENCH_FLOYD_WARSHALL_VIEWS_RAJA; + + auto poly_floydwarshall_lam = [=](Index_type k, Index_type i, + Index_type j) { + POLYBENCH_FLOYD_WARSHALL_BODY_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + poly_floydwarshall_lam + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp new file mode 100644 index 000000000..1fd75528e --- /dev/null +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -0,0 +1,192 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_GEMM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+#define USE_STDPAR_COLLAPSE 1
+
+namespace rajaperf
+{
+namespace polybench
+{
+// Runs the Base/Lambda/RAJA StdPar variants of the GEMM kernel, timing run_reps repetitions.
+void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+
+  POLYBENCH_GEMM_DATA_SETUP;
+
+#ifdef USE_STDPAR_COLLAPSE
+  counting_iterator<Index_type> beginIJ(0);      // flattened (i,j) space
+  counting_iterator<Index_type> endIJ(ni*nj);
+#else
+  counting_iterator<Index_type> beginI(0);
+  counting_iterator<Index_type> beginJ(0);
+  counting_iterator<Index_type> endJ(nj);
+  counting_iterator<Index_type> endI(ni);
+#endif
+  counting_iterator<Index_type> beginK(0);       // reduction index, always serial
+  counting_iterator<Index_type> endK(nk);
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each( std::execution::par_unseq,
+                       beginIJ, endIJ, [=](Index_type ij) {
+            const auto i  = ij / nj;
+            const auto j  = ij % nj;
+#else
+        std::for_each( std::execution::par_unseq,
+                       beginI, endI, [=](Index_type i) {
+          std::for_each(beginJ, endJ, [=](Index_type j) {
+#endif
+            POLYBENCH_GEMM_BODY1;   // presumably declares the local accumulator 'dot' captured below
+            POLYBENCH_GEMM_BODY2;
+            std::for_each(beginK, endK, [=,&dot](Index_type k) {
+              POLYBENCH_GEMM_BODY3;
+            });
+            POLYBENCH_GEMM_BODY4;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto poly_gemm_base_lam2 = [=](Index_type i, Index_type j) {
+                                   POLYBENCH_GEMM_BODY2;
+                                  };
+      auto poly_gemm_base_lam3 = [=](Index_type i, Index_type j, Index_type k,
+                                     Real_type& dot) {
+                                   POLYBENCH_GEMM_BODY3;
+                                  };
+      auto poly_gemm_base_lam4 = [=](Index_type i, Index_type j,
+                                     Real_type& dot) {
+                                   POLYBENCH_GEMM_BODY4;
+                                  };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each( std::execution::par_unseq,
+                       beginIJ, endIJ, [=](Index_type ij) {
+            const auto i  = ij / nj;
+            const auto j  = ij % nj;
+#else
+        std::for_each( std::execution::par_unseq,
+                       beginI, endI, [=](Index_type i) {
+          std::for_each(beginJ, endJ, [=](Index_type j) {
+#endif
+            POLYBENCH_GEMM_BODY1;   // kept as a macro, not a lambda: 'dot' must be visible in this scope
+            poly_gemm_base_lam2(i, j);
+            std::for_each(beginK, endK, [=,&dot](Index_type k) {
+              poly_gemm_base_lam3(i, j, k, dot);
+            });
+            poly_gemm_base_lam4(i, j, dot);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      POLYBENCH_GEMM_VIEWS_RAJA;
+
+      auto poly_gemm_lam1 = [=](Real_type& dot) {
+                                POLYBENCH_GEMM_BODY1_RAJA;
+                               };
+      auto poly_gemm_lam2 = [=](Index_type i, Index_type j) {
+                                POLYBENCH_GEMM_BODY2_RAJA;
+                               };
+      auto poly_gemm_lam3 = [=](Index_type i, Index_type j, Index_type k,
+                                Real_type& dot) {
+                                POLYBENCH_GEMM_BODY3_RAJA;
+                               };
+      auto poly_gemm_lam4 = [=](Index_type i, Index_type j,
+                                Real_type& dot) {
+                                POLYBENCH_GEMM_BODY4_RAJA;
+                               };
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::Lambda<0, RAJA::Params<0>>,
+              RAJA::statement::Lambda<1, RAJA::Segs<0,1>>,
+              RAJA::statement::For<2, RAJA::loop_exec,
+                RAJA::statement::Lambda<2, RAJA::Segs<0,1,2>, RAJA::Params<0>>
+              >,
+              RAJA::statement::Lambda<3, RAJA::Segs<0,1>, RAJA::Params<0>>
+            >
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::kernel_param<EXEC_POL>(
+
+          RAJA::make_tuple( RAJA::RangeSegment{0, ni},
+                            RAJA::RangeSegment{0, nj},
+                            RAJA::RangeSegment{0, nk} ),
+          RAJA::tuple<Real_type>{0.0},  // variable for dot
+
+          poly_gemm_lam1,
+          poly_gemm_lam2,
+          poly_gemm_lam3,
+          poly_gemm_lam4
+
+        );
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
diff --git
a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp
new file mode 100644
index 000000000..2673abd45
--- /dev/null
+++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp
@@ -0,0 +1,255 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_GEMVER.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+#define USE_STDPAR_COLLAPSE 1  // NOTE(review): never referenced in this file - remove or implement
+
+namespace rajaperf
+{
+namespace polybench
+{
+// Runs the Base/Lambda/RAJA StdPar variants of the GEMVER kernel (four loop nests per repetition).
+void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+
+  POLYBENCH_GEMVER_DATA_SETUP;
+
+  counting_iterator<Index_type> begin(0);
+  counting_iterator<Index_type> end(n);
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          std::for_each(begin, end, [=](Index_type j) {
+            POLYBENCH_GEMVER_BODY1;
+          });
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY2;   // presumably declares the accumulator 'dot' captured below
+          std::for_each(begin, end, [=,&dot](Index_type j) {
+            POLYBENCH_GEMVER_BODY3;
+          });
+          POLYBENCH_GEMVER_BODY4;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY5;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY6;   // presumably declares 'dot' for this nest
+          std::for_each(begin, end, [=,&dot](Index_type j) {
+            POLYBENCH_GEMVER_BODY7;
+          });
+          POLYBENCH_GEMVER_BODY8;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto poly_gemver_base_lam1 = [=](Index_type i, Index_type j) {
+                                     POLYBENCH_GEMVER_BODY1;
+                                   };
+      auto poly_gemver_base_lam3 = [=](Index_type i, Index_type j,
+                                       Real_type &dot) {
+                                     POLYBENCH_GEMVER_BODY3;
+                                   };
+      auto poly_gemver_base_lam4 = [=](Index_type i, Real_type &dot) {
+                                     POLYBENCH_GEMVER_BODY4;
+                                   };
+      auto poly_gemver_base_lam5 = [=](Index_type i) {
+                                     POLYBENCH_GEMVER_BODY5;
+                                   };
+      auto poly_gemver_base_lam7 = [=](Index_type i, Index_type j,
+                                       Real_type &dot) {
+                                     POLYBENCH_GEMVER_BODY7;
+                                   };
+      auto poly_gemver_base_lam8 = [=](Index_type i, Real_type &dot) {
+                                     POLYBENCH_GEMVER_BODY8;
+                                   };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          std::for_each(begin, end, [=](Index_type j) {
+            poly_gemver_base_lam1(i, j);
+          });
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY2;
+          std::for_each(begin, end, [=,&dot](Index_type j) {
+            poly_gemver_base_lam3(i, j, dot);
+          });
+          poly_gemver_base_lam4(i, dot);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          poly_gemver_base_lam5(i);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          POLYBENCH_GEMVER_BODY6;
+          std::for_each(begin, end, [=,&dot](Index_type j) {
+            poly_gemver_base_lam7(i, j, dot);
+          });
+          poly_gemver_base_lam8(i, dot);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      POLYBENCH_GEMVER_VIEWS_RAJA;
+
+      auto poly_gemver_lam1 = [=] (Index_type i, Index_type j) {
+                                   POLYBENCH_GEMVER_BODY1_RAJA;
+                                  };
+      auto poly_gemver_lam2 = [=] (Real_type &dot) {
+                                   POLYBENCH_GEMVER_BODY2_RAJA;
+                                  };
+      auto poly_gemver_lam3 = [=] (Index_type i, Index_type j, Real_type &dot) {
+                                   POLYBENCH_GEMVER_BODY3_RAJA;
+                                  };
+      auto poly_gemver_lam4 = [=] (Index_type i, Real_type &dot) {
+                                   POLYBENCH_GEMVER_BODY4_RAJA;
+                                  };
+      auto poly_gemver_lam5 = [=] (Index_type i) {
+                                   POLYBENCH_GEMVER_BODY5_RAJA;
+                                  };
+      auto poly_gemver_lam6 = [=] (Index_type i, Real_type &dot) {
+                                   POLYBENCH_GEMVER_BODY6_RAJA;
+                                  };
+      auto poly_gemver_lam7 = [=] (Index_type i, Index_type j, Real_type &dot) {
+                                   POLYBENCH_GEMVER_BODY7_RAJA;
+                                  };
+      auto poly_gemver_lam8 = [=] (Index_type i, Real_type &dot) {
+                                   POLYBENCH_GEMVER_BODY8_RAJA;
+                                  };
+
+      using EXEC_POL1 =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::Lambda<0, RAJA::Segs<0,1>>
+            >
+          >
+        >;
+
+      using EXEC_POL2 =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::Lambda<0, RAJA::Params<0>>,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>>
+            >,
+            RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>>
+          >
+        >;
+
+      using EXEC_POL3 = RAJA::loop_exec;
+
+      using EXEC_POL4 =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>>
+            >,
+            RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>>
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::kernel<EXEC_POL1>( RAJA::make_tuple(RAJA::RangeSegment{0, n},
+                                                  RAJA::RangeSegment{0, n}),
+          poly_gemver_lam1
+        );
+
+        RAJA::kernel_param<EXEC_POL2>(
+          RAJA::make_tuple(RAJA::RangeSegment{0, n},
+                           RAJA::RangeSegment{0, n}),
+          RAJA::tuple<Real_type>{0.0},
+
+          poly_gemver_lam2,
+          poly_gemver_lam3,
+          poly_gemver_lam4
+        );
+
+        RAJA::forall<EXEC_POL3> (RAJA::RangeSegment{0, n},
+          poly_gemver_lam5
+        );
+
+        RAJA::kernel_param<EXEC_POL4>(
+          RAJA::make_tuple(RAJA::RangeSegment{0, n},
+                           RAJA::RangeSegment{0, n}),
+          RAJA::tuple<Real_type>{0.0},
+
+          poly_gemver_lam6,
+          poly_gemver_lam7,
+          poly_gemver_lam8
+
+        );
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp
new file mode 100644
index 000000000..070e56c18
--- /dev/null
+++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp
@@ -0,0 +1,145 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_GESUMMV.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace polybench
+{
+// Runs the Base/Lambda/RAJA StdPar variants of the GESUMMV kernel, timing run_reps repetitions.
+void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps= getRunReps();
+
+  POLYBENCH_GESUMMV_DATA_SETUP;
+
+  counting_iterator<Index_type> begin(0);
+  counting_iterator<Index_type> end(N);
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          POLYBENCH_GESUMMV_BODY1;   // presumably declares 'tmpdot' and 'ydot' captured below
+          std::for_each(begin, end, [=,&tmpdot,&ydot](Index_type j) {
+            POLYBENCH_GESUMMV_BODY2;
+          });
+          POLYBENCH_GESUMMV_BODY3;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto poly_gesummv_base_lam2 = [=](Index_type i, Index_type j,
+                                        Real_type& tmpdot, Real_type& ydot) {
+                                      POLYBENCH_GESUMMV_BODY2;
+                                     };
+      auto poly_gesummv_base_lam3 = [=](Index_type i,
+                                        Real_type& tmpdot, Real_type& ydot) {
+                                      POLYBENCH_GESUMMV_BODY3;
+                                     };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end, [=](Index_type i) {
+          POLYBENCH_GESUMMV_BODY1;
+          std::for_each(begin, end, [=,&tmpdot,&ydot](Index_type j) {
+            poly_gesummv_base_lam2(i, j, tmpdot, ydot);
+          });
+          poly_gesummv_base_lam3(i, tmpdot, ydot);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      POLYBENCH_GESUMMV_VIEWS_RAJA;
+
+      auto poly_gesummv_lam1 = [=](Real_type& tmpdot, Real_type& ydot) {
+                                   POLYBENCH_GESUMMV_BODY1_RAJA;
+                                  };
+      auto poly_gesummv_lam2 = [=](Index_type i, Index_type j,
+                                   Real_type& tmpdot, Real_type& ydot) {
+                                   POLYBENCH_GESUMMV_BODY2_RAJA;
+                                  };
+      auto poly_gesummv_lam3 = [=](Index_type i,
+                                   Real_type& tmpdot, Real_type& ydot) {
+                                   POLYBENCH_GESUMMV_BODY3_RAJA;
+                                  };
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::Lambda<0, RAJA::Params<0,1>>,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Params<0,1>>
+            >,
+            RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0,1>>
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::kernel_param<EXEC_POL>(
+          RAJA::make_tuple( RAJA::RangeSegment{0, N},
+                            RAJA::RangeSegment{0, N} ),
+          RAJA::make_tuple(static_cast<Real_type>(0.0),
+                           static_cast<Real_type>(0.0)),
+
+          poly_gesummv_lam1,
+          poly_gesummv_lam2,
+          poly_gesummv_lam3
+        );
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp
new file mode 100644
index 000000000..d18a359f9
--- /dev/null
+++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp
@@ -0,0 +1,188 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_HEAT_3D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace polybench
+{
+// Runs the Base/Lambda/RAJA StdPar variants of the HEAT_3D kernel: tsteps sweeps of two stencil passes.
+void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+
+  POLYBENCH_HEAT_3D_DATA_SETUP;
+
+  counting_iterator<Index_type> begin(1);      // interior points only: [1, N-1)
+  counting_iterator<Index_type> end(N-1);
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            std::for_each( std::execution::unseq,
+                           begin, end,
+                           [=](Index_type j) {
+              std::for_each( std::execution::unseq,
+                             begin, end,
+                             [=](Index_type k) {
+                POLYBENCH_HEAT_3D_BODY1;
+              });
+            });
+          });
+
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            std::for_each( std::execution::unseq,
+                           begin, end,
+                           [=](Index_type j) {
+              std::for_each( std::execution::unseq,
+                             begin, end,
+                             [=](Index_type k) {
+                POLYBENCH_HEAT_3D_BODY2;
+              });
+            });
+          });
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_HEAT_3D_DATA_RESET;
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto poly_heat3d_base_lam1 = [=](Index_type i, Index_type j,
+                                       Index_type k) {
+                                     POLYBENCH_HEAT_3D_BODY1;
+                                   };
+      auto poly_heat3d_base_lam2 = [=](Index_type i, Index_type j,
+                                       Index_type k) {
+                                     POLYBENCH_HEAT_3D_BODY2;
+                                   };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+
+          std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) {  // same nest as Base_StdPar (was serial for-loops)
+            std::for_each( std::execution::unseq, begin, end, [=](Index_type j) {
+              std::for_each( std::execution::unseq, begin, end, [=](Index_type k) {
+                poly_heat3d_base_lam1(i, j, k);
+              });
+            });
+          });
+
+          std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) {
+            std::for_each( std::execution::unseq, begin, end, [=](Index_type j) {
+              std::for_each( std::execution::unseq, begin, end, [=](Index_type k) {
+                poly_heat3d_base_lam2(i, j, k);
+              });
+            });
+          });
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_HEAT_3D_DATA_RESET;
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      POLYBENCH_HEAT_3D_VIEWS_RAJA;
+
+      auto poly_heat3d_lam1 = [=](Index_type i, Index_type j, Index_type k) {
+                                  POLYBENCH_HEAT_3D_BODY1_RAJA;
+                                };
+      auto poly_heat3d_lam2 = [=](Index_type i, Index_type j, Index_type k) {
+                                  POLYBENCH_HEAT_3D_BODY2_RAJA;
+                                };
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::For<2, RAJA::loop_exec,
+                RAJA::statement::Lambda<0>
+              >
+            >
+          >,
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::For<2, RAJA::loop_exec,
+                RAJA::statement::Lambda<1>
+              >
+            >
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+
+          RAJA::kernel<EXEC_POL>( RAJA::make_tuple(RAJA::RangeSegment{1, N-1},
+                                                   RAJA::RangeSegment{1, N-1},
+                                                   RAJA::RangeSegment{1, N-1}),
+
+            poly_heat3d_lam1,
+            poly_heat3d_lam2
+          );
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_HEAT_3D_DATA_RESET;
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp
new file mode 100644
index 000000000..1b1ce72f2
--- /dev/null
+++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp
@@ -0,0 +1,137 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_JACOBI_1D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace polybench
+{
+// Runs the Base/Lambda/RAJA StdPar variants of the JACOBI_1D kernel: tsteps sweeps of two passes.
+void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps= getRunReps();
+
+  POLYBENCH_JACOBI_1D_DATA_SETUP;
+
+  counting_iterator<Index_type> begin(1);      // interior points only: [1, N-1)
+  counting_iterator<Index_type> end(N-1);
+
+  auto poly_jacobi1d_lam1 = [=] (Index_type i) {  // function scope: shared by Lambda_StdPar and RAJA_StdPar
+                              POLYBENCH_JACOBI_1D_BODY1;
+                            };
+  auto poly_jacobi1d_lam2 = [=] (Index_type i) {
+                              POLYBENCH_JACOBI_1D_BODY2;
+                            };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            POLYBENCH_JACOBI_1D_BODY1;
+          });
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            POLYBENCH_JACOBI_1D_BODY2;
+          });
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_JACOBI_1D_DATA_RESET;
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            poly_jacobi1d_lam1(i);
+          });
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            poly_jacobi1d_lam2(i);
+          });
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_JACOBI_1D_DATA_RESET;
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+
+          RAJA::forall<RAJA::loop_exec> ( RAJA::RangeSegment{1, N-1},
+            poly_jacobi1d_lam1
+          );
+
+          RAJA::forall<RAJA::loop_exec> ( RAJA::RangeSegment{1, N-1},
+            poly_jacobi1d_lam2
+          );
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_JACOBI_1D_DATA_RESET;
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp
new file mode 100644
index 000000000..41cd58b2b
--- /dev/null
+++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp
@@ -0,0 +1,176 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_JACOBI_2D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace polybench
+{
+// Runs the Base/Lambda/RAJA StdPar variants of the JACOBI_2D kernel: tsteps sweeps of two passes.
+void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps= getRunReps();
+
+  POLYBENCH_JACOBI_2D_DATA_SETUP;
+
+  counting_iterator<Index_type> begin(1);      // interior points only: [1, N-1)
+  counting_iterator<Index_type> end(N-1);
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            std::for_each( std::execution::unseq,
+                           begin, end,
+                           [=](Index_type j) {
+              POLYBENCH_JACOBI_2D_BODY1;
+            });
+          });
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            std::for_each( std::execution::unseq,
+                           begin, end,
+                           [=](Index_type j) {
+              POLYBENCH_JACOBI_2D_BODY2;
+            });
+          });
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_JACOBI_2D_DATA_RESET;
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto poly_jacobi2d_base_lam1 = [=](Index_type i, Index_type j) {
+                                       POLYBENCH_JACOBI_2D_BODY1;
+                                     };
+      auto poly_jacobi2d_base_lam2 = [=](Index_type i, Index_type j) {
+                                       POLYBENCH_JACOBI_2D_BODY2;
+                                     };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            std::for_each( std::execution::unseq,
+                           begin, end,
+                           [=](Index_type j) {
+              poly_jacobi2d_base_lam1(i, j);
+            });
+          });
+
+          std::for_each( std::execution::par_unseq,
+                         begin, end,
+                         [=](Index_type i) {
+            std::for_each( std::execution::unseq,
+                           begin, end,
+                           [=](Index_type j) {
+              poly_jacobi2d_base_lam2(i, j);
+            });
+          });
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_JACOBI_2D_DATA_RESET;
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      POLYBENCH_JACOBI_2D_VIEWS_RAJA;
+
+      auto poly_jacobi2d_lam1 = [=](Index_type i, Index_type j) {
+                                    POLYBENCH_JACOBI_2D_BODY1_RAJA;
+                                  };
+      auto poly_jacobi2d_lam2 = [=](Index_type i, Index_type j) {
+                                    POLYBENCH_JACOBI_2D_BODY2_RAJA;
+                                  };
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::Lambda<0>
+            >
+          >,
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::Lambda<1>
+            >
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type t = 0; t < tsteps; ++t) {
+
+          RAJA::kernel<EXEC_POL>( RAJA::make_tuple(RAJA::RangeSegment{1, N-1},
+                                                   RAJA::RangeSegment{1, N-1}),
+
+            poly_jacobi2d_lam1,
+            poly_jacobi2d_lam2
+          );
+
+        }
+
+      }
+      stopTimer();
+
+      POLYBENCH_JACOBI_2D_DATA_RESET;
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
diff --git
a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp
new file mode 100644
index 000000000..7adc162de
--- /dev/null
+++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp
@@ -0,0 +1,204 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "POLYBENCH_MVT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+
+namespace rajaperf
+{
+namespace polybench
+{
+// Runs the Base/Lambda/RAJA StdPar variants of the MVT kernel (two matrix-vector loop nests).
+void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps= getRunReps();
+
+  POLYBENCH_MVT_DATA_SETUP;
+
+  counting_iterator<Index_type> begin(0);
+  counting_iterator<Index_type> end(N);
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          POLYBENCH_MVT_BODY1;   // presumably declares the accumulator 'dot' captured below
+          std::for_each( std::execution::unseq,
+                         begin, end,
+                         [=,&dot](Index_type j) {
+            POLYBENCH_MVT_BODY2;
+          });
+          POLYBENCH_MVT_BODY3;
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          POLYBENCH_MVT_BODY4;   // presumably declares 'dot' for the second nest
+          std::for_each( std::execution::unseq,
+                         begin, end,
+                         [=,&dot](Index_type j) {
+            POLYBENCH_MVT_BODY5;
+          });
+          POLYBENCH_MVT_BODY6;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto poly_mvt_base_lam2 = [=] (Index_type i, Index_type j,
+                                     Real_type &dot) {
+                                  POLYBENCH_MVT_BODY2;
+                                };
+      auto poly_mvt_base_lam3 = [=] (Index_type i,
+                                     Real_type &dot) {
+                                  POLYBENCH_MVT_BODY3;
+                                };
+      auto poly_mvt_base_lam5 = [=] (Index_type i, Index_type j,
+                                     Real_type &dot) {
+                                  POLYBENCH_MVT_BODY5;
+                                };
+      auto poly_mvt_base_lam6 = [=] (Index_type i,
+                                     Real_type &dot) {
+                                  POLYBENCH_MVT_BODY6;
+                                };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          POLYBENCH_MVT_BODY1;
+          std::for_each( std::execution::unseq,
+                         begin, end,
+                         [=,&dot](Index_type j) {
+            poly_mvt_base_lam2(i, j, dot);
+          });
+          poly_mvt_base_lam3(i, dot);
+        });
+
+        std::for_each( std::execution::par_unseq,
+                       begin, end,
+                       [=](Index_type i) {
+          POLYBENCH_MVT_BODY4;
+          std::for_each( std::execution::unseq,
+                         begin, end,
+                         [=,&dot](Index_type j) {
+            poly_mvt_base_lam5(i, j, dot);
+          });
+          poly_mvt_base_lam6(i, dot);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      POLYBENCH_MVT_VIEWS_RAJA;
+
+      auto poly_mvt_lam1 = [=] (Real_type &dot) {
+                                POLYBENCH_MVT_BODY1_RAJA;
+                               };
+      auto poly_mvt_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) {
+                                POLYBENCH_MVT_BODY2_RAJA;
+                               };
+      auto poly_mvt_lam3 = [=] (Index_type i, Real_type &dot) {
+                                POLYBENCH_MVT_BODY3_RAJA;
+                               };
+      auto poly_mvt_lam4 = [=] (Real_type &dot) {
+                                POLYBENCH_MVT_BODY4_RAJA;
+                               };
+      auto poly_mvt_lam5 = [=] (Index_type i, Index_type j, Real_type &dot) {
+                                POLYBENCH_MVT_BODY5_RAJA;
+                               };
+      auto poly_mvt_lam6 = [=] (Index_type i, Real_type &dot) {
+                                POLYBENCH_MVT_BODY6_RAJA;
+                               };
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<0, RAJA::loop_exec,
+            RAJA::statement::Lambda<0, RAJA::Params<0>>,
+            RAJA::statement::For<1, RAJA::loop_exec,
+              RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>>
+            >,
+            RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>>
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::region<RAJA::seq_region>( [=]() {
+
+          RAJA::kernel_param<EXEC_POL>(
+            RAJA::make_tuple(RAJA::RangeSegment{0, N},
+                             RAJA::RangeSegment{0, N}),
+            RAJA::tuple<Real_type>{0.0},
+
+            poly_mvt_lam1,
+            poly_mvt_lam2,
+            poly_mvt_lam3
+
+          );
+
+          RAJA::kernel_param<EXEC_POL>(
+            RAJA::make_tuple(RAJA::RangeSegment{0, N},
+                             RAJA::RangeSegment{0, N}),
+            RAJA::tuple<Real_type>{0.0},
+
+            poly_mvt_lam4,
+            poly_mvt_lam5,
+            poly_mvt_lam6
+
+          );
+
+        }); // end sequential region (for single-source code)
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n POLYBENCH_MVT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace polybench
+} // end namespace rajaperf
diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp
new file mode 100644
index 000000000..bde010541
--- /dev/null
+++ b/src/stream/ADD-StdPar.cpp
@@ -0,0 +1,103 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "ADD.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+// Runs the Base/Lambda/RAJA StdPar variants of the stream ADD kernel over [0, getActualProblemSize()).
+void ADD::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  auto begin = counting_iterator<Index_type>(ibegin);
+  auto end   = counting_iterator<Index_type>(iend);
+
+  ADD_DATA_SETUP;
+
+  auto add_lam = [=](Index_type i) {
+                   ADD_BODY;
+                 };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin,end,
+                       [=](Index_type i) {
+          ADD_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each( std::execution::par_unseq,
+                       begin,end,
+                       [=](Index_type i) {
+          add_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), add_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n ADD : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace stream
+} // end namespace rajaperf
diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp
new file mode 100644
index 000000000..1fc757e22
--- /dev/null
+++ b/src/stream/COPY-StdPar.cpp
@@ -0,0 +1,92 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "COPY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <algorithm>
+#include <execution>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+// Runs the Base/Lambda/RAJA StdPar variants of the stream COPY kernel (a -> c over [ibegin, iend)).
+void COPY::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  COPY_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::copy( std::execution::par_unseq,
+                   &a[ibegin], &a[iend], &c[ibegin]);
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::copy( std::execution::par_unseq,   // same call as Base_StdPar: no lambda form of std::copy
+                   &a[ibegin], &a[iend], &c[ibegin]);
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      auto copy_lam = [=](Index_type i) {
+                        COPY_BODY;   // NOTE(review): assumes COPY.hpp provides COPY_BODY, like ADD_BODY/DOT_BODY
+                      };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), copy_lam);
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n COPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace stream
+} // end namespace rajaperf
diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp
new file mode 100644
index 000000000..cf22a9e35
--- /dev/null
+++ b/src/stream/DOT-StdPar.cpp
@@ -0,0 +1,116 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DOT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "common/StdParUtils.hpp"
+#include <execution>
+#include <functional>
+#include <numeric>
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+// Runs the Base/Lambda/RAJA StdPar variants of the stream DOT kernel, accumulating into m_dot.
+void DOT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DOT_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Real_type dot = m_dot_init;
+
+        dot += std::transform_reduce( std::execution::par_unseq,   // default ops: sum of a[i]*b[i]
+                                      &a[ibegin], &a[iend], &b[ibegin],
+                                      (Real_type)0);
+
+        m_dot += dot;
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      auto dot_base_lam = [=](Index_type i) -> Real_type {
+                            return a[i] * b[i];
+                          };
+
+      auto begin = counting_iterator<Index_type>(ibegin);
+      auto end   = counting_iterator<Index_type>(iend);
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Real_type dot = m_dot_init;
+
+        dot += std::transform_reduce( std::execution::par_unseq,
+                                      begin,end,
+                                      (Real_type)0,
+                                      std::plus<Real_type>(),
+                                      dot_base_lam);
+
+        m_dot += dot;
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+#if defined(RUN_RAJA_STDPAR)
+    case RAJA_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::ReduceSum<RAJA::seq_reduce, Real_type> dot(m_dot_init);
+
+        RAJA::forall<RAJA::loop_exec>(
+          RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
+          DOT_BODY;
+        });
+
+        m_dot += static_cast<Real_type>(dot.get());
+
+      }
+      stopTimer();
+
+      break;
+    }
+#endif // RUN_RAJA_STDPAR
+
+    default : {
+      std::cout << "\n DOT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace stream
+} // end namespace rajaperf
diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp
new file mode 100644
index 000000000..6cf9f418f
--- /dev/null
+++ b/src/stream/MUL-StdPar.cpp
@@ -0,0 +1,103 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MUL.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void MUL::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + MUL_DATA_SETUP; + + auto mul_lam = [=](Index_type i) { + MUL_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + MUL_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + mul_lam(i); + }); + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), mul_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n MUL : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp new file mode 100644 index 000000000..484d79cc3 --- /dev/null +++ b/src/stream/TRIAD-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. 
+// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRIAD.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + TRIAD_DATA_SETUP; + + auto triad_lam = [=](Index_type i) { + TRIAD_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + TRIAD_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + triad_lam(i); + }); + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), triad_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + std::cout << "\n TRIAD : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf From baae1b28dc4fd8b3ef88cb519bb155c7e1c93795 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 7 Jul 2022 17:26:19 +0300 Subject: [PATCH 055/174] starting over with StdPar because git submodules are trash --- src/common/StdParUtils.hpp | 108 +++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 
src/common/StdParUtils.hpp diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp new file mode 100644 index 000000000..f765f517d --- /dev/null +++ b/src/common/StdParUtils.hpp @@ -0,0 +1,108 @@ +/* +Copyright (c) 2021, NVIDIA +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +// This implementation was authored by David Olsen + +#include + +template +struct counting_iterator { + +private: + typedef counting_iterator self; + +public: + typedef T value_type; + typedef typename std::make_signed::type difference_type; + typedef T const* pointer; + typedef T const& reference; + typedef std::random_access_iterator_tag iterator_category; + + explicit counting_iterator(value_type v) : value(v) { } + + value_type operator*() const { return value; } + value_type operator[](difference_type n) const { return value + n; } + + self& operator++() { ++value; return *this; } + self operator++(int) { + self result{value}; + ++value; + return result; + } + self& operator--() { --value; return *this; } + self operator--(int) { + self result{value}; + --value; + return result; + } + self& operator+=(difference_type n) { value += n; return *this; } + self& operator-=(difference_type n) { value -= n; return *this; } + + friend self operator+(self const& i, difference_type n) { + return self(i.value + n); + } + friend self operator+(difference_type n, self const& i) { + return self(i.value + n); + } + friend difference_type operator-(self const& x, self const& y) { + return x.value - y.value; + } + friend self operator-(self const& i, difference_type n) { + return self(i.value - n); + } + + friend bool operator==(self const& x, self const& y) { + return x.value == y.value; + } + friend bool operator!=(self const& x, self const& y) { + return x.value != y.value; + } + friend bool operator<(self const& x, self const& y) { + return x.value < y.value; + } + friend bool operator<=(self const& x, self const& y) { + return x.value <= y.value; + } + friend bool operator>(self const& x, self const& y) { + return x.value > y.value; + } + friend bool operator>=(self const& x, self const& y) { + return x.value >= y.value; + } +private: + value_type value; +}; + +template ::value>::type> +inline counting_iterator make_counter(T value) { + return 
counting_iterator{value}; +} + From 0044ac95084a41687121fa00e9d17919a640b877 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 11:18:53 +0300 Subject: [PATCH 056/174] fix MEM*** --- src/algorithm/CMakeLists.txt | 2 ++ src/algorithm/MEMCPY.cpp | 4 ++++ src/algorithm/MEMCPY.hpp | 3 +++ src/algorithm/MEMSET.cpp | 4 ++++ src/algorithm/MEMSET.hpp | 3 +++ 5 files changed, 16 insertions(+) diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index 7c0fcd39f..03d6069ba 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -33,12 +33,14 @@ blt_add_library( REDUCE_SUM-OMPTarget.cpp MEMSET.cpp MEMSET-Seq.cpp + MEMSET-StdPar.cpp MEMSET-Hip.cpp MEMSET-Cuda.cpp MEMSET-OMP.cpp MEMSET-OMPTarget.cpp MEMCPY.cpp MEMCPY-Seq.cpp + MEMCPY-StdPar.cpp MEMCPY-Hip.cpp MEMCPY-Cuda.cpp MEMCPY-OMP.cpp diff --git a/src/algorithm/MEMCPY.cpp b/src/algorithm/MEMCPY.cpp index 1447fc4f8..80c7f4f62 100644 --- a/src/algorithm/MEMCPY.cpp +++ b/src/algorithm/MEMCPY.cpp @@ -51,6 +51,10 @@ MEMCPY::MEMCPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } MEMCPY::~MEMCPY() diff --git a/src/algorithm/MEMCPY.hpp b/src/algorithm/MEMCPY.hpp index 2477115ce..117bd8a6d 100644 --- a/src/algorithm/MEMCPY.hpp +++ b/src/algorithm/MEMCPY.hpp @@ -57,10 +57,13 @@ class MEMCPY : public KernelBase void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); + void setStdParTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); void runSeqVariantDefault(VariantID vid); void runSeqVariantLibrary(VariantID vid); + void runStdParVariantDefault(VariantID vid); + void runStdParVariantLibrary(VariantID vid); template < size_t block_size > void runCudaVariantBlock(VariantID vid); 
diff --git a/src/algorithm/MEMSET.cpp b/src/algorithm/MEMSET.cpp index 98152d917..3cf345bd6 100644 --- a/src/algorithm/MEMSET.cpp +++ b/src/algorithm/MEMSET.cpp @@ -52,6 +52,10 @@ MEMSET::MEMSET(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } MEMSET::~MEMSET() diff --git a/src/algorithm/MEMSET.hpp b/src/algorithm/MEMSET.hpp index 0e9630fa7..0cf2a75bf 100644 --- a/src/algorithm/MEMSET.hpp +++ b/src/algorithm/MEMSET.hpp @@ -57,10 +57,13 @@ class MEMSET : public KernelBase void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); + void setStdParTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); void runSeqVariantDefault(VariantID vid); void runSeqVariantLibrary(VariantID vid); + void runStdParVariantDefault(VariantID vid); + void runStdParVariantLibrary(VariantID vid); template < size_t block_size > void runCudaVariantBlock(VariantID vid); From 4a57467b7dfe21e0ba99577d1a6f3a98934b1e87 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 11:59:28 +0300 Subject: [PATCH 057/174] add exec and alg to stdpar helper header --- src/common/StdParUtils.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index f765f517d..ab2e49f46 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -30,6 +30,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// This implementation was authored by David Olsen +#include +#include #include template From e0349b459569e75ff2caa91c2a0bff0c51b89dd4 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:00:54 +0300 Subject: [PATCH 058/174] more stdpar --- src/algorithm/MEMCPY-StdPar.cpp | 193 ++++++++++++++++++ src/algorithm/MEMSET-StdPar.cpp | 193 ++++++++++++++++++ src/apps/CMakeLists.txt | 1 + src/apps/CONVECTION3DPA-StdPar.cpp | 316 +++++++++++++++++++++++++++++ src/apps/CONVECTION3DPA.cpp | 1 + src/basic/CMakeLists.txt | 1 + src/basic/DAXPY_ATOMIC-StdPar.cpp | 93 +++++++++ 7 files changed, 798 insertions(+) create mode 100644 src/algorithm/MEMCPY-StdPar.cpp create mode 100644 src/algorithm/MEMSET-StdPar.cpp create mode 100644 src/apps/CONVECTION3DPA-StdPar.cpp create mode 100644 src/basic/DAXPY_ATOMIC-StdPar.cpp diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp new file mode 100644 index 000000000..ff8e66c08 --- /dev/null +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -0,0 +1,193 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMCPY.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void MEMCPY::runStdParVariantLibrary(VariantID vid) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::copy_n(std::execution::par_unseq, + x+ibegin, iend-ibegin, y+ibegin); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + camp::resources::Host res = camp::resources::Host::get_default(); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + res.memcpy(MEMCPY_STD_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +void MEMCPY::runStdParVariantDefault(VariantID vid) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + MEMCPY_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto memcpy_lambda = [=](Index_type i) { + MEMCPY_BODY; + }; + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + 
std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + memcpy_lambda(i); + }); + + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + MEMCPY_BODY; + }); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +void MEMCPY::runStdParVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_StdPar || vid == RAJA_StdPar) { + + if (tune_idx == t) { + + runStdParVariantLibrary(vid); + + } + + t += 1; + + } + + if (tune_idx == t) { + + runStdParVariantDefault(vid); + + } + + t += 1; +} + +void MEMCPY::setStdParTuningDefinitions(VariantID vid) +{ + if (vid == Base_StdPar || vid == RAJA_StdPar) { + addVariantTuningName(vid, "library"); + } + + addVariantTuningName(vid, "default"); +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp new file mode 100644 index 000000000..73a61d5cf --- /dev/null +++ b/src/algorithm/MEMSET-StdPar.cpp @@ -0,0 +1,193 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMSET.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" +#include +#include + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void MEMSET::runStdParVariantLibrary(VariantID vid) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::fill_n(std::execution::par_unseq, + x+ibegin, iend-ibegin, val); + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case RAJA_StdPar : { + + camp::resources::Host res = camp::resources::Host::get_default(); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + res.memset(MEMSET_STD_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +void MEMSET::runStdParVariantDefault(VariantID vid) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( std::execution::par_unseq, + begin,end, + [=](Index_type i) { + MEMSET_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto memset_lambda = [=](Index_type i) { + MEMSET_BODY; + }; + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each( 
std::execution::par_unseq, + begin,end, + [=](Index_type i) { + memset_lambda(i); + }); + + } + stopTimer(); + + break; + } + +#ifdef RAJA_ENABLE_STDPAR + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + MEMSET_BODY; + }); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +void MEMSET::runStdParVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_StdPar || vid == RAJA_StdPar) { + + if (tune_idx == t) { + + runStdParVariantLibrary(vid); + + } + + t += 1; + + } + + if (tune_idx == t) { + + runStdParVariantDefault(vid); + + } + + t += 1; +} + +void MEMSET::setStdParTuningDefinitions(VariantID vid) +{ + if (vid == Base_StdPar || vid == RAJA_StdPar) { + addVariantTuningName(vid, "library"); + } + + addVariantTuningName(vid, "default"); +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index e79db7717..dbe74ba13 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -13,6 +13,7 @@ blt_add_library( CONVECTION3DPA-Cuda.cpp CONVECTION3DPA-Hip.cpp CONVECTION3DPA-Seq.cpp + CONVECTION3DPA-StdPar.cpp CONVECTION3DPA-OMP.cpp CONVECTION3DPA-OMPTarget.cpp DEL_DOT_VEC_2D.cpp diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp new file mode 100644 index 000000000..1bc58d4f1 --- /dev/null +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -0,0 +1,316 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "CONVECTION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf { +namespace apps { + +void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + const Index_type run_reps = getRunReps(); + + CONVECTION3DPA_DATA_SETUP; + + switch (vid) { + + case Base_StdPar: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (int e = 0; e < NE; ++e) { + + CONVECTION3DPA_0_CPU; + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(dx,x,CPA_D1D) + { + CONVECTION3DPA_1; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CONVECTION3DPA_2; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CONVECTION3DPA_3; + } + } + } + + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(qz,z,CPA_Q1D) + { + CONVECTION3DPA_4; + } + } + } + + CPU_FOREACH(qz,z,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CONVECTION3DPA_5; + } + } + } + + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(qy,y,CPA_Q1D) + { + CPU_FOREACH(dz,z,CPA_D1D) + { + CONVECTION3DPA_6; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(qx,x,CPA_Q1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CONVECTION3DPA_7; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) + { + CPU_FOREACH(dy,y,CPA_D1D) + { + CPU_FOREACH(dx,x,CPA_D1D) + { + CONVECTION3DPA_8; + } + } + } + } // element loop + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case RAJA_StdPar: { + + using launch_policy = RAJA::expt::LaunchPolicy; + + using outer_x = RAJA::expt::LoopPolicy; + + using inner_x = RAJA::expt::LoopPolicy; + + using inner_y = RAJA::expt::LoopPolicy; + + using inner_z = RAJA::expt::LoopPolicy; + + startTimer(); + for 
(RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Grid is empty as the host does not need a compute grid to be specified + RAJA::expt::launch( + RAJA::expt::Grid(), + [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), + [&](int e) { + + CONVECTION3DPA_0_CPU; + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_1; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_2; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + + CONVECTION3DPA_3; + + } // lambda (dy) + ); // RAJA::expt::loop + } // lambda (dx) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + + CONVECTION3DPA_4; + + } // lambda (qz) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qx) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qz) { + RAJA::expt::loop(ctx, 
RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + + CONVECTION3DPA_5; + + } // lambda (qx) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + + CONVECTION3DPA_6; + + } // lambda (dz) + ); // RAJA::expt::loop + } // lambda (qy) + ); //RAJA::expt::loop + } // lambda (qx) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), + [&](int qx) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + + CONVECTION3DPA_7; + + } // lambda (dy) + ); // RAJA::expt::loop + } // lambda (qx) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + ctx.teamSync(); + + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dz) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dy) { + RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), + [&](int dx) { + + CONVECTION3DPA_8; + + } // lambda (dx) + ); // RAJA::expt::loop + } // lambda (dy) + ); //RAJA::expt::loop + } // lambda (dz) + ); //RAJA::expt::loop + + } // lambda (e) + ); // RAJA::expt::loop + + } // outer lambda (ctx) + ); // RAJA::expt::launch + } // loop over kernel reps + stopTimer(); + + return; + } +#endif // RUN_RAJA_SEQ + + default: + getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid + << std::endl; + } +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/CONVECTION3DPA.cpp b/src/apps/CONVECTION3DPA.cpp index 64fcc6063..246ae6b2e 100644 --- a/src/apps/CONVECTION3DPA.cpp +++ b/src/apps/CONVECTION3DPA.cpp @@ -64,6 +64,7 @@ 
CONVECTION3DPA::CONVECTION3DPA(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); } CONVECTION3DPA::~CONVECTION3DPA() diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index cca6bf286..ec2eddf98 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -17,6 +17,7 @@ blt_add_library( DAXPY-OMPTarget.cpp DAXPY_ATOMIC.cpp DAXPY_ATOMIC-Seq.cpp + DAXPY_ATOMIC-StdPar.cpp DAXPY_ATOMIC-Hip.cpp DAXPY_ATOMIC-Cuda.cpp DAXPY_ATOMIC-OMP.cpp diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp new file mode 100644 index 000000000..c143e571d --- /dev/null +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -0,0 +1,93 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DAXPY_ATOMIC.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + DAXPY_ATOMIC_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + DAXPY_ATOMIC_BODY; + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto daxpy_atomic_lam = [=](Index_type i) { + DAXPY_ATOMIC_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + daxpy_atomic_lam(i); + } 
+ + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + DAXPY_ATOMIC_RAJA_BODY(RAJA::seq_atomic); + }); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace basic +} // end namespace rajaperf From cbe5356e1d23defc99ef9d944872cbcc824e9ef3 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:15:22 +0300 Subject: [PATCH 059/174] add SCAN and NODAL --- src/algorithm/CMakeLists.txt | 1 + src/algorithm/SCAN-StdPar.cpp | 90 +++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 src/algorithm/SCAN-StdPar.cpp diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index 03d6069ba..fc9dfa4ef 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -10,6 +10,7 @@ blt_add_library( NAME algorithm SOURCES SCAN.cpp SCAN-Seq.cpp + SCAN-StdPar.cpp SCAN-Hip.cpp SCAN-Cuda.cpp SCAN-OMP.cpp diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp new file mode 100644 index 000000000..d6adeaabf --- /dev/null +++ b/src/algorithm/SCAN-StdPar.cpp @@ -0,0 +1,90 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SCAN.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + SCAN_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + SCAN_PROLOGUE; + for (Index_type i = ibegin; i < iend; ++i ) { + SCAN_BODY; + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + SCAN_PROLOGUE; + auto scan_lam = [=, &scan_var](Index_type i) { + SCAN_BODY; + }; + for (Index_type i = ibegin; i < iend; ++i ) { + scan_lam(i); + } + + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::exclusive_scan(RAJA_SCAN_ARGS); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n SCAN : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace algorithm +} // end namespace rajaperf From 43b9f8f98c0732d5d342c64d29c11a3aa45a40f2 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:17:16 +0300 Subject: [PATCH 060/174] add more stuff --- src/apps/CMakeLists.txt | 1 + src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp | 104 ++++++++++++++++ src/apps/NODAL_ACCUMULATION_3D.cpp | 4 + src/basic/CMakeLists.txt | 1 + src/basic/REDUCE_STRUCT-StdPar.cpp | 139 ++++++++++++++++++++++ 5 files changed, 249 insertions(+) create mode 100644 src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp create mode 100644 src/basic/REDUCE_STRUCT-StdPar.cpp diff --git a/src/apps/CMakeLists.txt 
b/src/apps/CMakeLists.txt index dbe74ba13..04c149782 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -72,6 +72,7 @@ blt_add_library( MASS3DPA-OMPTarget.cpp NODAL_ACCUMULATION_3D.cpp NODAL_ACCUMULATION_3D-Seq.cpp + NODAL_ACCUMULATION_3D-StdPar.cpp NODAL_ACCUMULATION_3D-Hip.cpp NODAL_ACCUMULATION_3D-Cuda.cpp NODAL_ACCUMULATION_3D-OMP.cpp diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp new file mode 100644 index 000000000..585e3a000 --- /dev/null +++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "NODAL_ACCUMULATION_3D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "AppsData.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = m_domain->n_real_zones; + + NODAL_ACCUMULATION_3D_DATA_SETUP; + + NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type ii = ibegin ; ii < iend ; ++ii ) { + NODAL_ACCUMULATION_3D_BODY_INDEX; + NODAL_ACCUMULATION_3D_BODY; + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto nodal_accumulation_3d_lam = [=](Index_type ii) { + NODAL_ACCUMULATION_3D_BODY_INDEX; + NODAL_ACCUMULATION_3D_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) 
{ + + for (Index_type ii = ibegin ; ii < iend ; ++ii ) { + nodal_accumulation_3d_lam(ii); + } + + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + camp::resources::Resource working_res{camp::resources::Host()}; + RAJA::TypedListSegment zones(m_domain->real_zones, + m_domain->n_real_zones, + working_res); + + auto nodal_accumulation_3d_lam = [=](Index_type i) { + NODAL_ACCUMULATION_3D_RAJA_ATOMIC_BODY(RAJA::seq_atomic); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::forall(zones, nodal_accumulation_3d_lam); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + getCout() << "\n NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps/NODAL_ACCUMULATION_3D.cpp b/src/apps/NODAL_ACCUMULATION_3D.cpp index 5fd512fb7..ef652b4a4 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D.cpp @@ -67,6 +67,10 @@ NODAL_ACCUMULATION_3D::NODAL_ACCUMULATION_3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( RAJA_StdPar ); } NODAL_ACCUMULATION_3D::~NODAL_ACCUMULATION_3D() diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index ec2eddf98..bfc2000b9 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -105,6 +105,7 @@ blt_add_library( REDUCE3_INT-OMPTarget.cpp REDUCE_STRUCT.cpp REDUCE_STRUCT-Seq.cpp + REDUCE_STRUCT-StdPar.cpp REDUCE_STRUCT-Hip.cpp REDUCE_STRUCT-Cuda.cpp REDUCE_STRUCT-OMP.cpp diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp new file mode 100644 index 000000000..b91513bdb --- /dev/null +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -0,0 +1,139 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence 
Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE_STRUCT.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +namespace rajaperf +{ +namespace basic +{ + + +void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + REDUCE_STRUCT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type xsum = m_init_sum; Real_type ysum = m_init_sum; + Real_type xmin = m_init_min; Real_type ymin = m_init_min; + Real_type xmax = m_init_max; Real_type ymax = m_init_max; + + for (Index_type i = ibegin; i < iend; ++i ) { + REDUCE_STRUCT_BODY; + } + + points.SetCenter(xsum/(points.N), ysum/(points.N)); + points.SetXMin(xmin); + points.SetXMax(xmax); + points.SetYMin(ymin); + points.SetYMax(ymax); + m_points=points; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto reduce_struct_x_base_lam = [=](Index_type i) -> Real_type { + return points.x[i]; + }; + + auto reduce_struct_y_base_lam = [=](Index_type i) -> Real_type { + return points.y[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type xsum = m_init_sum; Real_type ysum = m_init_sum; + Real_type xmin = m_init_min; Real_type ymin = m_init_min; + Real_type xmax = m_init_max; Real_type ymax = m_init_max; + + for (Index_type i = ibegin; i < iend; ++i ) { + xsum += reduce_struct_x_base_lam(i); + xmin = RAJA_MIN(xmin, reduce_struct_x_base_lam(i)); + xmax = RAJA_MAX(xmax, reduce_struct_x_base_lam(i)); + ysum += reduce_struct_y_base_lam(i); + ymin = RAJA_MIN(ymin, 
reduce_struct_y_base_lam(i)); + ymax = RAJA_MAX(ymax, reduce_struct_y_base_lam(i)); + } + + points.SetCenter(xsum/(points.N), ysum/(points.N)); + points.SetXMin(xmin); + points.SetXMax(xmax); + points.SetYMin(ymin); + points.SetYMax(ymax); + m_points=points; + + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum xsum(m_init_sum); + RAJA::ReduceSum ysum(m_init_sum); + RAJA::ReduceMin xmin(m_init_min); + RAJA::ReduceMin ymin(m_init_min); + RAJA::ReduceMax xmax(m_init_max); + RAJA::ReduceMax ymax(m_init_max); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + REDUCE_STRUCT_BODY_RAJA; + }); + + points.SetCenter(xsum.get()/(points.N), + ysum.get()/(points.N)); + points.SetXMin(xmin.get()); + points.SetXMax(xmax.get()); + points.SetYMin(ymin.get()); + points.SetYMax(ymax.get()); + m_points=points; + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_STDPAR + + default : { + getCout() << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace basic +} // end namespace rajaperf From d7c7a68abc4a916739bcaa9c371dc88eb4a8b220 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:19:21 +0300 Subject: [PATCH 061/174] index list --- src/basic/CMakeLists.txt | 2 + src/basic/INDEXLIST-StdPar.cpp | 84 ++++++++++++++ src/basic/INDEXLIST_3LOOP-StdPar.cpp | 160 +++++++++++++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 src/basic/INDEXLIST-StdPar.cpp create mode 100644 src/basic/INDEXLIST_3LOOP-StdPar.cpp diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index bfc2000b9..24af1abae 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -31,12 +31,14 @@ blt_add_library( IF_QUAD-OMPTarget.cpp INDEXLIST.cpp INDEXLIST-Seq.cpp + INDEXLIST-StdPar.cpp INDEXLIST-Hip.cpp INDEXLIST-Cuda.cpp INDEXLIST-OMP.cpp INDEXLIST-OMPTarget.cpp INDEXLIST_3LOOP.cpp 
INDEXLIST_3LOOP-Seq.cpp + INDEXLIST_3LOOP-StdPar.cpp INDEXLIST_3LOOP-Hip.cpp INDEXLIST_3LOOP-Cuda.cpp INDEXLIST_3LOOP-OMP.cpp diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp new file mode 100644 index 000000000..8b4565b9c --- /dev/null +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -0,0 +1,84 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INDEXLIST.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INDEXLIST_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Index_type count = 0; + + for (Index_type i = ibegin; i < iend; ++i ) { + INDEXLIST_BODY; + } + + m_len = count; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto indexlist_base_lam = [=](Index_type i, Index_type& count) { + INDEXLIST_BODY + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Index_type count = 0; + + for (Index_type i = ibegin; i < iend; ++i ) { + indexlist_base_lam(i, count); + } + + m_len = count; + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp new file mode 
100644 index 000000000..315269450 --- /dev/null +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -0,0 +1,160 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INDEXLIST_3LOOP.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +#define INDEXLIST_3LOOP_DATA_SETUP_StdPar \ + Index_type* counts = new Index_type[iend+1]; + +#define INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar \ + delete[] counts; counts = nullptr; + + + +void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INDEXLIST_3LOOP_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + INDEXLIST_3LOOP_DATA_SETUP_StdPar; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; + } + + Index_type count = 0; + + for (Index_type i = ibegin; i < iend+1; ++i ) { + Index_type inc = counts[i]; + counts[i] = count; + count += inc; + } + + for (Index_type i = ibegin; i < iend; ++i ) { + INDEXLIST_3LOOP_MAKE_LIST; + } + + m_len = counts[iend]; + + } + stopTimer(); + + INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + INDEXLIST_3LOOP_DATA_SETUP_StdPar; + + auto indexlist_conditional_lam = [=](Index_type i) { + counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 
1 : 0; + }; + + auto indexlist_make_list_lam = [=](Index_type i) { + INDEXLIST_3LOOP_MAKE_LIST; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = ibegin; i < iend; ++i ) { + indexlist_conditional_lam(i); + } + + Index_type count = 0; + + for (Index_type i = ibegin; i < iend+1; ++i ) { + Index_type inc = counts[i]; + counts[i] = count; + count += inc; + } + + for (Index_type i = ibegin; i < iend; ++i ) { + indexlist_make_list_lam(i); + } + + m_len = counts[iend]; + + } + stopTimer(); + + INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; + + break; + } + + case RAJA_StdPar : { + + INDEXLIST_3LOOP_DATA_SETUP_StdPar; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum len(0); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; + }); + + RAJA::exclusive_scan_inplace( + RAJA::make_span(counts+ibegin, iend+1-ibegin)); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + if (counts[i] != counts[i+1]) { + list[counts[i]] = i; + len += 1; + } + }); + + m_len = len.get(); + + } + stopTimer(); + + INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; + + break; + } +#endif + + default : { + getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace basic +} // end namespace rajaperf From 854871b17d4ce8de6de3e8af0f8f90139ee4d8c4 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:20:26 +0300 Subject: [PATCH 062/174] reduce sum --- src/algorithm/CMakeLists.txt | 1 + src/algorithm/REDUCE_SUM-StdPar.cpp | 104 ++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 src/algorithm/REDUCE_SUM-StdPar.cpp diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index fc9dfa4ef..2baa2b414 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -28,6 +28,7 @@ blt_add_library( 
SORTPAIRS-OMP.cpp REDUCE_SUM.cpp REDUCE_SUM-Seq.cpp + REDUCE_SUM-StdPar.cpp REDUCE_SUM-Hip.cpp REDUCE_SUM-Cuda.cpp REDUCE_SUM-OMP.cpp diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp new file mode 100644 index 000000000..261be955a --- /dev/null +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE_SUM.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + REDUCE_SUM_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sum = m_sum_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + REDUCE_SUM_BODY; + } + + m_sum = sum; + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_STDPAR) + case Lambda_StdPar : { + + auto reduce_sum_base_lam = [=](Index_type i) { + return x[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sum = m_sum_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + sum += reduce_sum_base_lam(i); + } + + m_sum = sum; + + } + stopTimer(); + + break; + } + + case RAJA_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::ReduceSum sum(m_sum_init); + + RAJA::forall( RAJA::RangeSegment(ibegin, iend), + [=](Index_type i) { + REDUCE_SUM_BODY; + }); + + m_sum = 
sum.get(); + + } + stopTimer(); + + break; + } +#endif + + default : { + getCout() << "\n REDUCE_SUM : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace algorithm +} // end namespace rajaperf From ab625625ca68047945f23df1643323c722d7c6bf Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:51:27 +0300 Subject: [PATCH 063/174] cout fix --- src/basic-kokkos/DAXPY-Kokkos.cpp | 2 +- src/basic-kokkos/IF_QUAD-Kokkos.cpp | 2 +- src/basic-kokkos/INIT3-Kokkos.cpp | 2 +- src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp | 2 +- src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp | 2 +- src/basic-kokkos/MULADDSUB-Kokkos.cpp | 2 +- src/basic-kokkos/NESTED_INIT-Kokkos.cpp | 2 +- src/basic-kokkos/PI_ATOMIC-Kokkos.cpp | 2 +- src/basic-kokkos/REDUCE3_INT-Kokkos.cpp | 2 +- src/basic-kokkos/TRAP_INT-Kokkos.cpp | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp index 6c3ad5e6e..e6ca5d0ad 100644 --- a/src/basic-kokkos/DAXPY-Kokkos.cpp +++ b/src/basic-kokkos/DAXPY-Kokkos.cpp @@ -53,7 +53,7 @@ void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) break; } default: { - std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp index e1b8cc601..67ed8e615 100644 --- a/src/basic-kokkos/IF_QUAD-Kokkos.cpp +++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp @@ -61,7 +61,7 @@ void IF_QUAD::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_id } default: { - std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; + getCout() << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp index eef8ffbaa..97c7a04d4 100644 --- a/src/basic-kokkos/INIT3-Kokkos.cpp +++ 
b/src/basic-kokkos/INIT3-Kokkos.cpp @@ -57,7 +57,7 @@ void INIT3::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) } default: { - std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT3 : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp index 8d59409d1..05da1be92 100644 --- a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp @@ -46,7 +46,7 @@ void INIT_VIEW1D::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } default: { - std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp index ae03fe752..756d517cd 100644 --- a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp @@ -46,7 +46,7 @@ void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ } default: { - std::cout << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid + getCout() << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp index e81cd17d5..1aa8f88bb 100644 --- a/src/basic-kokkos/MULADDSUB-Kokkos.cpp +++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp @@ -56,7 +56,7 @@ void MULADDSUB::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } default: { - std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + getCout() << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; } } moveDataToHostFromKokkosView(out1, out1_view, iend); diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp index f69020e57..c482d9f90 100644 --- 
a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp +++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp @@ -68,7 +68,7 @@ void NESTED_INIT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } default: { - std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; + getCout() << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; } } } diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp index 066aca7aa..326578e66 100644 --- a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp +++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp @@ -59,7 +59,7 @@ void PI_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } default: { - std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; + getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; } } } diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp index b1566d619..47ca2b222 100644 --- a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp +++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp @@ -66,7 +66,7 @@ void REDUCE3_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } default: { - std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp index 47c4596e6..dee9433e8 100644 --- a/src/basic-kokkos/TRAP_INT-Kokkos.cpp +++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp @@ -59,7 +59,7 @@ void TRAP_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i } default: { - std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; } } } From 1f10cdc2c6badaa2ac5e4f6f323cfba4c3cb5eb1 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 12:51:34 +0300 Subject: [PATCH 064/174] cout fix --- 
src/algorithm/SORT-StdPar.cpp | 2 +- src/algorithm/SORTPAIRS-StdPar.cpp | 2 +- src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 2 +- src/apps/DIFFUSION3DPA-StdPar.cpp | 2 +- src/apps/ENERGY-StdPar.cpp | 2 +- src/apps/FIR-StdPar.cpp | 2 +- src/apps/HALOEXCHANGE-StdPar.cpp | 2 +- src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 2 +- src/apps/LTIMES-StdPar.cpp | 2 +- src/apps/LTIMES_NOVIEW-StdPar.cpp | 2 +- src/apps/MASS3DPA-StdPar.cpp | 2 +- src/apps/PRESSURE-StdPar.cpp | 2 +- src/apps/VOL3D-StdPar.cpp | 2 +- src/basic/DAXPY-StdPar.cpp | 18 +++++----- src/basic/DAXPY_ATOMIC-StdPar.cpp | 24 +++++++++---- src/basic/IF_QUAD-StdPar.cpp | 2 +- src/basic/INDEXLIST-StdPar.cpp | 8 +++-- src/basic/INDEXLIST_3LOOP-StdPar.cpp | 15 +++++++- src/basic/INIT3-StdPar.cpp | 21 +++++------ src/basic/INIT_VIEW1D-StdPar.cpp | 2 +- src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 2 +- src/basic/MAT_MAT_SHARED-StdPar.cpp | 35 ++++++++++--------- src/basic/MULADDSUB-StdPar.cpp | 2 +- src/basic/NESTED_INIT-StdPar.cpp | 4 +-- src/basic/PI_ATOMIC-StdPar.cpp | 2 +- src/basic/PI_REDUCE-StdPar.cpp | 5 +-- src/basic/REDUCE3_INT-StdPar.cpp | 9 ++--- src/basic/REDUCE_STRUCT-StdPar.cpp | 19 ++++++++-- src/basic/TRAP_INT-StdPar.cpp | 2 +- src/common/StdParUtils.hpp | 1 + src/lcals/DIFF_PREDICT-StdPar.cpp | 2 +- src/lcals/EOS-StdPar.cpp | 2 +- src/lcals/FIRST_DIFF-StdPar.cpp | 2 +- src/lcals/FIRST_MIN-StdPar.cpp | 2 +- src/lcals/FIRST_SUM-StdPar.cpp | 2 +- src/lcals/GEN_LIN_RECUR-StdPar.cpp | 2 +- src/lcals/HYDRO_1D-StdPar.cpp | 2 +- src/lcals/HYDRO_2D-StdPar.cpp | 2 +- src/lcals/INT_PREDICT-StdPar.cpp | 2 +- src/lcals/PLANCKIAN-StdPar.cpp | 2 +- src/lcals/TRIDIAG_ELIM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_2MM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_3MM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_ADI-StdPar.cpp | 2 +- src/polybench/POLYBENCH_ATAX-StdPar.cpp | 2 +- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 2 +- .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 2 +- src/polybench/POLYBENCH_GEMM-StdPar.cpp | 2 +- 
src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 2 +- src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 2 +- src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 2 +- src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 2 +- src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 2 +- src/polybench/POLYBENCH_MVT-StdPar.cpp | 2 +- src/stream/ADD-StdPar.cpp | 2 +- src/stream/COPY-StdPar.cpp | 2 +- src/stream/DOT-StdPar.cpp | 2 +- src/stream/MUL-StdPar.cpp | 2 +- src/stream/TRIAD-StdPar.cpp | 8 ++--- 59 files changed, 147 insertions(+), 114 deletions(-) diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp index ba808313e..ddf3be505 100644 --- a/src/algorithm/SORT-StdPar.cpp +++ b/src/algorithm/SORT-StdPar.cpp @@ -63,7 +63,7 @@ void SORT::runStdParVariant(VariantID vid, size_t tune_idx) #endif default : { - std::cout << "\n SORT : Unknown variant id = " << vid << std::endl; + getCout() << "\n SORT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index f82b260e5..d97ade603 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -89,7 +89,7 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) #endif default : { - std::cout << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; + getCout() << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp index 93fde5151..7679df246 100644 --- a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -111,7 +111,7 @@ void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl; + getCout() << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index 
38ee4da02..97d0ad239 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -321,7 +321,7 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { #endif // RUN_RAJA_STDPAR default: - std::cout << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid + getCout() << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid << std::endl; } diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp index fceadd05e..d02b68d4a 100644 --- a/src/apps/ENERGY-StdPar.cpp +++ b/src/apps/ENERGY-StdPar.cpp @@ -186,7 +186,7 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n ENERGY : Unknown variant id = " << vid << std::endl; + getCout() << "\n ENERGY : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp index 782a36321..07040148f 100644 --- a/src/apps/FIR-StdPar.cpp +++ b/src/apps/FIR-StdPar.cpp @@ -97,7 +97,7 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n FIR : Unknown variant id = " << vid << std::endl; + getCout() << "\n FIR : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index 11e551fda..e06612b6b 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -171,7 +171,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl; + getCout() << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp index 86967eac6..b320e7fe3 100644 --- a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -254,7 +254,7 @@ void 
HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n HALOEXCHANGE_FUSED : Unknown variant id = " << vid << std::endl; + getCout() << "\n HALOEXCHANGE_FUSED : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index 59422d859..60e8aec63 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -126,7 +126,7 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n LTIMES : Unknown variant id = " << vid << std::endl; + getCout() << "\n LTIMES : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp index 4039f4ffc..7ada8a148 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp +++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -120,7 +120,7 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl; + getCout() << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index 8c8a6a328..58b08b9d1 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -222,7 +222,7 @@ void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { #endif // RUN_RAJA_STDPAR default: - std::cout << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; + getCout() << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; } #endif } diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp index cc1eb2c0a..787def3e6 100644 --- a/src/apps/PRESSURE-StdPar.cpp +++ b/src/apps/PRESSURE-StdPar.cpp @@ -114,7 +114,7 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n PRESSURE 
: Unknown variant id = " << vid << std::endl; + getCout() << "\n PRESSURE : Unknown variant id = " << vid << std::endl; } } diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp index 1997e95cf..fb369f192 100644 --- a/src/apps/VOL3D-StdPar.cpp +++ b/src/apps/VOL3D-StdPar.cpp @@ -98,7 +98,7 @@ void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n VOL3D : Unknown variant id = " << vid << std::endl; + getCout() << "\n VOL3D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp index 61ed338ec..3615eeee8 100644 --- a/src/basic/DAXPY-StdPar.cpp +++ b/src/basic/DAXPY-StdPar.cpp @@ -1,7 +1,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. -// See the RAJAPerf/COPYRIGHT file for details. +// See the RAJAPerf/LICENSE file for details. 
// // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// @@ -11,12 +11,10 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include -namespace rajaperf +namespace rajaperf { namespace basic { @@ -35,10 +33,6 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) DAXPY_DATA_SETUP; - auto daxpy_lam = [=](Index_type i) { - DAXPY_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -60,6 +54,10 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto daxpy_lam = [=](Index_type i) { + DAXPY_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -91,7 +89,7 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) #endif default : { - std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index c143e571d..a0e887e7c 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -20,10 +22,15 @@ namespace basic void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + DAXPY_ATOMIC_DATA_SETUP; switch ( vid ) { @@ -33,9 +40,11 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - for (Index_type i = ibegin; i < iend; ++i ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { DAXPY_ATOMIC_BODY; - } + }); } 
stopTimer(); @@ -43,7 +52,6 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto daxpy_atomic_lam = [=](Index_type i) { @@ -53,9 +61,11 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - for (Index_type i = ibegin; i < iend; ++i ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { daxpy_atomic_lam(i); - } + }); } stopTimer(); @@ -63,12 +73,13 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu break; } +#ifdef RAJA_ENABLE_STDPAR case RAJA_StdPar : { startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::forall( + RAJA::forall( RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { DAXPY_ATOMIC_RAJA_BODY(RAJA::seq_atomic); @@ -87,6 +98,7 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu } +#endif } } // end namespace basic diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp index 3c86353ef..605a1258e 100644 --- a/src/basic/IF_QUAD-StdPar.cpp +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -92,7 +92,7 @@ void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; + getCout() << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index 8b4565b9c..f2b8cb828 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -20,6 +22,7 @@ namespace basic void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type 
ibegin = 0; const Index_type iend = getActualProblemSize(); @@ -35,6 +38,7 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ Index_type count = 0; +#warning needs parallel inscan for (Index_type i = ibegin; i < iend; ++i ) { INDEXLIST_BODY; } @@ -47,7 +51,6 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto indexlist_base_lam = [=](Index_type i, Index_type& count) { @@ -59,6 +62,7 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ Index_type count = 0; +#warning needs parallel inscan for (Index_type i = ibegin; i < iend; ++i ) { indexlist_base_lam(i, count); } @@ -70,7 +74,6 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ break; } -#endif default : { getCout() << "\n INDEXLIST : Unknown variant id = " << vid << std::endl; @@ -78,6 +81,7 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } +#endif } } // end namespace basic diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index 315269450..39657f1ce 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -27,10 +29,14 @@ namespace basic void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + INDEXLIST_3LOOP_DATA_SETUP; switch ( vid ) { @@ -42,18 +48,21 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel for for 
(Index_type i = ibegin; i < iend; ++i ) { counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; } Index_type count = 0; +#warning needs parallel scan for (Index_type i = ibegin; i < iend+1; ++i ) { Index_type inc = counts[i]; counts[i] = count; count += inc; } +#warning needs parallel for for (Index_type i = ibegin; i < iend; ++i ) { INDEXLIST_3LOOP_MAKE_LIST; } @@ -68,7 +77,6 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { INDEXLIST_3LOOP_DATA_SETUP_StdPar; @@ -84,18 +92,21 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel for for (Index_type i = ibegin; i < iend; ++i ) { indexlist_conditional_lam(i); } Index_type count = 0; +#warning needs parallel scan for (Index_type i = ibegin; i < iend+1; ++i ) { Index_type inc = counts[i]; counts[i] = count; count += inc; } +#warning needs parallel for for (Index_type i = ibegin; i < iend; ++i ) { indexlist_make_list_lam(i); } @@ -110,6 +121,7 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG break; } +#if defined(RUN_RAJA_STDPAR) case RAJA_StdPar : { INDEXLIST_3LOOP_DATA_SETUP_StdPar; @@ -154,6 +166,7 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG } +#endif } } // end namespace basic diff --git a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp index 7105fc9d3..d176c3b42 100644 --- a/src/basic/INIT3-StdPar.cpp +++ b/src/basic/INIT3-StdPar.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/COPYRIGHT file for details. 
// @@ -11,12 +11,10 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include -namespace rajaperf +namespace rajaperf { namespace basic { @@ -25,20 +23,15 @@ namespace basic void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - + auto begin = counting_iterator(ibegin); auto end = counting_iterator(iend); INIT3_DATA_SETUP; - auto init3_lam = [=](Index_type i) { - INIT3_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -60,6 +53,10 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto init3_lam = [=](Index_type i) { + INIT3_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -89,10 +86,10 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#endif // RUN_RAJA_STDPAR +#endif default : { - std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT3 : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp index c79d29b97..1ffbde1a0 100644 --- a/src/basic/INIT_VIEW1D-StdPar.cpp +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -98,7 +98,7 @@ void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp index 4014ccacd..ca7bf0130 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -98,7 +98,7 @@ void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n INIT_VIEW1D_OFFSET : Unknown 
variant id = " << vid << std::endl; + getCout() << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp index 02cb8622d..6aa32ea3b 100644 --- a/src/basic/MAT_MAT_SHARED-StdPar.cpp +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/LICENSE file for details. // @@ -8,13 +8,16 @@ #include "MAT_MAT_SHARED.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf { namespace basic { -void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { - +void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type N = m_N; @@ -29,12 +32,11 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning need parallel for for (Index_type by = 0; by < Ny; ++by) { for (Index_type bx = 0; bx < Nx; ++bx) { - //Work around for when compiling with CLANG and HIP - //See notes in MAT_MAT_SHARED.hpp - MAT_MAT_SHARED_BODY_0_CLANG_HIP_CPU(TL_SZ) + MAT_MAT_SHARED_BODY_0(TL_SZ) for (Index_type ty = 0; ty < TL_SZ; ++ty) { for (Index_type tx = 0; tx < TL_SZ; ++tx) { @@ -56,7 +58,7 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { } } - } // Sequential loop + } for (Index_type ty = 0; ty < TL_SZ; ++ty) { for (Index_type tx = 0; tx < TL_SZ; ++tx) { @@ -65,24 +67,21 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { } } } - - } // number of iterations + } stopTimer(); break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar: { startTimer(); - for 
(Index_type irep = 0; irep < run_reps; ++irep) { + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { auto outer_y = [&](Index_type by) { auto outer_x = [&](Index_type bx) { - - MAT_MAT_SHARED_BODY_0_CLANG_HIP_CPU(TL_SZ) + MAT_MAT_SHARED_BODY_0(TL_SZ) auto inner_y_1 = [&](Index_type ty) { auto inner_x_1 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_1(TL_SZ) }; @@ -143,16 +142,17 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { } }; +#warning need parallel for for (Index_type by = 0; by < Ny; ++by) { outer_y(by); } - - } // irep + } stopTimer(); break; } +#ifdef RAJA_ENABLE_STDPAR case RAJA_Sq: { using launch_policy = RAJA::expt::LaunchPolicy; @@ -241,13 +241,14 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) { break; } -#endif // RUN_RAJA_STDPAR +#endif default: { - std::cout << "\n MAT_MAT_SHARED : Unknown variant id = " << vid + getCout() << "\n MAT_MAT_SHARED : Unknown variant id = " << vid << std::endl; } } +#endif } } // end namespace basic diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp index e86287d75..f89c3b179 100644 --- a/src/basic/MULADDSUB-StdPar.cpp +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -91,7 +91,7 @@ void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + getCout() << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp index a37a88dda..be46d2e06 100644 --- a/src/basic/NESTED_INIT-StdPar.cpp +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -66,7 +66,7 @@ void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) #endif { NESTED_INIT_BODY; - //std::cout << i << "," << j << "," << k << ";" << idx << " PAR\n"; + //getCout() << i << "," << j << "," << k << ";" << idx << " PAR\n"; } }); @@ -138,7 +138,7 @@ void NESTED_INIT::runStdParVariant(VariantID vid, 
size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; + getCout() << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 27b7557bf..6b2f80fdc 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -109,7 +109,7 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; + getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp index b2c075278..e1f37eea4 100644 --- a/src/basic/PI_REDUCE-StdPar.cpp +++ b/src/basic/PI_REDUCE-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include -#include #include @@ -106,7 +103,7 @@ void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx) #endif default : { - std::cout << "\n PI_REDUCE : Unknown variant id = " << vid << std::endl; + getCout() << "\n PI_REDUCE : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp index b40129c17..f7b5f5dd5 100644 --- a/src/basic/REDUCE3_INT-StdPar.cpp +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -1,7 +1,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. -// See the RAJAPerf/COPYRIGHT file for details. +// See the RAJAPerf/LICENSE file for details. 
// // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// @@ -12,9 +12,6 @@ #include #include "common/StdParUtils.hpp" -#include -#include -#include #include @@ -129,7 +126,7 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp index b91513bdb..1264f8257 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -11,6 +11,8 @@ #include "RAJA/RAJA.hpp" #include +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -21,10 +23,15 @@ namespace basic void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + REDUCE_STRUCT_DATA_SETUP; switch ( vid ) { @@ -38,8 +45,14 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t Real_type xmin = m_init_min; Real_type ymin = m_init_min; Real_type xmax = m_init_max; Real_type ymax = m_init_max; +#warning needs parallel for (Index_type i = ibegin; i < iend; ++i ) { - REDUCE_STRUCT_BODY; + xsum += points.x[i] ; \ + xmin = RAJA_MIN(xmin, points.x[i]) ; \ + xmax = RAJA_MAX(xmax, points.x[i]) ; \ + ysum += points.y[i] ; \ + ymin = RAJA_MIN(ymin, points.y[i]) ; \ + ymax = RAJA_MAX(ymax, points.y[i]) ; } points.SetCenter(xsum/(points.N), ysum/(points.N)); @@ -55,7 +68,6 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto reduce_struct_x_base_lam = 
[=](Index_type i) -> Real_type { @@ -73,6 +85,7 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t Real_type xmin = m_init_min; Real_type ymin = m_init_min; Real_type xmax = m_init_max; Real_type ymax = m_init_max; +#warning needs parallel for (Index_type i = ibegin; i < iend; ++i ) { xsum += reduce_struct_x_base_lam(i); xmin = RAJA_MIN(xmin, reduce_struct_x_base_lam(i)); @@ -95,6 +108,7 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t break; } +#if defined(RUN_RAJA_STDPAR) case RAJA_StdPar : { startTimer(); @@ -133,6 +147,7 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t } +#endif } } // end namespace basic diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp index 359ed363a..ff626091b 100644 --- a/src/basic/TRAP_INT-StdPar.cpp +++ b/src/basic/TRAP_INT-StdPar.cpp @@ -120,7 +120,7 @@ void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index ab2e49f46..26c65c84b 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -32,6 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include template diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp index b86723185..5ae9aba30 100644 --- a/src/lcals/DIFF_PREDICT-StdPar.cpp +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -91,7 +91,7 @@ void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl; + getCout() << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp index a3aa279f2..8a2aa56a5 100644 --- a/src/lcals/EOS-StdPar.cpp +++ b/src/lcals/EOS-StdPar.cpp @@ -91,7 +91,7 @@ void EOS::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n EOS : Unknown variant id = " << vid << std::endl; + getCout() << "\n EOS : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp index 1a2d15e6c..e327870d9 100644 --- a/src/lcals/FIRST_DIFF-StdPar.cpp +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -91,7 +91,7 @@ void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl; + getCout() << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index 4a019b5b3..4ea40ca09 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -102,7 +102,7 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl; + getCout() << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp index 1f47f9412..c60223fe0 100644 --- 
a/src/lcals/FIRST_SUM-StdPar.cpp +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -91,7 +91,7 @@ void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl; + getCout() << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp index f1cd69a0d..d33cb4eea 100644 --- a/src/lcals/GEN_LIN_RECUR-StdPar.cpp +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -114,7 +114,7 @@ void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl; + getCout() << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp index 45601b347..1099102e3 100644 --- a/src/lcals/HYDRO_1D-StdPar.cpp +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -92,7 +92,7 @@ void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl; + getCout() << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp index 1650dffd5..b6825ebb2 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ b/src/lcals/HYDRO_2D-StdPar.cpp @@ -183,7 +183,7 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl; + getCout() << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp index d8139dfbe..ef2b06df1 100644 --- a/src/lcals/INT_PREDICT-StdPar.cpp +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -92,7 +92,7 @@ void INT_PREDICT::runStdParVariant(VariantID 
vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl; + getCout() << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp index 3d937bb22..7a41c6120 100644 --- a/src/lcals/PLANCKIAN-StdPar.cpp +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -93,7 +93,7 @@ void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl; + getCout() << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl; } } diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index ff1986bc1..28f4a1ae2 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -92,7 +92,7 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; + getCout() << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index feb441614..64d24ff30 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -243,7 +243,7 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index 189caa032..1a6a1ec38 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -319,7 +319,7 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR 
default : { - std::cout << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index 6d2a99650..7fbe1d871 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -224,7 +224,7 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; + getCout() << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp index 1c3d1a3a9..57e8d785a 100644 --- a/src/polybench/POLYBENCH_ATAX-StdPar.cpp +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -201,7 +201,7 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 5bd7435dd..01ebceccd 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -214,7 +214,7 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; + getCout() << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index 023b125d3..0f76e256b 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -146,7 +146,7 @@ 
void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index 1fd75528e..4eda80f76 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -180,7 +180,7 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 2673abd45..7c40bbd64 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -243,7 +243,7 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index 070e56c18..720688eee 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -133,7 +133,7 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index 
d18a359f9..8bb373b52 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -176,7 +176,7 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp index 1b1ce72f2..a39699c97 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -125,7 +125,7 @@ void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index 41cd58b2b..440016ca7 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -164,7 +164,7 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp index 7adc162de..79ff364c6 100644 --- a/src/polybench/POLYBENCH_MVT-StdPar.cpp +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -192,7 +192,7 @@ void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n POLYBENCH_MVT : Unknown variant id = " << vid << std::endl; + getCout() << "\n POLYBENCH_MVT : Unknown variant id 
= " << vid << std::endl; } } diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index bde010541..5f26092a1 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -91,7 +91,7 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n ADD : Unknown variant id = " << vid << std::endl; + getCout() << "\n ADD : Unknown variant id = " << vid << std::endl; } } diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp index 1fc757e22..38e53dbf8 100644 --- a/src/stream/COPY-StdPar.cpp +++ b/src/stream/COPY-StdPar.cpp @@ -76,7 +76,7 @@ void COPY::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n COPY : Unknown variant id = " << vid << std::endl; + getCout() << "\n COPY : Unknown variant id = " << vid << std::endl; } } diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp index cf22a9e35..7d359ebb4 100644 --- a/src/stream/DOT-StdPar.cpp +++ b/src/stream/DOT-StdPar.cpp @@ -104,7 +104,7 @@ void DOT::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n DOT : Unknown variant id = " << vid << std::endl; + getCout() << "\n DOT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index 6cf9f418f..aca05ed54 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -91,7 +91,7 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n MUL : Unknown variant id = " << vid << std::endl; + getCout() << "\n MUL : Unknown variant id = " << vid << std::endl; } } diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index 484d79cc3..48fe2ee76 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -1,7 +1,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// 
Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. -// See the RAJAPerf/COPYRIGHT file for details. +// See the RAJAPerf/LICENSE file for details. // // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include @@ -91,7 +89,7 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) #endif // RUN_RAJA_STDPAR default : { - std::cout << "\n TRIAD : Unknown variant id = " << vid << std::endl; + getCout() << "\n TRIAD : Unknown variant id = " << vid << std::endl; } } From c3d1dbea0f64005f4126c50fb51e197760fa51de Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 13:26:47 +0300 Subject: [PATCH 065/174] cleanup --- src/algorithm/MEMCPY-StdPar.cpp | 2 -- src/algorithm/MEMSET-StdPar.cpp | 2 -- src/algorithm/REDUCE_SUM-StdPar.cpp | 11 +++++++- src/algorithm/SCAN-StdPar.cpp | 26 +++++-------------- src/algorithm/SORT-StdPar.cpp | 6 ++--- src/algorithm/SORTPAIRS-StdPar.cpp | 3 +-- src/apps/CONVECTION3DPA-StdPar.cpp | 18 ++++++++++--- src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 2 -- src/apps/DIFFUSION3DPA-StdPar.cpp | 7 ++--- src/apps/ENERGY-StdPar.cpp | 2 -- src/apps/FIR-StdPar.cpp | 2 -- src/apps/HALOEXCHANGE-StdPar.cpp | 2 -- src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 2 -- src/apps/LTIMES-StdPar.cpp | 2 -- src/apps/LTIMES_NOVIEW-StdPar.cpp | 2 -- src/apps/MASS3DPA-StdPar.cpp | 4 +-- src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp | 8 +++++- src/apps/PRESSURE-StdPar.cpp | 2 -- src/apps/VOL3D-StdPar.cpp | 2 -- src/basic/IF_QUAD-StdPar.cpp | 2 -- src/basic/INIT_VIEW1D-StdPar.cpp | 2 -- src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 2 -- src/basic/MULADDSUB-StdPar.cpp | 2 -- src/basic/NESTED_INIT-StdPar.cpp | 2 -- src/basic/PI_ATOMIC-StdPar.cpp | 2 -- 
src/basic/TRAP_INT-StdPar.cpp | 3 --- src/lcals/DIFF_PREDICT-StdPar.cpp | 2 -- src/lcals/EOS-StdPar.cpp | 2 -- src/lcals/FIRST_DIFF-StdPar.cpp | 2 -- src/lcals/FIRST_MIN-StdPar.cpp | 2 -- src/lcals/FIRST_SUM-StdPar.cpp | 2 -- src/lcals/GEN_LIN_RECUR-StdPar.cpp | 2 -- src/lcals/HYDRO_1D-StdPar.cpp | 2 -- src/lcals/HYDRO_2D-StdPar.cpp | 2 -- src/lcals/INT_PREDICT-StdPar.cpp | 2 -- src/lcals/PLANCKIAN-StdPar.cpp | 2 -- src/lcals/TRIDIAG_ELIM-StdPar.cpp | 2 -- src/polybench/POLYBENCH_2MM-StdPar.cpp | 2 -- src/polybench/POLYBENCH_3MM-StdPar.cpp | 2 -- src/polybench/POLYBENCH_ADI-StdPar.cpp | 2 -- src/polybench/POLYBENCH_ATAX-StdPar.cpp | 2 -- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 2 -- .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 2 -- src/polybench/POLYBENCH_GEMM-StdPar.cpp | 2 -- src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 2 -- src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 2 -- src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 2 -- src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 2 -- src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 2 -- src/polybench/POLYBENCH_MVT-StdPar.cpp | 3 --- src/stream/ADD-StdPar.cpp | 4 +-- src/stream/COPY-StdPar.cpp | 3 +-- src/stream/DOT-StdPar.cpp | 3 --- src/stream/MUL-StdPar.cpp | 2 -- src/stream/TRIAD-StdPar.cpp | 1 - 55 files changed, 46 insertions(+), 136 deletions(-) diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp index ff8e66c08..71a6c2eac 100644 --- a/src/algorithm/MEMCPY-StdPar.cpp +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp index 73a61d5cf..8ffba6f3f 100644 --- a/src/algorithm/MEMSET-StdPar.cpp +++ b/src/algorithm/MEMSET-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp index 
261be955a..b2e010833 100644 --- a/src/algorithm/REDUCE_SUM-StdPar.cpp +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -20,10 +22,14 @@ namespace algorithm void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + REDUCE_SUM_DATA_SETUP; switch ( vid ) { @@ -35,6 +41,7 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune Real_type sum = m_sum_init; +#warning needs parallel reduce for (Index_type i = ibegin; i < iend; ++i ) { REDUCE_SUM_BODY; } @@ -47,7 +54,6 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto reduce_sum_base_lam = [=](Index_type i) { @@ -59,6 +65,7 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune Real_type sum = m_sum_init; +#warning needs parallel reduce for (Index_type i = ibegin; i < iend; ++i ) { sum += reduce_sum_base_lam(i); } @@ -71,6 +78,7 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune break; } +#ifdef RAJA_ENABLE_STDPAR case RAJA_StdPar : { startTimer(); @@ -98,6 +106,7 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune } +#endif } } // end namespace algorithm diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index d6adeaabf..f6095642d 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf @@ -20,6 +22,7 @@ namespace algorithm void SCAN::runStdParVariant(VariantID vid, size_t 
RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); @@ -33,6 +36,7 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel scan SCAN_PROLOGUE; for (Index_type i = ibegin; i < iend; ++i ) { SCAN_BODY; @@ -44,26 +48,7 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) break; } -#if defined(RUN_RAJA_STDPAR) - case Lambda_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - SCAN_PROLOGUE; - auto scan_lam = [=, &scan_var](Index_type i) { - SCAN_BODY; - }; - for (Index_type i = ibegin; i < iend; ++i ) { - scan_lam(i); - } - - } - stopTimer(); - - break; - } - +#ifdef RAJA_ENABLE_STDPAR case RAJA_StdPar : { startTimer(); @@ -84,6 +69,7 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) } +#endif } } // end namespace algorithm diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp index ddf3be505..3fcee72d6 100644 --- a/src/algorithm/SORT-StdPar.cpp +++ b/src/algorithm/SORT-StdPar.cpp @@ -1,5 +1,5 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. // See the RAJAPerf/COPYRIGHT file for details. 
// @@ -10,8 +10,7 @@ #include "RAJA/RAJA.hpp" -#include -#include +#include "common/StdParUtils.hpp" #include @@ -24,7 +23,6 @@ namespace algorithm void SORT::runStdParVariant(VariantID vid, size_t tune_idx) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index d97ade603..daa603e7f 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -11,8 +11,7 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include + #include #include #include diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp index 1bc58d4f1..119d99b0a 100644 --- a/src/apps/CONVECTION3DPA-StdPar.cpp +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -10,16 +10,23 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include namespace rajaperf { namespace apps { -void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); CONVECTION3DPA_DATA_SETUP; + auto begin = counting_iterator(0); + auto end = counting_iterator(NE); + switch (vid) { case Base_StdPar: { @@ -27,7 +34,9 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - for (int e = 0; e < NE; ++e) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](int e) { CONVECTION3DPA_0_CPU; @@ -118,7 +127,7 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( } } } - } // element loop + }); // element loop } stopTimer(); @@ -126,7 +135,7 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( break; } -#if 
defined(RUN_RAJA_SEQ) +#if defined(RUN_RAJA_STDPAR) case RAJA_StdPar: { using launch_policy = RAJA::expt::LaunchPolicy; @@ -310,6 +319,7 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid << std::endl; } +#endif } } // end namespace apps diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp index 7679df246..469bd93bd 100644 --- a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include "AppsData.hpp" diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index 97d0ad239..608a4d665 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -14,18 +14,15 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include namespace rajaperf { namespace apps { -void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { - +void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) +{ #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); DIFFUSION3DPA_DATA_SETUP; diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp index d02b68d4a..f8c054fa4 100644 --- a/src/apps/ENERGY-StdPar.cpp +++ b/src/apps/ENERGY-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp index 07040148f..befd29fa4 100644 --- a/src/apps/FIR-StdPar.cpp +++ b/src/apps/FIR-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index e06612b6b..6043185a2 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -11,8 +11,6 @@ 
#include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp index b320e7fe3..242759fa2 100644 --- a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index 60e8aec63..ba8ae1332 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp index 7ada8a148..0ff1a9d40 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp +++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index 58b08b9d1..11ce57188 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -11,8 +11,8 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include + + #include diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp index 585e3a000..f1326230e 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#include "common/StdParUtils.hpp" + #include "AppsData.hpp" #include @@ -22,6 +24,7 @@ namespace apps void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { +#if defined(RUN_STDPAR) const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = m_domain->n_real_zones; @@ -37,6 +40,7 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS 
startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel for+atomic or reduce for (Index_type ii = ibegin ; ii < iend ; ++ii ) { NODAL_ACCUMULATION_3D_BODY_INDEX; NODAL_ACCUMULATION_3D_BODY; @@ -48,7 +52,6 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS break; } -#if defined(RUN_RAJA_STDPAR) case Lambda_StdPar : { auto nodal_accumulation_3d_lam = [=](Index_type ii) { @@ -59,6 +62,7 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#warning needs parallel for+atomic or reduce for (Index_type ii = ibegin ; ii < iend ; ++ii ) { nodal_accumulation_3d_lam(ii); } @@ -69,6 +73,7 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS break; } +#if defined(RUN_RAJA_STDPAR) case RAJA_StdPar : { camp::resources::Resource working_res{camp::resources::Host()}; @@ -98,6 +103,7 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS } +#endif } } // end namespace apps diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp index 787def3e6..301e30719 100644 --- a/src/apps/PRESSURE-StdPar.cpp +++ b/src/apps/PRESSURE-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp index fb369f192..93cd2a941 100644 --- a/src/apps/VOL3D-StdPar.cpp +++ b/src/apps/VOL3D-StdPar.cpp @@ -13,8 +13,6 @@ #include "AppsData.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp index 605a1258e..137fdd0e6 100644 --- a/src/basic/IF_QUAD-StdPar.cpp +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp 
b/src/basic/INIT_VIEW1D-StdPar.cpp index 1ffbde1a0..30c190fdd 100644 --- a/src/basic/INIT_VIEW1D-StdPar.cpp +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp index ca7bf0130..c6ff05190 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp index f89c3b179..b76f667b7 100644 --- a/src/basic/MULADDSUB-StdPar.cpp +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp index be46d2e06..1041a9953 100644 --- a/src/basic/NESTED_INIT-StdPar.cpp +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 6b2f80fdc..491a2cfd3 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -19,8 +19,6 @@ typedef std::atomic myAtomic; #endif #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp index ff626091b..94907744c 100644 --- a/src/basic/TRAP_INT-StdPar.cpp +++ b/src/basic/TRAP_INT-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include -#include #include diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp index 5ae9aba30..19a843bfb 100644 --- a/src/lcals/DIFF_PREDICT-StdPar.cpp +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -11,8 +11,6 @@ #include 
"RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp index 8a2aa56a5..1022d79a1 100644 --- a/src/lcals/EOS-StdPar.cpp +++ b/src/lcals/EOS-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp index e327870d9..5e274c841 100644 --- a/src/lcals/FIRST_DIFF-StdPar.cpp +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index 4ea40ca09..3b797cbc6 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp index c60223fe0..b02253819 100644 --- a/src/lcals/FIRST_SUM-StdPar.cpp +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp index d33cb4eea..e16eebddd 100644 --- a/src/lcals/GEN_LIN_RECUR-StdPar.cpp +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp index 1099102e3..ce8d37a2c 100644 --- a/src/lcals/HYDRO_1D-StdPar.cpp +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp index b6825ebb2..5f6d3dbc2 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ b/src/lcals/HYDRO_2D-StdPar.cpp 
@@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp index ef2b06df1..a635061d2 100644 --- a/src/lcals/INT_PREDICT-StdPar.cpp +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp index 7a41c6120..df63d89b1 100644 --- a/src/lcals/PLANCKIAN-StdPar.cpp +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include #include diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index 28f4a1ae2..9ce2afd9f 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index 64d24ff30..43683a68b 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index 1a6a1ec38..b7ac966a2 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index 7fbe1d871..18641aa5c 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git 
a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp index 57e8d785a..b2be11771 100644 --- a/src/polybench/POLYBENCH_ATAX-StdPar.cpp +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 01ebceccd..d2584e96c 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index 0f76e256b..c6d015640 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index 4eda80f76..b17c381c0 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 7c40bbd64..37361759d 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index 720688eee..8201ecb24 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" 
-#include -#include #include diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index 8bb373b52..0b5690828 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp index a39699c97..76dca3264 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index 440016ca7..11d8c208c 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp index 79ff364c6..2c3b30ffb 100644 --- a/src/polybench/POLYBENCH_MVT-StdPar.cpp +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include - namespace rajaperf { diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index 5f26092a1..2131dedbf 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -11,12 +11,10 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include -namespace rajaperf +namespace rajaperf { namespace stream { diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp index 38e53dbf8..bda9af163 100644 --- a/src/stream/COPY-StdPar.cpp +++ b/src/stream/COPY-StdPar.cpp @@ -10,8 +10,7 @@ #include "RAJA/RAJA.hpp" -#include -#include +#include "common/StdParUtils.hpp" 
#include diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp index 7d359ebb4..43d247f72 100644 --- a/src/stream/DOT-StdPar.cpp +++ b/src/stream/DOT-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include -#include #include diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index aca05ed54..082265af2 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -11,8 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#include -#include #include diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index 48fe2ee76..4b3db4a49 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -23,7 +23,6 @@ namespace stream void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); From 4c3519e4835027c5ca254cce8600f3311732da67 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 15:45:28 +0300 Subject: [PATCH 066/174] README --- README.stdpar | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 README.stdpar diff --git a/README.stdpar b/README.stdpar new file mode 100644 index 000000000..5cd8a478b --- /dev/null +++ b/README.stdpar @@ -0,0 +1,7 @@ +# GCC + +# NVC++ + +# Intel +cmake .. 
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 + From 8c34177571eb6223449620bc0c1878c0a8617a15 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 15:45:52 +0300 Subject: [PATCH 067/174] fix unroll pragma --- src/apps/MASS3DPA-StdPar.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index 11ce57188..ad347f5d8 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -12,20 +12,11 @@ #include "common/StdParUtils.hpp" - - #include namespace rajaperf { namespace apps { -//#define USE_RAJA_UNROLL -#define RAJA_DIRECT_PRAGMA(X) _Pragma(#X) -#if defined(USE_RAJA_UNROLL) -#define RAJA_UNROLL(N) RAJA_DIRECT_PRAGMA(unroll(N)) -#else -#define RAJA_UNROLL(N) -#endif #define CPU_FOREACH(i, k, N) for (int i = 0; i < N; i++) void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { From a6d6bc4f22d1478c0b4173b0ed9efd3c3c6356b5 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 15:46:49 +0300 Subject: [PATCH 068/174] GCC --- README.stdpar | 1 + 1 file changed, 1 insertion(+) diff --git a/README.stdpar b/README.stdpar index 5cd8a478b..1cb862c9d 100644 --- a/README.stdpar +++ b/README.stdpar @@ -1,4 +1,5 @@ # GCC +cmake .. 
-DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j8 # NVC++ From c9a6ec5f693d4ba6f648cb6bda2acd2c49f8291c Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 16:32:49 +0300 Subject: [PATCH 069/174] remove RAJA_StdPar --- src/algorithm/MEMCPY-StdPar.cpp | 40 +--- src/algorithm/MEMCPY.cpp | 1 - src/algorithm/MEMSET-StdPar.cpp | 40 +--- src/algorithm/MEMSET.cpp | 1 - src/algorithm/REDUCE_SUM-StdPar.cpp | 22 -- src/algorithm/SCAN-StdPar.cpp | 15 -- src/algorithm/SORT-StdPar.cpp | 15 -- src/algorithm/SORT.cpp | 1 - src/algorithm/SORTPAIRS-StdPar.cpp | 15 -- src/algorithm/SORTPAIRS.cpp | 1 - src/apps/CONVECTION3DPA-StdPar.cpp | 180 ---------------- src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 24 --- src/apps/DEL_DOT_VEC_2D.cpp | 1 - src/apps/DIFFUSION3DPA-StdPar.cpp | 199 ------------------ src/apps/DIFFUSION3DPA.cpp | 1 - src/apps/ENERGY-StdPar.cpp | 35 --- src/apps/ENERGY.cpp | 1 - src/apps/FIR-StdPar.cpp | 16 -- src/apps/FIR.cpp | 1 - src/apps/HALOEXCHANGE-StdPar.cpp | 47 ----- src/apps/HALOEXCHANGE.cpp | 1 - src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 80 ------- src/apps/HALOEXCHANGE_FUSED.cpp | 1 - src/apps/LTIMES-StdPar.cpp | 39 ---- src/apps/LTIMES.cpp | 1 - src/apps/LTIMES_NOVIEW-StdPar.cpp | 33 --- src/apps/LTIMES_NOVIEW.cpp | 1 - src/apps/MASS3DPA-StdPar.cpp | 112 ---------- src/apps/MASS3DPA.cpp | 1 - src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp | 24 --- src/apps/NODAL_ACCUMULATION_3D.cpp | 1 - src/apps/PRESSURE-StdPar.cpp | 23 -- src/apps/PRESSURE.cpp | 1 - src/apps/VOL3D-StdPar.cpp | 16 -- src/apps/VOL3D.cpp | 1 - src/basic/DAXPY-StdPar.cpp | 16 -- src/basic/DAXPY.cpp | 1 - src/basic/DAXPY_ATOMIC-StdPar.cpp | 19 -- src/basic/IF_QUAD-StdPar.cpp | 16 -- src/basic/IF_QUAD.cpp | 1 - src/basic/INDEXLIST_3LOOP-StdPar.cpp | 39 ---- src/basic/INIT3-StdPar.cpp | 16 -- src/basic/INIT3.cpp | 1 - src/basic/INIT_VIEW1D-StdPar.cpp | 22 -- src/basic/INIT_VIEW1D.cpp | 1 - 
src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 22 -- src/basic/INIT_VIEW1D_OFFSET.cpp | 1 - src/basic/MAT_MAT_SHARED.cpp | 5 +- src/basic/MULADDSUB-StdPar.cpp | 16 -- src/basic/MULADDSUB.cpp | 1 - src/basic/NESTED_INIT-StdPar.cpp | 30 --- src/basic/NESTED_INIT.cpp | 1 - src/basic/PI_ATOMIC-StdPar.cpp | 21 -- src/basic/PI_ATOMIC.cpp | 1 - src/basic/PI_REDUCE-StdPar.cpp | 22 -- src/basic/PI_REDUCE.cpp | 1 - src/basic/REDUCE3_INT-StdPar.cpp | 26 --- src/basic/REDUCE3_INT.cpp | 1 - src/basic/REDUCE_STRUCT-StdPar.cpp | 33 --- src/basic/TRAP_INT-StdPar.cpp | 22 -- src/basic/TRAP_INT.cpp | 1 - src/common/KernelBase.cpp | 8 - src/common/RAJAPerfSuite.cpp | 11 - src/common/RAJAPerfSuite.hpp | 1 - src/lcals/DIFF_PREDICT-StdPar.cpp | 16 -- src/lcals/DIFF_PREDICT.cpp | 1 - src/lcals/EOS-StdPar.cpp | 16 -- src/lcals/EOS.cpp | 1 - src/lcals/FIRST_DIFF-StdPar.cpp | 16 -- src/lcals/FIRST_DIFF.cpp | 1 - src/lcals/FIRST_MIN-StdPar.cpp | 23 -- src/lcals/FIRST_MIN.cpp | 1 - src/lcals/FIRST_SUM-StdPar.cpp | 16 -- src/lcals/FIRST_SUM.cpp | 1 - src/lcals/GEN_LIN_RECUR-StdPar.cpp | 19 -- src/lcals/GEN_LIN_RECUR.cpp | 1 - src/lcals/HYDRO_1D-StdPar.cpp | 16 -- src/lcals/HYDRO_1D.cpp | 1 - src/lcals/HYDRO_2D-StdPar.cpp | 49 ----- src/lcals/HYDRO_2D.cpp | 1 - src/lcals/INT_PREDICT-StdPar.cpp | 16 -- src/lcals/INT_PREDICT.cpp | 1 - src/lcals/PLANCKIAN-StdPar.cpp | 16 -- src/lcals/PLANCKIAN.cpp | 1 - src/lcals/TRIDIAG_ELIM-StdPar.cpp | 16 -- src/lcals/TRIDIAG_ELIM.cpp | 1 - src/polybench/POLYBENCH_2MM-StdPar.cpp | 74 ------- src/polybench/POLYBENCH_2MM.cpp | 1 - src/polybench/POLYBENCH_3MM-StdPar.cpp | 98 --------- src/polybench/POLYBENCH_3MM.cpp | 1 - src/polybench/POLYBENCH_ADI-StdPar.cpp | 82 -------- src/polybench/POLYBENCH_ADI.cpp | 1 - src/polybench/POLYBENCH_ATAX-StdPar.cpp | 79 ------- src/polybench/POLYBENCH_ATAX.cpp | 1 - src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 71 ------- src/polybench/POLYBENCH_FDTD_2D.cpp | 1 - .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 37 ---- 
src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp | 1 - src/polybench/POLYBENCH_GEMM-StdPar.cpp | 58 ----- src/polybench/POLYBENCH_GEMM.cpp | 1 - src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 103 --------- src/polybench/POLYBENCH_GEMVER.cpp | 1 - src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 49 ----- src/polybench/POLYBENCH_GESUMMV.cpp | 1 - src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 54 ----- src/polybench/POLYBENCH_HEAT_3D.cpp | 1 - src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 27 --- src/polybench/POLYBENCH_JACOBI_1D.cpp | 1 - src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 49 ----- src/polybench/POLYBENCH_JACOBI_2D.cpp | 1 - src/polybench/POLYBENCH_MVT-StdPar.cpp | 71 ------- src/polybench/POLYBENCH_MVT.cpp | 1 - src/stream/ADD-StdPar.cpp | 16 -- src/stream/ADD.cpp | 1 - src/stream/COPY-StdPar.cpp | 16 -- src/stream/COPY.cpp | 1 - src/stream/DOT-StdPar.cpp | 22 -- src/stream/DOT.cpp | 1 - src/stream/MUL-StdPar.cpp | 16 -- src/stream/MUL.cpp | 1 - src/stream/TRIAD-StdPar.cpp | 16 -- src/stream/TRIAD.cpp | 1 - 122 files changed, 6 insertions(+), 2527 deletions(-) diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp index 71a6c2eac..1d7d74709 100644 --- a/src/algorithm/MEMCPY-StdPar.cpp +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -23,7 +23,6 @@ namespace algorithm void MEMCPY::runStdParVariantLibrary(VariantID vid) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); @@ -46,23 +45,6 @@ void MEMCPY::runStdParVariantLibrary(VariantID vid) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - camp::resources::Host res = camp::resources::Host::get_default(); - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - res.memcpy(MEMCPY_STD_ARGS); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; } @@ -126,24 +108,6 @@ void 
MEMCPY::runStdParVariantDefault(VariantID vid) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - MEMCPY_BODY; - }); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; } @@ -157,7 +121,7 @@ void MEMCPY::runStdParVariant(VariantID vid, size_t tune_idx) { size_t t = 0; - if (vid == Base_StdPar || vid == RAJA_StdPar) { + if (vid == Base_StdPar) { if (tune_idx == t) { @@ -180,7 +144,7 @@ void MEMCPY::runStdParVariant(VariantID vid, size_t tune_idx) void MEMCPY::setStdParTuningDefinitions(VariantID vid) { - if (vid == Base_StdPar || vid == RAJA_StdPar) { + if (vid == Base_StdPar) { addVariantTuningName(vid, "library"); } diff --git a/src/algorithm/MEMCPY.cpp b/src/algorithm/MEMCPY.cpp index 80c7f4f62..08bf79731 100644 --- a/src/algorithm/MEMCPY.cpp +++ b/src/algorithm/MEMCPY.cpp @@ -54,7 +54,6 @@ MEMCPY::MEMCPY(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } MEMCPY::~MEMCPY() diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp index 8ffba6f3f..835b27b74 100644 --- a/src/algorithm/MEMSET-StdPar.cpp +++ b/src/algorithm/MEMSET-StdPar.cpp @@ -23,7 +23,6 @@ namespace algorithm void MEMSET::runStdParVariantLibrary(VariantID vid) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); @@ -46,23 +45,6 @@ void MEMSET::runStdParVariantLibrary(VariantID vid) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - camp::resources::Host res = camp::resources::Host::get_default(); - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - res.memset(MEMSET_STD_ARGS); - - } - stopTimer(); - - break; - } -#endif - default : 
{ getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; } @@ -126,24 +108,6 @@ void MEMSET::runStdParVariantDefault(VariantID vid) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - MEMSET_BODY; - }); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; } @@ -157,7 +121,7 @@ void MEMSET::runStdParVariant(VariantID vid, size_t tune_idx) { size_t t = 0; - if (vid == Base_StdPar || vid == RAJA_StdPar) { + if (vid == Base_StdPar) { if (tune_idx == t) { @@ -180,7 +144,7 @@ void MEMSET::runStdParVariant(VariantID vid, size_t tune_idx) void MEMSET::setStdParTuningDefinitions(VariantID vid) { - if (vid == Base_StdPar || vid == RAJA_StdPar) { + if (vid == Base_StdPar) { addVariantTuningName(vid, "library"); } diff --git a/src/algorithm/MEMSET.cpp b/src/algorithm/MEMSET.cpp index 3cf345bd6..22091b49e 100644 --- a/src/algorithm/MEMSET.cpp +++ b/src/algorithm/MEMSET.cpp @@ -55,7 +55,6 @@ MEMSET::MEMSET(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } MEMSET::~MEMSET() diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp index b2e010833..08cbd206b 100644 --- a/src/algorithm/REDUCE_SUM-StdPar.cpp +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -78,28 +78,6 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum sum(m_sum_init); - - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - REDUCE_SUM_BODY; - }); - - m_sum = sum.get(); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n REDUCE_SUM : 
Unknown variant id = " << vid << std::endl; } diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index f6095642d..5e6638e4b 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -48,21 +48,6 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::exclusive_scan(RAJA_SCAN_ARGS); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n SCAN : Unknown variant id = " << vid << std::endl; } diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp index 3fcee72d6..5a6fd384c 100644 --- a/src/algorithm/SORT-StdPar.cpp +++ b/src/algorithm/SORT-StdPar.cpp @@ -45,21 +45,6 @@ void SORT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::sort(RAJA_SORT_ARGS); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n SORT : Unknown variant id = " << vid << std::endl; } diff --git a/src/algorithm/SORT.cpp b/src/algorithm/SORT.cpp index 15192f500..1e5fb07b4 100644 --- a/src/algorithm/SORT.cpp +++ b/src/algorithm/SORT.cpp @@ -43,7 +43,6 @@ SORT::SORT(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( RAJA_StdPar ); } SORT::~SORT() diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index daa603e7f..dcb0f3a5c 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -72,21 +72,6 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::sort_pairs(RAJA_SORTPAIRS_ARGS); - - } - stopTimer(); - - 
break; - } -#endif - default : { getCout() << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; } diff --git a/src/algorithm/SORTPAIRS.cpp b/src/algorithm/SORTPAIRS.cpp index 882527eb1..db9a0f8af 100644 --- a/src/algorithm/SORTPAIRS.cpp +++ b/src/algorithm/SORTPAIRS.cpp @@ -43,7 +43,6 @@ SORTPAIRS::SORTPAIRS(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( RAJA_StdPar ); } SORTPAIRS::~SORTPAIRS() diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp index 119d99b0a..b8c36646e 100644 --- a/src/apps/CONVECTION3DPA-StdPar.cpp +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -135,186 +135,6 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar: { - - using launch_policy = RAJA::expt::LaunchPolicy; - - using outer_x = RAJA::expt::LoopPolicy; - - using inner_x = RAJA::expt::LoopPolicy; - - using inner_y = RAJA::expt::LoopPolicy; - - using inner_z = RAJA::expt::LoopPolicy; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - // Grid is empty as the host does not need a compute grid to be specified - RAJA::expt::launch( - RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), - [&](int e) { - - CONVECTION3DPA_0_CPU; - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dx) { - - CONVECTION3DPA_1; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 
CPA_Q1D), - [&](int qx) { - - CONVECTION3DPA_2; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qy) { - - CONVECTION3DPA_3; - - } // lambda (dy) - ); // RAJA::expt::loop - } // lambda (dx) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qz) { - - CONVECTION3DPA_4; - - } // lambda (qz) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qx) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - - CONVECTION3DPA_5; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - - CONVECTION3DPA_6; - - } // lambda (dz) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qx) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_Q1D), - [&](int qx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dy) { - - 
CONVECTION3DPA_7; - - } // lambda (dy) - ); // RAJA::expt::loop - } // lambda (qx) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, CPA_D1D), - [&](int dx) { - - CONVECTION3DPA_8; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - } // lambda (e) - ); // RAJA::expt::loop - - } // outer lambda (ctx) - ); // RAJA::expt::launch - } // loop over kernel reps - stopTimer(); - - return; - } -#endif // RUN_RAJA_SEQ - default: getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid << std::endl; diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp index 469bd93bd..7b9216949 100644 --- a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -84,30 +84,6 @@ void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - camp::resources::Resource working_res{camp::resources::Host()}; - RAJA::TypedListSegment zones(m_domain->real_zones, - m_domain->n_real_zones, - working_res); - - auto deldotvec2d_lam = [=](Index_type i) { - DEL_DOT_VEC_2D_BODY; - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall(zones, deldotvec2d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/DEL_DOT_VEC_2D.cpp b/src/apps/DEL_DOT_VEC_2D.cpp index 7e3bf7579..821741f6d 100644 --- a/src/apps/DEL_DOT_VEC_2D.cpp +++ b/src/apps/DEL_DOT_VEC_2D.cpp @@ -65,7 +65,6 @@ DEL_DOT_VEC_2D::DEL_DOT_VEC_2D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( 
Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } DEL_DOT_VEC_2D::~DEL_DOT_VEC_2D() diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index 608a4d665..ae21b8df0 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -118,205 +118,6 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_Seq: { - - // Currently Teams requires two policies if compiled with a device - using launch_policy = RAJA::expt::LaunchPolicy; - - using outer_x = RAJA::expt::LoopPolicy; - - using inner_x = RAJA::expt::LoopPolicy; - - using inner_y = RAJA::expt::LoopPolicy; - - using inner_z = RAJA::expt::LoopPolicy; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - // Grid is empty as the host does not need a compute grid to be specified - RAJA::expt::launch( - RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), - [&](int e) { - - DIFFUSION3DPA_0_CPU; - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dx) { - - DIFFUSION3DPA_1; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), - [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qx) { - - DIFFUSION3DPA_2; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 
DPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qx) { - - DIFFUSION3DPA_3; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qx) { - - DIFFUSION3DPA_4; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qx) { - - DIFFUSION3DPA_5; - - } // lambda (qx) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, 1), - [&](int RAJA_UNUSED_ARG(dz)) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int q) { - - DIFFUSION3DPA_6; - - } // lambda (q) - ); // RAJA::expt::loop - } // lambda (d) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dx) { - - DIFFUSION3DPA_7; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (qy) - ); //RAJA::expt::loop - } // lambda (qz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_Q1D), - [&](int qz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dy) { - 
RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dx) { - - DIFFUSION3DPA_8; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (qz) - ); //RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dz) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, DPA_D1D), - [&](int dx) { - - DIFFUSION3DPA_9; - - } // lambda (dx) - ); // RAJA::expt::loop - } // lambda (dy) - ); //RAJA::expt::loop - } // lambda (dz) - ); //RAJA::expt::loop - - } // lambda (e) - ); // RAJA::expt::loop - - } // outer lambda (ctx) - ); // RAJA::expt::launch - } // loop over kernel reps - stopTimer(); - - return; - } -#endif // RUN_RAJA_STDPAR - default: getCout() << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid << std::endl; diff --git a/src/apps/DIFFUSION3DPA.cpp b/src/apps/DIFFUSION3DPA.cpp index 69ee1aa3a..4ab3fa5bb 100644 --- a/src/apps/DIFFUSION3DPA.cpp +++ b/src/apps/DIFFUSION3DPA.cpp @@ -66,7 +66,6 @@ DIFFUSION3DPA::DIFFUSION3DPA(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( RAJA_StdPar ); } DIFFUSION3DPA::~DIFFUSION3DPA() diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp index f8c054fa4..7c353618e 100644 --- a/src/apps/ENERGY-StdPar.cpp +++ b/src/apps/ENERGY-StdPar.cpp @@ -148,41 +148,6 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::region( [=]() { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam1); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam2); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam3); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam4); - - RAJA::forall( - 
RAJA::RangeSegment(ibegin, iend), energy_lam5); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), energy_lam6); - - }); // end sequential region (for single-source code) - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n ENERGY : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/ENERGY.cpp b/src/apps/ENERGY.cpp index 66f796db1..e775aca5c 100644 --- a/src/apps/ENERGY.cpp +++ b/src/apps/ENERGY.cpp @@ -65,7 +65,6 @@ ENERGY::ENERGY(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } ENERGY::~ENERGY() diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp index befd29fa4..4a7cc4235 100644 --- a/src/apps/FIR-StdPar.cpp +++ b/src/apps/FIR-StdPar.cpp @@ -78,22 +78,6 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), fir_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n FIR : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/FIR.cpp b/src/apps/FIR.cpp index 90871a160..25241fac6 100644 --- a/src/apps/FIR.cpp +++ b/src/apps/FIR.cpp @@ -59,7 +59,6 @@ FIR::FIR(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } FIR::~FIR() diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index 6043185a2..b8564868e 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -121,53 +121,6 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - using EXEC_POL = RAJA::loop_exec; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for 
(Index_type l = 0; l < num_neighbors; ++l) { - Real_ptr buffer = buffers[l]; - Int_ptr list = pack_index_lists[l]; - Index_type len = pack_index_list_lengths[l]; - for (Index_type v = 0; v < num_vars; ++v) { - Real_ptr var = vars[v]; - auto haloexchange_pack_base_lam = [=](Index_type i) { - HALOEXCHANGE_PACK_BODY; - }; - RAJA::forall( - RAJA::TypedRangeSegment(0, len), - haloexchange_pack_base_lam ); - buffer += len; - } - } - - for (Index_type l = 0; l < num_neighbors; ++l) { - Real_ptr buffer = buffers[l]; - Int_ptr list = unpack_index_lists[l]; - Index_type len = unpack_index_list_lengths[l]; - for (Index_type v = 0; v < num_vars; ++v) { - Real_ptr var = vars[v]; - auto haloexchange_unpack_base_lam = [=](Index_type i) { - HALOEXCHANGE_UNPACK_BODY; - }; - RAJA::forall( - RAJA::TypedRangeSegment(0, len), - haloexchange_unpack_base_lam ); - buffer += len; - } - } - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/HALOEXCHANGE.cpp b/src/apps/HALOEXCHANGE.cpp index 35c9839b1..4616d633c 100644 --- a/src/apps/HALOEXCHANGE.cpp +++ b/src/apps/HALOEXCHANGE.cpp @@ -101,7 +101,6 @@ HALOEXCHANGE::HALOEXCHANGE(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } HALOEXCHANGE::~HALOEXCHANGE() diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp index 242759fa2..c89f014e7 100644 --- a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -171,86 +171,6 @@ void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - using AllocatorHolder = RAJAPoolAllocatorHolder< - RAJA::basic_mempool::MemPool>; - using Allocator = AllocatorHolder::Allocator; - - AllocatorHolder allocatorHolder; - - using workgroup_policy = 
RAJA::WorkGroupPolicy < - RAJA::loop_work, - RAJA::ordered, - RAJA::constant_stride_array_of_objects >; - - using workpool = RAJA::WorkPool< workgroup_policy, - Index_type, - RAJA::xargs<>, - Allocator >; - - using workgroup = RAJA::WorkGroup< workgroup_policy, - Index_type, - RAJA::xargs<>, - Allocator >; - - using worksite = RAJA::WorkSite< workgroup_policy, - Index_type, - RAJA::xargs<>, - Allocator >; - - workpool pool_pack (allocatorHolder.template getAllocator()); - workpool pool_unpack(allocatorHolder.template getAllocator()); - pool_pack.reserve(num_neighbors * num_vars, 1024ull*1024ull); - pool_unpack.reserve(num_neighbors * num_vars, 1024ull*1024ull); - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (Index_type l = 0; l < num_neighbors; ++l) { - Real_ptr buffer = buffers[l]; - Int_ptr list = pack_index_lists[l]; - Index_type len = pack_index_list_lengths[l]; - for (Index_type v = 0; v < num_vars; ++v) { - Real_ptr var = vars[v]; - auto haloexchange_fused_pack_base_lam = [=](Index_type i) { - HALOEXCHANGE_FUSED_PACK_BODY; - }; - pool_pack.enqueue( - RAJA::TypedRangeSegment(0, len), - haloexchange_fused_pack_base_lam ); - buffer += len; - } - } - workgroup group_pack = pool_pack.instantiate(); - worksite site_pack = group_pack.run(); - - for (Index_type l = 0; l < num_neighbors; ++l) { - Real_ptr buffer = buffers[l]; - Int_ptr list = unpack_index_lists[l]; - Index_type len = unpack_index_list_lengths[l]; - for (Index_type v = 0; v < num_vars; ++v) { - Real_ptr var = vars[v]; - auto haloexchange_fused_unpack_base_lam = [=](Index_type i) { - HALOEXCHANGE_FUSED_UNPACK_BODY; - }; - pool_unpack.enqueue( - RAJA::TypedRangeSegment(0, len), - haloexchange_fused_unpack_base_lam ); - buffer += len; - } - } - workgroup group_unpack = pool_unpack.instantiate(); - worksite site_unpack = group_unpack.run(); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n HALOEXCHANGE_FUSED : Unknown variant 
id = " << vid << std::endl; } diff --git a/src/apps/HALOEXCHANGE_FUSED.cpp b/src/apps/HALOEXCHANGE_FUSED.cpp index 272d66de4..9bcefe0a9 100644 --- a/src/apps/HALOEXCHANGE_FUSED.cpp +++ b/src/apps/HALOEXCHANGE_FUSED.cpp @@ -101,7 +101,6 @@ HALOEXCHANGE_FUSED::HALOEXCHANGE_FUSED(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } HALOEXCHANGE_FUSED::~HALOEXCHANGE_FUSED() diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index ba8ae1332..aab2c55bc 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -84,45 +84,6 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - LTIMES_VIEWS_RANGES_RAJA; - - auto ltimes_lam = [=](ID d, IZ z, IG g, IM m) { - LTIMES_BODY_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<1, RAJA::loop_exec, // z - RAJA::statement::For<2, RAJA::loop_exec, // g - RAJA::statement::For<3, RAJA::loop_exec, // m - RAJA::statement::For<0, RAJA::loop_exec, // d - RAJA::statement::Lambda<0> - > - > - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(IDRange(0, num_d), - IZRange(0, num_z), - IGRange(0, num_g), - IMRange(0, num_m)), - ltimes_lam - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n LTIMES : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp index 9d170071b..80145a78a 100644 --- a/src/apps/LTIMES.cpp +++ b/src/apps/LTIMES.cpp @@ -80,7 +80,6 @@ LTIMES::LTIMES(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } LTIMES::~LTIMES() diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp index 0ff1a9d40..d35e03a3d 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp 
+++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -84,39 +84,6 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<1, RAJA::loop_exec, // z - RAJA::statement::For<2, RAJA::loop_exec, // g - RAJA::statement::For<3, RAJA::loop_exec, // m - RAJA::statement::For<0, RAJA::loop_exec, // d - RAJA::statement::Lambda<0> - > - > - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment(0, num_d), - RAJA::RangeSegment(0, num_z), - RAJA::RangeSegment(0, num_g), - RAJA::RangeSegment(0, num_m)), - ltimesnoview_lam - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/LTIMES_NOVIEW.cpp b/src/apps/LTIMES_NOVIEW.cpp index a4f53d360..baaeacda5 100644 --- a/src/apps/LTIMES_NOVIEW.cpp +++ b/src/apps/LTIMES_NOVIEW.cpp @@ -79,7 +79,6 @@ LTIMES_NOVIEW::LTIMES_NOVIEW(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } LTIMES_NOVIEW::~LTIMES_NOVIEW() diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index ad347f5d8..2fdbdcdcf 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -100,118 +100,6 @@ void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar: { - - //Currently Teams requires two policies if compiled with a device - using launch_policy = RAJA::expt::LaunchPolicy; - - using outer_x = RAJA::expt::LoopPolicy; - - using inner_x = RAJA::expt::LoopPolicy; - - using inner_y = RAJA::expt::LoopPolicy; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::expt::launch( - RAJA::expt::HOST, RAJA::expt::Resources(), - 
[=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, NE), [&](int e) { - - MASS3DPA_0_CPU - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { - MASS3DPA_1 - }); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int dx) { - MASS3DPA_2 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { - MASS3DPA_3 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { - MASS3DPA_4 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qx) { - MASS3DPA_5 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int d) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int q) { - MASS3DPA_6 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_Q1D), [&](int qy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { - MASS3DPA_7 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { - MASS3DPA_8 - }); - }); - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dy) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, MPA_D1D), [&](int dx) { - MASS3DPA_9 - }); - }); - }); - }); - } - stopTimer(); - - return; - } -#endif // RUN_RAJA_STDPAR - default: getCout() << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; } diff --git a/src/apps/MASS3DPA.cpp b/src/apps/MASS3DPA.cpp index c951336ad..29c2a9ca7 100644 --- a/src/apps/MASS3DPA.cpp +++ 
b/src/apps/MASS3DPA.cpp @@ -62,7 +62,6 @@ MASS3DPA::MASS3DPA(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( RAJA_StdPar ); } MASS3DPA::~MASS3DPA() diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp index f1326230e..1be53f986 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp @@ -73,30 +73,6 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - camp::resources::Resource working_res{camp::resources::Host()}; - RAJA::TypedListSegment zones(m_domain->real_zones, - m_domain->n_real_zones, - working_res); - - auto nodal_accumulation_3d_lam = [=](Index_type i) { - NODAL_ACCUMULATION_3D_RAJA_ATOMIC_BODY(RAJA::seq_atomic); - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall(zones, nodal_accumulation_3d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/NODAL_ACCUMULATION_3D.cpp b/src/apps/NODAL_ACCUMULATION_3D.cpp index ef652b4a4..c3be0a82f 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D.cpp @@ -70,7 +70,6 @@ NODAL_ACCUMULATION_3D::NODAL_ACCUMULATION_3D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } NODAL_ACCUMULATION_3D::~NODAL_ACCUMULATION_3D() diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp index 301e30719..17fc0eedf 100644 --- a/src/apps/PRESSURE-StdPar.cpp +++ b/src/apps/PRESSURE-StdPar.cpp @@ -88,29 +88,6 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep 
< run_reps; ++irep) { - - RAJA::region( [=]() { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), pressure_lam1); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), pressure_lam2); - - }); // end sequential region (for single-source code) - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n PRESSURE : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/PRESSURE.cpp b/src/apps/PRESSURE.cpp index 29fc72adc..70cdab10b 100644 --- a/src/apps/PRESSURE.cpp +++ b/src/apps/PRESSURE.cpp @@ -55,7 +55,6 @@ PRESSURE::PRESSURE(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } PRESSURE::~PRESSURE() diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp index 93cd2a941..324d2bbcc 100644 --- a/src/apps/VOL3D-StdPar.cpp +++ b/src/apps/VOL3D-StdPar.cpp @@ -79,22 +79,6 @@ void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), vol3d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n VOL3D : Unknown variant id = " << vid << std::endl; } diff --git a/src/apps/VOL3D.cpp b/src/apps/VOL3D.cpp index b05511f99..988b54bc4 100644 --- a/src/apps/VOL3D.cpp +++ b/src/apps/VOL3D.cpp @@ -67,7 +67,6 @@ VOL3D::VOL3D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } VOL3D::~VOL3D() diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp index 3615eeee8..66d09cd9e 100644 --- a/src/basic/DAXPY-StdPar.cpp +++ b/src/basic/DAXPY-StdPar.cpp @@ -72,22 +72,6 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type 
irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), daxpy_lam); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index a0cd60977..93f2cf9d4 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -54,7 +54,6 @@ DAXPY::DAXPY(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index a0e887e7c..911e8de6e 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -73,25 +73,6 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - DAXPY_ATOMIC_RAJA_BODY(RAJA::seq_atomic); - }); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp index 137fdd0e6..dac40ccab 100644 --- a/src/basic/IF_QUAD-StdPar.cpp +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -73,22 +73,6 @@ void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), ifquad_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index 799c02865..55b182a2b 100644 --- 
a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -58,7 +58,6 @@ IF_QUAD::IF_QUAD(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index 39657f1ce..f07bdd583 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -121,45 +121,6 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - INDEXLIST_3LOOP_DATA_SETUP_StdPar; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum len(0); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; - }); - - RAJA::exclusive_scan_inplace( - RAJA::make_span(counts+ibegin, iend+1-ibegin)); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - if (counts[i] != counts[i+1]) { - list[counts[i]] = i; - len += 1; - } - }); - - m_len = len.get(); - - } - stopTimer(); - - INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; - - break; - } -#endif - default : { getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp index d176c3b42..a01964a85 100644 --- a/src/basic/INIT3-StdPar.cpp +++ b/src/basic/INIT3-StdPar.cpp @@ -72,22 +72,6 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), init3_lam); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n INIT3 : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index 
990278e36..6f750553f 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -54,7 +54,6 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp index 30c190fdd..13cc0fdf5 100644 --- a/src/basic/INIT_VIEW1D-StdPar.cpp +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -73,28 +73,6 @@ void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - INIT_VIEW1D_VIEW_RAJA; - - auto initview1d_lam = [=](Index_type i) { - INIT_VIEW1D_BODY_RAJA; - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), initview1d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index ea68d0951..bb7195b16 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -55,7 +55,6 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp index c6ff05190..e60db90b2 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -73,28 +73,6 @@ void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - INIT_VIEW1D_OFFSET_VIEW_RAJA; - - auto initview1doffset_lam = [=](Index_type i) { - INIT_VIEW1D_OFFSET_BODY_RAJA; - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - 
RAJA::RangeSegment(ibegin, iend), initview1doffset_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index 1c482cec7..9918f8c0a 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -55,7 +55,6 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 747aa8413..454bb2eed 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -61,9 +61,8 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams ¶ms) setVariantDefined(Lambda_HIP); setVariantDefined(RAJA_HIP); - //setVariantDefined( Base_StdPar ); - //setVariantDefined( Lambda_StdPar ); - //setVariantDefined( RAJA_StdPar ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } MAT_MAT_SHARED::~MAT_MAT_SHARED() {} diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp index b76f667b7..9f01a117e 100644 --- a/src/basic/MULADDSUB-StdPar.cpp +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -72,22 +72,6 @@ void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), mas_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index 8e6b76b5d..f3020dfbb 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -54,7 +54,6 @@ MULADDSUB::MULADDSUB(const 
RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp index 1041a9953..705ed38bd 100644 --- a/src/basic/NESTED_INIT-StdPar.cpp +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -105,36 +105,6 @@ void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<2, RAJA::loop_exec, // k - RAJA::statement::For<1, RAJA::loop_exec, // j - RAJA::statement::For<0, RAJA::loop_exec,// i - RAJA::statement::Lambda<0> - > - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment(0, ni), - RAJA::RangeSegment(0, nj), - RAJA::RangeSegment(0, nk)), - nestedinit_lam - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index 3c01fe350..a4be4f273 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -65,7 +65,6 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 491a2cfd3..d73f13814 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -85,27 +85,6 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - *pi = m_pi_init; - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - double x 
= (double(i) + 0.5) * dx; - RAJA::atomicAdd(pi, dx / (1.0 + x * x)); - }); - *pi *= 4.0; - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 0633887a0..6a15d4784 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -56,7 +56,6 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp index e1f37eea4..a3fc51531 100644 --- a/src/basic/PI_REDUCE-StdPar.cpp +++ b/src/basic/PI_REDUCE-StdPar.cpp @@ -80,28 +80,6 @@ void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum pi(m_pi_init); - - RAJA::forall( RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - PI_REDUCE_BODY; - }); - - m_pi = 4.0 * pi.get(); - - } - stopTimer(); - - break; - } -#endif - default : { getCout() << "\n PI_REDUCE : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index 5af375f56..62a20bd57 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -54,7 +54,6 @@ PI_REDUCE::PI_REDUCE(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } PI_REDUCE::~PI_REDUCE() diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp index f7b5f5dd5..b2ada68e7 100644 --- a/src/basic/REDUCE3_INT-StdPar.cpp +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -99,32 +99,6 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { 
- - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum vsum(m_vsum_init); - RAJA::ReduceMin vmin(m_vmin_init); - RAJA::ReduceMax vmax(m_vmax_init); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - REDUCE3_INT_BODY_RAJA; - }); - - m_vsum += static_cast(vsum.get()); - m_vmin = RAJA_MIN(m_vmin, static_cast(vmin.get())); - m_vmax = RAJA_MAX(m_vmax, static_cast(vmax.get())); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index e39f0c031..2b9988268 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -59,7 +59,6 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp index 1264f8257..ee94e542c 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -108,39 +108,6 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum xsum(m_init_sum); - RAJA::ReduceSum ysum(m_init_sum); - RAJA::ReduceMin xmin(m_init_min); - RAJA::ReduceMin ymin(m_init_min); - RAJA::ReduceMax xmax(m_init_max); - RAJA::ReduceMax ymax(m_init_max); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - REDUCE_STRUCT_BODY_RAJA; - }); - - points.SetCenter(xsum.get()/(points.N), - ysum.get()/(points.N)); - points.SetXMin(xmin.get()); - points.SetXMax(xmax.get()); - points.SetYMin(ymin.get()); - points.SetYMax(ymax.get()); - m_points=points; - - } - stopTimer(); - - break; - } -#endif // 
RUN_RAJA_STDPAR - default : { getCout() << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp index 94907744c..e1e9b4cd0 100644 --- a/src/basic/TRAP_INT-StdPar.cpp +++ b/src/basic/TRAP_INT-StdPar.cpp @@ -94,28 +94,6 @@ void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum sumx(m_sumx_init); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - TRAP_INT_BODY; - }); - - m_sumx += static_cast(sumx.get()) * h; - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; } diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index 7ddc1991b..5491ddcd6 100644 --- a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -54,7 +54,6 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); setVariantDefined( Kokkos_Lambda ); } diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index 0b526afd0..2a777971d 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -254,14 +254,6 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx) break; } - case RAJA_StdPar : - { -#if defined(RUN_RAJA_STDPAR) - runStdParVariant(vid, tune_idx); -#endif - break; - } - case Kokkos_Lambda : { #if defined(RUN_KOKKOS) diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 5c1144ef3..1b71a8401 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -280,7 +280,6 @@ static const std::string VariantNames [] = std::string("Base_StdPar"), std::string("Lambda_StdPar"), - std::string("RAJA_StdPar"), std::string("Kokkos_Lambda"), @@ -432,11 +431,6 @@ bool 
isVariantAvailable(VariantID vid) vid == Lambda_StdPar) { ret_val = true; } -#if defined(RUN_RAJA_STDPAR) - if ( vid == RAJA_StdPar ) { - ret_val = true; - } -#endif #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { @@ -503,11 +497,6 @@ bool isVariantGPU(VariantID vid) vid == Lambda_StdPar) { ret_val = true; } -#if defined(RUN_RAJA_STDPAR) - if ( vid == RAJA_StdPar ) { - ret_val = true; - } -#endif #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index ab93280f1..b037d0c67 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -196,7 +196,6 @@ enum VariantID { Base_StdPar, Lambda_StdPar, - RAJA_StdPar, Kokkos_Lambda, diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp index 19a843bfb..c38b3936c 100644 --- a/src/lcals/DIFF_PREDICT-StdPar.cpp +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -72,22 +72,6 @@ void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), diffpredict_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/DIFF_PREDICT.cpp b/src/lcals/DIFF_PREDICT.cpp index e60d636e1..49a41deb8 100644 --- a/src/lcals/DIFF_PREDICT.cpp +++ b/src/lcals/DIFF_PREDICT.cpp @@ -52,7 +52,6 @@ DIFF_PREDICT::DIFF_PREDICT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } DIFF_PREDICT::~DIFF_PREDICT() diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp index 1022d79a1..f9281b86e 100644 --- a/src/lcals/EOS-StdPar.cpp +++ b/src/lcals/EOS-StdPar.cpp @@ -72,22 +72,6 @@ void EOS::runStdParVariant(VariantID vid, size_t tune_idx) 
break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), eos_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n EOS : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp index b7b3813b3..8fc00ab30 100644 --- a/src/lcals/EOS.cpp +++ b/src/lcals/EOS.cpp @@ -60,7 +60,6 @@ EOS::EOS(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } EOS::~EOS() diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp index 5e274c841..720371793 100644 --- a/src/lcals/FIRST_DIFF-StdPar.cpp +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -72,22 +72,6 @@ void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), firstdiff_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/FIRST_DIFF.cpp b/src/lcals/FIRST_DIFF.cpp index 54a7c0326..19cbfbb53 100644 --- a/src/lcals/FIRST_DIFF.cpp +++ b/src/lcals/FIRST_DIFF.cpp @@ -56,7 +56,6 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } FIRST_DIFF::~FIRST_DIFF() diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index 3b797cbc6..ef6a11c93 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -76,29 +76,6 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for 
(RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceMinLoc loc( - m_xmin_init, m_initloc); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - FIRST_MIN_BODY_RAJA; - }); - - m_minloc = RAJA_MAX(m_minloc, loc.getLoc()); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp index a1cffc072..e8825dd17 100644 --- a/src/lcals/FIRST_MIN.cpp +++ b/src/lcals/FIRST_MIN.cpp @@ -60,7 +60,6 @@ FIRST_MIN::FIRST_MIN(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } FIRST_MIN::~FIRST_MIN() diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp index b02253819..c00a7c062 100644 --- a/src/lcals/FIRST_SUM-StdPar.cpp +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -72,22 +72,6 @@ void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), firstsum_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp index 109c6499a..cda99863d 100644 --- a/src/lcals/FIRST_SUM.cpp +++ b/src/lcals/FIRST_SUM.cpp @@ -55,7 +55,6 @@ FIRST_SUM::FIRST_SUM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } FIRST_SUM::~FIRST_SUM() diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp index e16eebddd..fd4cf6ed3 100644 --- a/src/lcals/GEN_LIN_RECUR-StdPar.cpp +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -92,25 +92,6 @@ void 
GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(0, N), genlinrecur_lam1); - - RAJA::forall( - RAJA::RangeSegment(1, N+1), genlinrecur_lam2); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp index eb21f7f5c..0d5ea5ace 100644 --- a/src/lcals/GEN_LIN_RECUR.cpp +++ b/src/lcals/GEN_LIN_RECUR.cpp @@ -60,7 +60,6 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } GEN_LIN_RECUR::~GEN_LIN_RECUR() diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp index ce8d37a2c..7d02aaa5d 100644 --- a/src/lcals/HYDRO_1D-StdPar.cpp +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -73,22 +73,6 @@ void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), hydro1d_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/HYDRO_1D.cpp b/src/lcals/HYDRO_1D.cpp index 32d40d978..bda89bec3 100644 --- a/src/lcals/HYDRO_1D.cpp +++ b/src/lcals/HYDRO_1D.cpp @@ -59,7 +59,6 @@ HYDRO_1D::HYDRO_1D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } HYDRO_1D::~HYDRO_1D() diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp index 5f6d3dbc2..8b8ff5c66 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ 
b/src/lcals/HYDRO_2D-StdPar.cpp @@ -131,55 +131,6 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - HYDRO_2D_VIEWS_RAJA; - - auto hydro2d_lam1 = [=] (Index_type k, Index_type j) { - HYDRO_2D_BODY1_RAJA; - }; - auto hydro2d_lam2 = [=] (Index_type k, Index_type j) { - HYDRO_2D_BODY2_RAJA; - }; - auto hydro2d_lam3 = [=] (Index_type k, Index_type j) { - HYDRO_2D_BODY3_RAJA; - }; - - using EXECPOL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, // k - RAJA::statement::For<1, RAJA::loop_exec, // j - RAJA::statement::Lambda<0> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( - RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend), - RAJA::RangeSegment(jbeg, jend)), - hydro2d_lam1); - - RAJA::kernel( - RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend), - RAJA::RangeSegment(jbeg, jend)), - hydro2d_lam2); - - RAJA::kernel( - RAJA::make_tuple( RAJA::RangeSegment(kbeg, kend), - RAJA::RangeSegment(jbeg, jend)), - hydro2d_lam3); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp index 331e6e695..81687b486 100644 --- a/src/lcals/HYDRO_2D.cpp +++ b/src/lcals/HYDRO_2D.cpp @@ -74,7 +74,6 @@ HYDRO_2D::HYDRO_2D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } HYDRO_2D::~HYDRO_2D() diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp index a635061d2..2df532913 100644 --- a/src/lcals/INT_PREDICT-StdPar.cpp +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -73,22 +73,6 @@ void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - 
RAJA::forall( - RAJA::RangeSegment(ibegin, iend), intpredict_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/INT_PREDICT.cpp b/src/lcals/INT_PREDICT.cpp index dd4ff83d8..5ef7dc4a1 100644 --- a/src/lcals/INT_PREDICT.cpp +++ b/src/lcals/INT_PREDICT.cpp @@ -52,7 +52,6 @@ INT_PREDICT::INT_PREDICT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } INT_PREDICT::~INT_PREDICT() diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp index df63d89b1..e6378319c 100644 --- a/src/lcals/PLANCKIAN-StdPar.cpp +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -74,22 +74,6 @@ void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), planckian_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/PLANCKIAN.cpp b/src/lcals/PLANCKIAN.cpp index 74c65e31b..f3362d860 100644 --- a/src/lcals/PLANCKIAN.cpp +++ b/src/lcals/PLANCKIAN.cpp @@ -52,7 +52,6 @@ PLANCKIAN::PLANCKIAN(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } PLANCKIAN::~PLANCKIAN() diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index 9ce2afd9f..c18df1303 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -73,22 +73,6 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - 
RAJA::forall( - RAJA::RangeSegment(ibegin, iend), tridiag_elim_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; } diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp index d606e39be..10d19c0f4 100644 --- a/src/lcals/TRIDIAG_ELIM.cpp +++ b/src/lcals/TRIDIAG_ELIM.cpp @@ -54,7 +54,6 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } TRIDIAG_ELIM::~TRIDIAG_ELIM() diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index 43683a68b..20ad50043 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -166,80 +166,6 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_2MM_VIEWS_RAJA; - - auto poly_2mm_lam1 = [=](Real_type &dot) { - POLYBENCH_2MM_BODY1_RAJA; - }; - auto poly_2mm_lam2 = [=](Index_type i, Index_type j, Index_type k, - Real_type &dot) { - POLYBENCH_2MM_BODY2_RAJA; - }; - auto poly_2mm_lam3 = [=](Index_type i, Index_type j, - Real_type &dot) { - POLYBENCH_2MM_BODY3_RAJA; - }; - auto poly_2mm_lam4 = [=](Real_type &dot) { - POLYBENCH_2MM_BODY4_RAJA; - }; - auto poly_2mm_lam5 = [=](Index_type i, Index_type l, Index_type j, - Real_type &dot) { - POLYBENCH_2MM_BODY5_RAJA; - }; - auto poly_2mm_lam6 = [=](Index_type i, Index_type l, - Real_type &dot) { - POLYBENCH_2MM_BODY6_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> - > - > - >; - - 
startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nj}, - RAJA::RangeSegment{0, nk}), - RAJA::tuple{0.0}, - - poly_2mm_lam1, - poly_2mm_lam2, - poly_2mm_lam3 - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nl}, - RAJA::RangeSegment{0, nj}), - RAJA::tuple{0.0}, - - poly_2mm_lam4, - poly_2mm_lam5, - poly_2mm_lam6 - - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_2MM.cpp b/src/polybench/POLYBENCH_2MM.cpp index c1284791f..5952c07bb 100644 --- a/src/polybench/POLYBENCH_2MM.cpp +++ b/src/polybench/POLYBENCH_2MM.cpp @@ -81,7 +81,6 @@ POLYBENCH_2MM::POLYBENCH_2MM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_2MM::~POLYBENCH_2MM() diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index b7ac966a2..ba0df5bb0 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -218,104 +218,6 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_3MM_VIEWS_RAJA; - - auto poly_3mm_lam1 = [=] (Real_type &dot) { - POLYBENCH_3MM_BODY1_RAJA; - }; - auto poly_3mm_lam2 = [=] (Index_type i, Index_type j, Index_type k, - Real_type &dot) { - POLYBENCH_3MM_BODY2_RAJA; - }; - auto poly_3mm_lam3 = [=] (Index_type i, Index_type j, - Real_type &dot) { - POLYBENCH_3MM_BODY3_RAJA; - }; - auto poly_3mm_lam4 = [=] (Real_type &dot) { - POLYBENCH_3MM_BODY4_RAJA; - }; - auto poly_3mm_lam5 = [=] (Index_type j, Index_type l, Index_type m, - Real_type &dot) { - POLYBENCH_3MM_BODY5_RAJA; - }; - auto poly_3mm_lam6 = [=] (Index_type j, 
Index_type l, - Real_type &dot) { - POLYBENCH_3MM_BODY6_RAJA; - }; - auto poly_3mm_lam7 = [=] (Real_type &dot) { - POLYBENCH_3MM_BODY7_RAJA; - }; - auto poly_3mm_lam8 = [=] (Index_type i, Index_type l, Index_type j, - Real_type &dot) { - POLYBENCH_3MM_BODY8_RAJA; - }; - auto poly_3mm_lam9 = [=] (Index_type i, Index_type l, - Real_type &dot) { - POLYBENCH_3MM_BODY9_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nj}, - RAJA::RangeSegment{0, nk}), - RAJA::tuple{0.0}, - - poly_3mm_lam1, - poly_3mm_lam2, - poly_3mm_lam3 - - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, nj}, - RAJA::RangeSegment{0, nl}, - RAJA::RangeSegment{0, nm}), - RAJA::tuple{0.0}, - - poly_3mm_lam4, - poly_3mm_lam5, - poly_3mm_lam6 - - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nl}, - RAJA::RangeSegment{0, nj}), - RAJA::tuple{0.0}, - - poly_3mm_lam7, - poly_3mm_lam8, - poly_3mm_lam9 - - ); - - } // end run_reps - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_3MM.cpp b/src/polybench/POLYBENCH_3MM.cpp index 3cc6fce66..71f147869 100644 --- a/src/polybench/POLYBENCH_3MM.cpp +++ b/src/polybench/POLYBENCH_3MM.cpp @@ -89,7 +89,6 @@ POLYBENCH_3MM::POLYBENCH_3MM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } 
POLYBENCH_3MM::~POLYBENCH_3MM() diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index 18641aa5c..0151f931b 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -139,88 +139,6 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_ADI_VIEWS_RAJA; - - auto poly_adi_lam2 = [=](Index_type i) { - POLYBENCH_ADI_BODY2_RAJA; - }; - auto poly_adi_lam3 = [=](Index_type i, Index_type j) { - POLYBENCH_ADI_BODY3_RAJA; - }; - auto poly_adi_lam4 = [=](Index_type i) { - POLYBENCH_ADI_BODY4_RAJA; - }; - auto poly_adi_lam5 = [=](Index_type i, Index_type k) { - POLYBENCH_ADI_BODY5_RAJA; - }; - auto poly_adi_lam6 = [=](Index_type i) { - POLYBENCH_ADI_BODY6_RAJA; - }; - auto poly_adi_lam7 = [=](Index_type i, Index_type j) { - POLYBENCH_ADI_BODY7_RAJA; - }; - auto poly_adi_lam8 = [=](Index_type i) { - POLYBENCH_ADI_BODY8_RAJA; - }; - auto poly_adi_lam9 = [=](Index_type i, Index_type k) { - POLYBENCH_ADI_BODY9_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>>, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<3, RAJA::Segs<0,2>> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (Index_type t = 1; t <= tsteps; ++t) { - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{1, n-1}, - RAJA::RangeSegment{1, n-1}, - RAJA::RangeStrideSegment{n-2, 0, -1}), - - poly_adi_lam2, - poly_adi_lam3, - poly_adi_lam4, - poly_adi_lam5 - - ); - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{1, n-1}, - RAJA::RangeSegment{1, n-1}, - RAJA::RangeStrideSegment{n-2, 0, -1}), - - poly_adi_lam6, - poly_adi_lam7, - poly_adi_lam8, - 
poly_adi_lam9 - - ); - - } // tstep loop - - } // run_reps - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp index 5ad7544dd..7e3749e46 100644 --- a/src/polybench/POLYBENCH_ADI.cpp +++ b/src/polybench/POLYBENCH_ADI.cpp @@ -66,7 +66,6 @@ POLYBENCH_ADI::POLYBENCH_ADI(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_ADI::~POLYBENCH_ADI() diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp index b2be11771..6137fcb70 100644 --- a/src/polybench/POLYBENCH_ATAX-StdPar.cpp +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -119,85 +119,6 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_ATAX_VIEWS_RAJA; - - auto poly_atax_lam1 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_ATAX_BODY1_RAJA; - }; - auto poly_atax_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_ATAX_BODY2_RAJA; - }; - auto poly_atax_lam3 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_ATAX_BODY3_RAJA; - }; - auto poly_atax_lam4 = [=] (Index_type j, Real_type &dot) { - POLYBENCH_ATAX_BODY4_RAJA; - }; - auto poly_atax_lam5 = [=] (Index_type i, Index_type j , Real_type &dot) { - POLYBENCH_ATAX_BODY5_RAJA; - }; - auto poly_atax_lam6 = [=] (Index_type j, Real_type &dot) { - POLYBENCH_ATAX_BODY6_RAJA; - }; - - using EXEC_POL1 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> - > - >; - - using EXEC_POL2 = - RAJA::KernelPolicy< - 
RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<1>, RAJA::Params<0>>, - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<1>, RAJA::Params<0>> - > - >; - - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - RAJA::tuple{0.0}, - - poly_atax_lam1, - poly_atax_lam2, - poly_atax_lam3 - - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - RAJA::tuple{0.0}, - - poly_atax_lam4, - poly_atax_lam5, - poly_atax_lam6 - - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp index 440586561..e0da9af86 100644 --- a/src/polybench/POLYBENCH_ATAX.cpp +++ b/src/polybench/POLYBENCH_ATAX.cpp @@ -68,7 +68,6 @@ POLYBENCH_ATAX::POLYBENCH_ATAX(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_ATAX::~POLYBENCH_ATAX() diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index d2584e96c..4742b76f9 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -140,77 +140,6 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_FDTD_2D_VIEWS_RAJA; - - // - // Note: first lambda must use capture by reference so that the - // scalar variable 't' used in it is updated for each - // t-loop iteration. 
- // - auto poly_fdtd2d_lam1 = [&](Index_type j) { - POLYBENCH_FDTD_2D_BODY1_RAJA; - }; - auto poly_fdtd2d_lam2 = [=](Index_type i, Index_type j) { - POLYBENCH_FDTD_2D_BODY2_RAJA; - }; - auto poly_fdtd2d_lam3 = [=](Index_type i, Index_type j) { - POLYBENCH_FDTD_2D_BODY3_RAJA; - }; - auto poly_fdtd2d_lam4 = [=](Index_type i, Index_type j) { - POLYBENCH_FDTD_2D_BODY4_RAJA; - }; - - using EXEC_POL1 = RAJA::loop_exec; - - using EXEC_POL234 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (t = 0; t < tsteps; ++t) { - - RAJA::forall( RAJA::RangeSegment(0, ny), - poly_fdtd2d_lam1 - ); - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{1, nx}, - RAJA::RangeSegment{0, ny}), - poly_fdtd2d_lam2 - ); - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{0, nx}, - RAJA::RangeSegment{1, ny}), - poly_fdtd2d_lam3 - ); - - RAJA::kernel( - RAJA::make_tuple(RAJA::RangeSegment{0, nx-1}, - RAJA::RangeSegment{0, ny-1}), - poly_fdtd2d_lam4 - ); - - } // tstep loop - - } // run_reps - stopTimer(); - - break; - } - -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_FDTD_2D.cpp b/src/polybench/POLYBENCH_FDTD_2D.cpp index 47bb79ce2..19505024c 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D.cpp @@ -87,7 +87,6 @@ POLYBENCH_FDTD_2D::POLYBENCH_FDTD_2D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_FDTD_2D::~POLYBENCH_FDTD_2D() diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index c6d015640..4b2a44daa 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ 
b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -106,43 +106,6 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_FLOYD_WARSHALL_VIEWS_RAJA; - - auto poly_floydwarshall_lam = [=](Index_type k, Index_type i, - Index_type j) { - POLYBENCH_FLOYD_WARSHALL_BODY_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<0> - > - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - poly_floydwarshall_lam - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp index d48f141f0..cb8da97fc 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp @@ -63,7 +63,6 @@ POLYBENCH_FLOYD_WARSHALL::POLYBENCH_FLOYD_WARSHALL(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_FLOYD_WARSHALL::~POLYBENCH_FLOYD_WARSHALL() diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index b17c381c0..5d644def0 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -119,64 +119,6 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_GEMM_VIEWS_RAJA; - - auto poly_gemm_lam1 = [=](Real_type& dot) { - POLYBENCH_GEMM_BODY1_RAJA; - }; - auto poly_gemm_lam2 = 
[=](Index_type i, Index_type j) { - POLYBENCH_GEMM_BODY2_RAJA; - }; - auto poly_gemm_lam3 = [=](Index_type i, Index_type j, Index_type k, - Real_type& dot) { - POLYBENCH_GEMM_BODY3_RAJA; - }; - auto poly_gemm_lam4 = [=](Index_type i, Index_type j, - Real_type& dot) { - POLYBENCH_GEMM_BODY4_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>>, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<2, RAJA::Segs<0,1,2>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<3, RAJA::Segs<0,1>, RAJA::Params<0>> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - - RAJA::make_tuple( RAJA::RangeSegment{0, ni}, - RAJA::RangeSegment{0, nj}, - RAJA::RangeSegment{0, nk} ), - RAJA::tuple{0.0}, // variable for dot - - poly_gemm_lam1, - poly_gemm_lam2, - poly_gemm_lam3, - poly_gemm_lam4 - - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp index a7dec71b4..57d22fc60 100644 --- a/src/polybench/POLYBENCH_GEMM.cpp +++ b/src/polybench/POLYBENCH_GEMM.cpp @@ -73,7 +73,6 @@ POLYBENCH_GEMM::POLYBENCH_GEMM(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_GEMM::~POLYBENCH_GEMM() diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 37361759d..7a51496f9 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -137,109 +137,6 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - 
POLYBENCH_GEMVER_VIEWS_RAJA; - - auto poly_gemver_lam1 = [=] (Index_type i, Index_type j) { - POLYBENCH_GEMVER_BODY1_RAJA; - }; - auto poly_gemver_lam2 = [=] (Real_type &dot) { - POLYBENCH_GEMVER_BODY2_RAJA; - }; - auto poly_gemver_lam3 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_GEMVER_BODY3_RAJA; - }; - auto poly_gemver_lam4 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_GEMVER_BODY4_RAJA; - }; - auto poly_gemver_lam5 = [=] (Index_type i) { - POLYBENCH_GEMVER_BODY5_RAJA; - }; - auto poly_gemver_lam6 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_GEMVER_BODY6_RAJA; - }; - auto poly_gemver_lam7 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_GEMVER_BODY7_RAJA; - }; - auto poly_gemver_lam8 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_GEMVER_BODY8_RAJA; - }; - - using EXEC_POL1 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0,1>> - > - > - >; - - using EXEC_POL2 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> - > - >; - - using EXEC_POL3 = RAJA::loop_exec; - - using EXEC_POL4 = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{0, n}, - RAJA::RangeSegment{0, n}), - poly_gemver_lam1 - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, n}, - RAJA::RangeSegment{0, n}), - RAJA::tuple{0.0}, 
- - poly_gemver_lam2, - poly_gemver_lam3, - poly_gemver_lam4 - ); - - RAJA::forall (RAJA::RangeSegment{0, n}, - poly_gemver_lam5 - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, n}, - RAJA::RangeSegment{0, n}), - RAJA::tuple{0.0}, - - poly_gemver_lam6, - poly_gemver_lam7, - poly_gemver_lam8 - - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp index 22a4837af..0c8108cdd 100644 --- a/src/polybench/POLYBENCH_GEMVER.cpp +++ b/src/polybench/POLYBENCH_GEMVER.cpp @@ -82,7 +82,6 @@ POLYBENCH_GEMVER::POLYBENCH_GEMVER(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_GEMVER::~POLYBENCH_GEMVER() diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index 8201ecb24..7ad6e101b 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -81,55 +81,6 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_GESUMMV_VIEWS_RAJA; - - auto poly_gesummv_lam1 = [=](Real_type& tmpdot, Real_type& ydot) { - POLYBENCH_GESUMMV_BODY1_RAJA; - }; - auto poly_gesummv_lam2 = [=](Index_type i, Index_type j, - Real_type& tmpdot, Real_type& ydot) { - POLYBENCH_GESUMMV_BODY2_RAJA; - }; - auto poly_gesummv_lam3 = [=](Index_type i, - Real_type& tmpdot, Real_type& ydot) { - POLYBENCH_GESUMMV_BODY3_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0,1>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Params<0,1>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, 
RAJA::Params<0,1>> - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::kernel_param( - RAJA::make_tuple( RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N} ), - RAJA::make_tuple(static_cast(0.0), - static_cast(0.0)), - - poly_gesummv_lam1, - poly_gesummv_lam2, - poly_gesummv_lam3 - ); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_GESUMMV.cpp b/src/polybench/POLYBENCH_GESUMMV.cpp index f1f10c645..c56dd757b 100644 --- a/src/polybench/POLYBENCH_GESUMMV.cpp +++ b/src/polybench/POLYBENCH_GESUMMV.cpp @@ -62,7 +62,6 @@ POLYBENCH_GESUMMV::POLYBENCH_GESUMMV(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_GESUMMV::~POLYBENCH_GESUMMV() diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index 0b5690828..8f9e1bc54 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -119,60 +119,6 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_HEAT_3D_VIEWS_RAJA; - - auto poly_heat3d_lam1 = [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY1_RAJA; - }; - auto poly_heat3d_lam2 = [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY2_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<0> - > - > - >, - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::For<2, RAJA::loop_exec, - RAJA::statement::Lambda<1> - > - > - > - >; - - startTimer(); - for (RepIndex_type 
irep = 0; irep < run_reps; ++irep) { - - for (Index_type t = 0; t < tsteps; ++t) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}), - - poly_heat3d_lam1, - poly_heat3d_lam2 - ); - - } - - } - stopTimer(); - - POLYBENCH_HEAT_3D_DATA_RESET; - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_HEAT_3D.cpp b/src/polybench/POLYBENCH_HEAT_3D.cpp index ec86de900..af4727d8a 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D.cpp @@ -73,7 +73,6 @@ POLYBENCH_HEAT_3D::POLYBENCH_HEAT_3D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_HEAT_3D::~POLYBENCH_HEAT_3D() diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp index 76dca3264..3b95527e9 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -95,33 +95,6 @@ void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (Index_type t = 0; t < tsteps; ++t) { - - RAJA::forall ( RAJA::RangeSegment{1, N-1}, - poly_jacobi1d_lam1 - ); - - RAJA::forall ( RAJA::RangeSegment{1, N-1}, - poly_jacobi1d_lam2 - ); - - } - - } - stopTimer(); - - POLYBENCH_JACOBI_1D_DATA_RESET; - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_JACOBI_1D.cpp b/src/polybench/POLYBENCH_JACOBI_1D.cpp index a8aa3e089..3ed3e8361 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.cpp @@ -70,7 +70,6 @@ 
POLYBENCH_JACOBI_1D::POLYBENCH_JACOBI_1D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_JACOBI_1D::~POLYBENCH_JACOBI_1D() diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index 11d8c208c..b5c4ace75 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -112,55 +112,6 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_JACOBI_2D_VIEWS_RAJA; - - auto poly_jacobi2d_lam1 = [=](Index_type i, Index_type j) { - POLYBENCH_JACOBI_2D_BODY1_RAJA; - }; - auto poly_jacobi2d_lam2 = [=](Index_type i, Index_type j) { - POLYBENCH_JACOBI_2D_BODY2_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<0> - > - >, - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1> - > - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - for (Index_type t = 0; t < tsteps; ++t) { - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}), - - poly_jacobi2d_lam1, - poly_jacobi2d_lam2 - ); - - } - - } - stopTimer(); - - POLYBENCH_JACOBI_2D_DATA_RESET; - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl; } diff --git a/src/polybench/POLYBENCH_JACOBI_2D.cpp b/src/polybench/POLYBENCH_JACOBI_2D.cpp index a8d54e751..98ef3aa3d 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.cpp @@ -72,7 +72,6 @@ POLYBENCH_JACOBI_2D::POLYBENCH_JACOBI_2D(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - 
setVariantDefined( RAJA_StdPar ); } POLYBENCH_JACOBI_2D::~POLYBENCH_JACOBI_2D() diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp index 2c3b30ffb..45da020a8 100644 --- a/src/polybench/POLYBENCH_MVT-StdPar.cpp +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -117,77 +117,6 @@ void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - POLYBENCH_MVT_VIEWS_RAJA; - - auto poly_mvt_lam1 = [=] (Real_type &dot) { - POLYBENCH_MVT_BODY1_RAJA; - }; - auto poly_mvt_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_MVT_BODY2_RAJA; - }; - auto poly_mvt_lam3 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_MVT_BODY3_RAJA; - }; - auto poly_mvt_lam4 = [=] (Real_type &dot) { - POLYBENCH_MVT_BODY4_RAJA; - }; - auto poly_mvt_lam5 = [=] (Index_type i, Index_type j, Real_type &dot) { - POLYBENCH_MVT_BODY5_RAJA; - }; - auto poly_mvt_lam6 = [=] (Index_type i, Real_type &dot) { - POLYBENCH_MVT_BODY6_RAJA; - }; - - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Params<0>>, - RAJA::statement::For<1, RAJA::loop_exec, - RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> - >, - RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> - > - >; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::region( [=]() { - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - RAJA::tuple{0.0}, - - poly_mvt_lam1, - poly_mvt_lam2, - poly_mvt_lam3 - - ); - - RAJA::kernel_param( - RAJA::make_tuple(RAJA::RangeSegment{0, N}, - RAJA::RangeSegment{0, N}), - RAJA::tuple{0.0}, - - poly_mvt_lam4, - poly_mvt_lam5, - poly_mvt_lam6 - - ); - - }); // end sequential region (for single-source code) - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n POLYBENCH_MVT : Unknown variant id = " 
<< vid << std::endl; } diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp index 3ac9d680f..5d1313988 100644 --- a/src/polybench/POLYBENCH_MVT.cpp +++ b/src/polybench/POLYBENCH_MVT.cpp @@ -65,7 +65,6 @@ POLYBENCH_MVT::POLYBENCH_MVT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } POLYBENCH_MVT::~POLYBENCH_MVT() diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index 2131dedbf..0a38d1619 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -72,22 +72,6 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), add_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n ADD : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/ADD.cpp b/src/stream/ADD.cpp index 534deee28..0181888a0 100644 --- a/src/stream/ADD.cpp +++ b/src/stream/ADD.cpp @@ -55,7 +55,6 @@ ADD::ADD(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } ADD::~ADD() diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp index bda9af163..bc25a6a64 100644 --- a/src/stream/COPY-StdPar.cpp +++ b/src/stream/COPY-StdPar.cpp @@ -58,22 +58,6 @@ void COPY::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), copy_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n COPY : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/COPY.cpp b/src/stream/COPY.cpp index 40fae2467..0544c214c 
100644 --- a/src/stream/COPY.cpp +++ b/src/stream/COPY.cpp @@ -55,7 +55,6 @@ COPY::COPY(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } COPY::~COPY() diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp index 43d247f72..23d031d91 100644 --- a/src/stream/DOT-StdPar.cpp +++ b/src/stream/DOT-StdPar.cpp @@ -78,28 +78,6 @@ void DOT::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::ReduceSum dot(m_dot_init); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - DOT_BODY; - }); - - m_dot += static_cast(dot.get()); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n DOT : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/DOT.cpp b/src/stream/DOT.cpp index d1f701431..48774b354 100644 --- a/src/stream/DOT.cpp +++ b/src/stream/DOT.cpp @@ -55,7 +55,6 @@ DOT::DOT(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } DOT::~DOT() diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index 082265af2..f8c919e8d 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -72,22 +72,6 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if defined(RUN_RAJA_STDPAR) - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), mul_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n MUL : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/MUL.cpp b/src/stream/MUL.cpp index 38bfe4aca..9bdd5969c 100644 --- a/src/stream/MUL.cpp +++ b/src/stream/MUL.cpp @@ -55,7 +55,6 @@ MUL::MUL(const 
RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } MUL::~MUL() diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index 4b3db4a49..de8118d0e 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -71,22 +71,6 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_StdPar : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), triad_lam); - - } - stopTimer(); - - break; - } -#endif // RUN_RAJA_STDPAR - default : { getCout() << "\n TRIAD : Unknown variant id = " << vid << std::endl; } diff --git a/src/stream/TRIAD.cpp b/src/stream/TRIAD.cpp index e4064b9fd..03a6b670d 100644 --- a/src/stream/TRIAD.cpp +++ b/src/stream/TRIAD.cpp @@ -59,7 +59,6 @@ TRIAD::TRIAD(const RunParams& params) setVariantDefined( Base_StdPar ); setVariantDefined( Lambda_StdPar ); - setVariantDefined( RAJA_StdPar ); } TRIAD::~TRIAD() From 1e3f624c30694c46db5741ab3b3086ca3e7b1826 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 16:52:23 +0300 Subject: [PATCH 070/174] fix the issue with running --- src/common/KernelBase.cpp | 8 ++++++++ src/common/KernelBase.hpp | 2 ++ 2 files changed, 10 insertions(+) diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index 2a777971d..d2d7ac141 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -129,6 +129,14 @@ void KernelBase::setVariantDefined(VariantID vid) #endif break; } + + case Base_StdPar : + case Lambda_StdPar : + { + setStdParTuningDefinitions(vid); + break; + } + // Required for running Kokkos case Kokkos_Lambda : { diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 8c72e854e..8cde6bbe6 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -90,6 +90,8 @@ class KernelBase virtual void 
setOpenMPTargetTuningDefinitions(VariantID vid) { addVariantTuningName(vid, getDefaultTuningName()); } #endif + virtual void setStdParTuningDefinitions(VariantID vid) + { addVariantTuningName(vid, getDefaultTuningName()); } #if defined(RUN_KOKKOS) virtual void setKokkosTuningDefinitions(VariantID vid) { addVariantTuningName(vid, getDefaultTuningName()); } From 21885f736666aee4ce20bac15d02a91c55aae03e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:13:54 +0300 Subject: [PATCH 071/174] NVC note --- README.stdpar | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.stdpar b/README.stdpar index 1cb862c9d..f96dcda66 100644 --- a/README.stdpar +++ b/README.stdpar @@ -2,6 +2,9 @@ cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j8 # NVC++ +cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 + +^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves # Intel cmake .. 
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From 6a26a5f46f436718c20e063b5cec4b7d4ad264ec Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:14:27 +0300 Subject: [PATCH 072/174] nuke StdPar SORTPAIRS because it needs work --- src/algorithm/SORTPAIRS-StdPar.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index dcb0f3a5c..cad571d4a 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -1,7 +1,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC // and RAJA Performance Suite project contributors. -// See the RAJAPerf/COPYRIGHT file for details. +// See the RAJAPerf/LICENSE file for details. 
// // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// @@ -30,8 +30,8 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); + //auto begin = counting_iterator(ibegin); + //auto end = counting_iterator(iend); SORTPAIRS_DATA_SETUP; @@ -51,20 +51,16 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]); } - std::sort( std::execution::par_unseq, - vector_of_pairs.begin(), vector_of_pairs.end(), + std::sort(vector_of_pairs.begin(), vector_of_pairs.end(), [](pair_type const& lhs, pair_type const& rhs) { return lhs.first < rhs.first; }); - //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { - std::for_each( //std::execution::par_unseq, - begin, end, - [=](Index_type iemp) { - const pair_type& pair = vector_of_pairs[iemp - ibegin]; + for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + pair_type& pair = vector_of_pairs[iemp - ibegin]; x[iend*irep + iemp] = pair.first; i[iend*irep + iemp] = pair.second; - }); + } } stopTimer(); From de6a3d282617ec2a969940d066ee3aecf28dce60 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:22:07 +0300 Subject: [PATCH 073/174] notes on NVC++ multicore issues --- README.stdpar | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/README.stdpar b/README.stdpar index f96dcda66..94d12d2b7 100644 --- a/README.stdpar +++ b/README.stdpar @@ -1,11 +1,64 @@ # GCC + cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j8 # NVC++ + cmake .. 
-DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 ^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves +Just disable the lambda one I guess... + +------------------------------------------------------- +Basic_MAT_MAT_SHARED +........................................................ +Base_StdPar-default 1136.6199452543779141 0.0000000000000000000 +Lambda_StdPar-default -nan -nan + +Probably just not atomic... + +------------------------------------------------------- +Basic_PI_ATOMIC +........................................................ +Base_StdPar-default 0.55899274342205662602 2.5825999101679185666 +Lambda_StdPar-default 3.1415926535899751926 0.0000000000000000000 + +Check these to make sure no stupid float<->double stuff happening. + +------------------------------------------------------- +Polybench_GEMVER +........................................................ +Base_Seq-default 16695345.016927006001 0.0000000000000000000 +Lambda_Seq-default 16695345.016927005882 1.1914380593225359917e-10 +RAJA_Seq-default 16695345.016927006608 -6.0663296608254313469e-10 +Base_StdPar-default 16695345.016927005745 2.5647750589996576309e-10 +Lambda_StdPar-default 16695345.016927006608 -6.0663296608254313469e-10 + +------------------------------------------------------- +Polybench_MVT +........................................................ +Base_Seq-default 6821556.1519041797419 0.0000000000000000000 +Lambda_Seq-default 6821556.1519041797419 0.0000000000000000000 +RAJA_Seq-default 6821556.1519041792999 4.4201442506164312363e-10 +Base_StdPar-default 6821556.1519041792999 4.4201442506164312363e-10 +Lambda_StdPar-default 6821556.1519041792999 4.4201442506164312363e-10 + +------------------------------------------------------- +Stream_DOT +........................................................ 
+Base_Seq-default 39999973.379841431975 0.0000000000000000000 +Lambda_Seq-default 39999973.379841439426 -7.4505805969238281250e-09 +RAJA_Seq-default 39999973.379841662943 -2.3096799850463867188e-07 +Base_StdPar-default 39999973.379841439426 -7.4505805969238281250e-09 +Lambda_StdPar-default 39999973.379841439426 -7.4505805969238281250e-09 + +------------------------------------------------------- +Algorithm_REDUCE_SUM +........................................................ +RAJA_Seq-default 268294.10758353886195 1.5483237802982330322e-08 + # Intel + cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From f07036beceef1bfa3b3df7e96139f4df0794c332 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:31:48 +0300 Subject: [PATCH 074/174] NVC++ GPU fails here --- src/apps/HALOEXCHANGE-StdPar.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index b8564868e..d2cd73794 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -38,7 +38,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, + std::for_each( //std::execution::par_unseq, begin, end, [=](Index_type l) { Real_ptr buffer = buffers[l]; @@ -53,7 +53,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( std::execution::par_unseq, + std::for_each( //std::execution::par_unseq, begin, end, [=](Index_type l) { Real_ptr buffer = buffers[l]; @@ -79,9 +79,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type l) { + 
std::for_each( //std::execution::par_unseq, + begin, end, + [=](Index_type l) { Real_ptr buffer = buffers[l]; Int_ptr list = pack_index_lists[l]; Index_type len = pack_index_list_lengths[l]; @@ -97,9 +97,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type l) { + std::for_each( //std::execution::par_unseq, + begin, end, + [=](Index_type l) { Real_ptr buffer = buffers[l]; Int_ptr list = unpack_index_lists[l]; Index_type len = unpack_index_list_lengths[l]; From 00a107ac857ed0b2749d8847fa1e702bfac96618 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:33:09 +0300 Subject: [PATCH 075/174] more errata --- README.stdpar | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.stdpar b/README.stdpar index 94d12d2b7..a9d058263 100644 --- a/README.stdpar +++ b/README.stdpar @@ -6,6 +6,8 @@ cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAG cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 +## CPU + ^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves Just disable the lambda one I guess... @@ -58,6 +60,24 @@ Algorithm_REDUCE_SUM ........................................................ RAJA_Seq-default 268294.10758353886195 1.5483237802982330322e-08 +## GPU + +Just disable parallel execution here... 
+ +[ 99%] Linking CXX executable ../bin/raja-perf.exe +nvlink error : Undefined reference to '_ZSt28__throw_bad_array_new_lengthv' in '../lib/libapps.a:HALOEXCHANGE-StdPar.cpp.o' +pgacclnk: child process exit status 2: /opt/nvidia/hpc_sdk/Linux_x86_64/22.5/compilers/bin/tools/nvdd +make[2]: *** [src/CMakeFiles/raja-perf.exe.dir/build.make:109: bin/raja-perf.exe] Error 2 +make[1]: *** [CMakeFiles/Makefile2:1393: src/CMakeFiles/raja-perf.exe.dir/all] Error 2 +make[1]: *** Waiting for unfinished jobs.... + +Exclude this one until fixed... + + Running Base_StdPar variant +terminate called after throwing an instance of 'thrust::system::system_error' + what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered +Aborted (core dumped) + # Intel cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From 92f8d12434a962117bf23be618ef2328fcb212d4 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 17:41:14 +0300 Subject: [PATCH 076/174] all the erratum --- README.stdpar | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.stdpar b/README.stdpar index a9d058263..19abe0f30 100644 --- a/README.stdpar +++ b/README.stdpar @@ -78,6 +78,28 @@ terminate called after throwing an instance of 'thrust::system::system_error' what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered Aborted (core dumped) +Ouch... + +------------------------------------------------------- +Polybench_FLOYD_WARSHALL +........................................................ 
+Base_Seq-default -166623.06893187693646 0.0000000000000000000 +Lambda_Seq-default -166623.06893187693646 0.0000000000000000000 +RAJA_Seq-default -166623.06893187693646 0.0000000000000000000 +Base_StdPar-default -172966.42970694099014 6343.3607750640536835 +Lambda_StdPar-default -170706.96338200639781 4083.8944501294613474 + +Lambda_Seq has the bug too so just disable the Lambda versions... + +------------------------------------------------------- +Basic_MAT_MAT_SHARED +........................................................ +Base_Seq-default 1136.6199452543779141 0.0000000000000000000 +Lambda_Seq-default -6.0464819976872759102e+32 6.0464819976872759102e+32 +RAJA_Seq-default 1136.6199452543779141 0.0000000000000000000 +Base_StdPar-default 1136.6199452543779141 0.0000000000000000000 +Lambda_StdPar-default -6.0464819976872759102e+32 6.0464819976872759102e+32 + # Intel cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From 645bb4a28570af6a19b418d248522490c37fe58d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:12:50 +0300 Subject: [PATCH 077/174] pointer to atomic for GPU --- src/basic/PI_ATOMIC-StdPar.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index d73f13814..44925913d 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -48,14 +48,16 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - myAtomic a_pi{m_pi_init}; + //myAtomic a_pi{m_pi_init}; + myAtomic * a_pi = new myAtomic; // i hate this + *a_pi = m_pi_init; std::for_each( std::execution::par_unseq, begin, end, - [=,&a_pi](Index_type i) { + [=](Index_type i) { double x = (double(i) + 0.5) * dx; - a_pi = a_pi + dx / (1.0 + x * x); + *a_pi = *a_pi + dx / (1.0 
+ x * x); }); - *pi = a_pi * 4.0; + *pi = *a_pi * 4.0; } stopTimer(); From dda1d64509566d5094c75c562add38e5818d876f Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:14:41 +0300 Subject: [PATCH 078/174] update PI_ATOMIC --- README.stdpar | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.stdpar b/README.stdpar index 19abe0f30..697e27a22 100644 --- a/README.stdpar +++ b/README.stdpar @@ -71,7 +71,7 @@ make[2]: *** [src/CMakeFiles/raja-perf.exe.dir/build.make:109: bin/raja-perf.exe make[1]: *** [CMakeFiles/Makefile2:1393: src/CMakeFiles/raja-perf.exe.dir/all] Error 2 make[1]: *** Waiting for unfinished jobs.... -Exclude this one until fixed... +PI_ATOMIC is fixed by allocating on the heap... Running Base_StdPar variant terminate called after throwing an instance of 'thrust::system::system_error' From 8bbcf1a66f3f43fa3a1194b4ccb39c7eb6571672 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:26:13 +0300 Subject: [PATCH 079/174] fix this one - nested way faster on GPU --- .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index 4b2a44daa..0c197f2ca 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -14,7 +14,7 @@ #include -//#define USE_STDPAR_COLLAPSE 1 +#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { @@ -44,25 +44,25 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + for (Index_type k = 0; k < N; ++k) { #ifdef USE_STDPAR_COLLAPSE std::for_each( std::execution::par_unseq, - begin2, end2, [=](Index_type ki) { - const auto k = ki / N; - const auto i = ki % N; + begin2, end2, [=](Index_type ji) { + const auto j = ji / N; + const auto i = ji % N; #else 
std::for_each( std::execution::par_unseq, begin, end, - [=](Index_type k) { - std::for_each(begin, end, - [=](Index_type i) { + [=](Index_type i) { + std::for_each( begin, end, + [=](Index_type j) { #endif - for (Index_type j = 0; j < N; ++j) { POLYBENCH_FLOYD_WARSHALL_BODY; - } + }); #ifndef USE_STDPAR_COLLAPSE }); #endif - }); + } } stopTimer(); @@ -80,25 +80,25 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + for (Index_type k = 0; k < N; ++k) { #ifdef USE_STDPAR_COLLAPSE std::for_each( std::execution::par_unseq, - begin2, end2, [=](Index_type ki) { - const auto k = ki / N; - const auto i = ki % N; + begin2, end2, [=](Index_type ji) { + const auto j = ji / N; + const auto i = ji % N; #else std::for_each( std::execution::par_unseq, begin, end, - [=](Index_type k) { - std::for_each(begin, end, - [=](Index_type i) { + [=](Index_type i) { + std::for_each( begin, end, + [=](Index_type j) { #endif - for (Index_type j = 0; j < N; ++j) { poly_floydwarshall_base_lam(k, i, j); - } -#ifndef USE_STDPAR_COLLAPSE }); -#endif +#ifndef USE_STDPAR_COLLAPSE }); +#endif + } } stopTimer(); From 88547f2ea85755990d21ec8101689ded8eb01f41 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:26:44 +0300 Subject: [PATCH 080/174] fix this one - nested way faster on GPU --- README.stdpar | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/README.stdpar b/README.stdpar index 697e27a22..a5b81beca 100644 --- a/README.stdpar +++ b/README.stdpar @@ -78,17 +78,6 @@ terminate called after throwing an instance of 'thrust::system::system_error' what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered Aborted (core dumped) -Ouch... - -------------------------------------------------------- -Polybench_FLOYD_WARSHALL -........................................................ 
-Base_Seq-default -166623.06893187693646 0.0000000000000000000 -Lambda_Seq-default -166623.06893187693646 0.0000000000000000000 -RAJA_Seq-default -166623.06893187693646 0.0000000000000000000 -Base_StdPar-default -172966.42970694099014 6343.3607750640536835 -Lambda_StdPar-default -170706.96338200639781 4083.8944501294613474 - Lambda_Seq has the bug too so just disable the Lambda versions... ------------------------------------------------------- From 65963b6d9ed9e2e8e782f3b2aededfa521e9a821 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 8 Jul 2022 20:36:35 +0300 Subject: [PATCH 081/174] remove RAJA_StdPar --- src/basic/MAT_MAT_SHARED-StdPar.cpp | 94 +---------------------------- 1 file changed, 2 insertions(+), 92 deletions(-) diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp index 6aa32ea3b..cc211b719 100644 --- a/src/basic/MAT_MAT_SHARED-StdPar.cpp +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -36,7 +36,7 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type by = 0; by < Ny; ++by) { for (Index_type bx = 0; bx < Nx; ++bx) { - MAT_MAT_SHARED_BODY_0(TL_SZ) + MAT_MAT_SHARED_BODY_0(TL_SZ) for (Index_type ty = 0; ty < TL_SZ; ++ty) { for (Index_type tx = 0; tx < TL_SZ; ++tx) { @@ -81,6 +81,7 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) auto outer_y = [&](Index_type by) { auto outer_x = [&](Index_type bx) { + MAT_MAT_SHARED_BODY_0(TL_SZ) auto inner_y_1 = [&](Index_type ty) { @@ -152,97 +153,6 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#ifdef RAJA_ENABLE_STDPAR - case RAJA_Sq: { - - using launch_policy = RAJA::expt::LaunchPolicy; - - using outer_x = RAJA::expt::LoopPolicy; - - using outer_y = RAJA::expt::LoopPolicy; - - using inner_x = RAJA::expt::LoopPolicy; - - using inner_y = RAJA::expt::LoopPolicy; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - //Grid is empty as the host does not need a 
compute grid to be specified - RAJA::expt::launch(RAJA::expt::Grid(), - [=] RAJA_HOST_DEVICE(RAJA::expt::LaunchContext ctx) { - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Ny), - [&](Index_type by) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, Nx), - [&](Index_type bx) { - - MAT_MAT_SHARED_BODY_0(TL_SZ) - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - - [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type tx) { - MAT_MAT_SHARED_BODY_1(TL_SZ) - } - ); // RAJA::expt::loop - } - ); // RAJA::expt::loop - - for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; k++) { - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type tx) { - MAT_MAT_SHARED_BODY_2(TL_SZ) - } - ); // RAJA::expt::loop - } - ); // RAJA::expt::loop - - ctx.teamSync(); - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type tx) { - MAT_MAT_SHARED_BODY_3(TL_SZ) - } - ); // RAJA::expt::loop - } - ); // RAJA::expt::loop - - ctx.teamSync(); - - } // for (k) - - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type ty) { - RAJA::expt::loop(ctx, RAJA::RangeSegment(0, TL_SZ), - [&](Index_type tx) { - MAT_MAT_SHARED_BODY_4(TL_SZ) - } - ); // RAJA::expt::loop - } - ); // RAJA::expt::loop - - } // lambda (bx) - ); // RAJA::expt::loop - } // lambda (by) - ); // RAJA::expt::loop - - } // outer lambda (ctx) - ); // RAJA::expt::launch - - } // loop over kernel reps - stopTimer(); - - break; - } -#endif - default: { getCout() << "\n MAT_MAT_SHARED : Unknown variant id = " << vid << std::endl; From bdf03ba82954f9d0becadd8f86e332c2b30ea0a3 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 11 Jul 2022 23:22:11 -0700 Subject: [PATCH 082/174] disable Lambda_StdPar; use par not par_unseq --- src/basic/PI_ATOMIC-StdPar.cpp | 18 ++++++++++++------ 1 file changed, 12 
insertions(+), 6 deletions(-) diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 44925913d..fe7eeb599 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -51,7 +51,7 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) //myAtomic a_pi{m_pi_init}; myAtomic * a_pi = new myAtomic; // i hate this *a_pi = m_pi_init; - std::for_each( std::execution::par_unseq, + std::for_each( std::execution::par, begin, end, [=](Index_type i) { double x = (double(i) + 0.5) * dx; @@ -65,9 +65,10 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) break; } +#if 0 case Lambda_StdPar : { - auto piatomic_base_lam = [=](Index_type i, myAtomic &a_pi) { + auto piatomic_base_lam = [=](Index_type i, myAtomic * a_pi) { double x = (double(i) + 0.5) * dx; a_pi = a_pi + dx / (1.0 + x * x); }; @@ -75,17 +76,22 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - myAtomic a_pi{m_pi_init}; - for (Index_type i = ibegin; i < iend; ++i ) { + //myAtomic a_pi{m_pi_init}; + myAtomic * a_pi = new myAtomic; // i hate this + *a_pi = m_pi_init; + std::for_each( std::execution::par, + begin, end, + [=](Index_type i) { piatomic_base_lam(i,a_pi); - } - *pi = a_pi * 4.0; + }); + *pi = *a_pi * 4.0; } stopTimer(); break; } +#endif default : { getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; From eeb41c8f2f9cba87e60d4ef137ec445c14037d19 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 11 Jul 2022 23:22:39 -0700 Subject: [PATCH 083/174] enable par_unseq even though it is slower --- src/apps/HALOEXCHANGE-StdPar.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index d2cd73794..8a7f36032 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -38,9 +38,9 @@ void 
HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( //std::execution::par_unseq, - begin, end, - [=](Index_type l) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type l) { Real_ptr buffer = buffers[l]; Int_ptr list = pack_index_lists[l]; Index_type len = pack_index_list_lengths[l]; @@ -53,9 +53,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( //std::execution::par_unseq, - begin, end, - [=](Index_type l) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type l) { Real_ptr buffer = buffers[l]; Int_ptr list = unpack_index_lists[l]; Index_type len = unpack_index_list_lengths[l]; @@ -79,7 +79,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( //std::execution::par_unseq, + std::for_each( std::execution::par_unseq, begin, end, [=](Index_type l) { Real_ptr buffer = buffers[l]; @@ -97,7 +97,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( //std::execution::par_unseq, + std::for_each( std::execution::par_unseq, begin, end, [=](Index_type l) { Real_ptr buffer = buffers[l]; From c9e981b53760b0777f9cbdc69c01f2cd65a6259d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 11 Jul 2022 23:22:56 -0700 Subject: [PATCH 084/174] disable Lambda_StdPar --- src/basic/PI_ATOMIC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 6a15d4784..35ee2d502 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -55,7 +55,7 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( Lambda_StdPar ); + //setVariantDefined( Lambda_StdPar ); setVariantDefined( Kokkos_Lambda ); } From 
9a07e35835a3701b4f1c6b2b2fc7c4a6c8627d48 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 11 Jul 2022 23:23:10 -0700 Subject: [PATCH 085/174] move collapse choice here --- src/common/StdParUtils.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index 26c65c84b..7775360bc 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -28,6 +28,10 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) +#define USE_STDPAR_COLLAPSE +#endif + // This implementation was authored by David Olsen #include From de7ab8b927b2f28a43b8239ca56ccbfc1c5d7862 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 00:34:43 -0700 Subject: [PATCH 086/174] partially implement SORTPAIRS with StdPar (GPU issues) --- src/algorithm/SORTPAIRS-StdPar.cpp | 39 ++++++++++++++++++++---------- src/common/StdParUtils.hpp | 1 + 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index cad571d4a..c01a5f093 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -30,8 +30,8 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - //auto begin = counting_iterator(ibegin); - //auto end = counting_iterator(iend); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); SORTPAIRS_DATA_SETUP; @@ -47,20 +47,33 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) std::vector vector_of_pairs; vector_of_pairs.reserve(iend-ibegin); - for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + std::for_each( +#ifndef NVCXX_GPU_ENABLED + 
std::execution::par_unseq, +#endif + begin,end, + [=,&vector_of_pairs](Index_type iemp) noexcept { vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]); - } - - std::sort(vector_of_pairs.begin(), vector_of_pairs.end(), - [](pair_type const& lhs, pair_type const& rhs) { - return lhs.first < rhs.first; - }); - - for (Index_type iemp = ibegin; iemp < iend; ++iemp) { - pair_type& pair = vector_of_pairs[iemp - ibegin]; + }); + + std::sort( std::execution::par_unseq, + vector_of_pairs.begin(), vector_of_pairs.end(), + [](pair_type const& lhs, pair_type const& rhs) { + return lhs.first < rhs.first; + }); + + //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { + std::for_each( +#ifndef NVCXX_GPU_ENABLED + std::execution::par_unseq, +#endif + begin,end, + [=](Index_type iemp) { + const pair_type &pair = vector_of_pairs[iemp - ibegin]; x[iend*irep + iemp] = pair.first; i[iend*irep + iemp] = pair.second; - } + }); } stopTimer(); diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index 7775360bc..7f207e011 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -30,6 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) #define USE_STDPAR_COLLAPSE +#define NVCXX_GPU_ENABLED #endif // This implementation was authored by David Olsen From e4e06b0854cb3b74df67b6cc6e82066676316fc8 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 00:35:03 -0700 Subject: [PATCH 087/174] partially implement SORTPAIRS with StdPar (GPU issues) --- README.stdpar | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.stdpar b/README.stdpar index a5b81beca..9d9067e90 100644 --- a/README.stdpar +++ b/README.stdpar @@ -89,6 +89,18 @@ RAJA_Seq-default 1136.6199452543779141 0.0000000000000000000 Base_StdPar-default 1136.6199452543779141 0.0000000000000000000 Lambda_StdPar-default -6.0464819976872759102e+32 6.0464819976872759102e+32 +SORTPAIRS emplace_back not supported on GPU... + +nvlink error : Undefined reference to '_ZSt20__throw_length_errorPKc' in '../lib/libalgorithm.a:SORTPAIRS-StdPar.cpp.o' +nvlink error : Undefined reference to '_ZSt20__throw_length_errorPKc' in '../lib/libalgorithm.a:SORTPAIRS-StdPar.cpp.o' + +SORTPAIRS write out to {x,i} bad... + + Running Base_StdPar variant +terminate called after throwing an instance of 'thrust::system::system_error' + what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered +Aborted (core dumped) + # Intel cmake .. 
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From e2fb5760242b50ae860acc3f498b4a4fc8619f57 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 00:36:12 -0700 Subject: [PATCH 088/174] bring USE_STDPAR_COLLAPSE into common header --- src/polybench/POLYBENCH_2MM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_3MM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 2 +- src/polybench/POLYBENCH_GEMM-StdPar.cpp | 2 +- src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index 20ad50043..e067a9842 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index ba0df5bb0..00f431291 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index 0c197f2ca..c3d1e8b15 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index 5d644def0..58a7f11ec 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 
1 namespace rajaperf { diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 7a51496f9..ed94308d8 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -14,7 +14,7 @@ #include -#define USE_STDPAR_COLLAPSE 1 +//#define USE_STDPAR_COLLAPSE 1 namespace rajaperf { From 6bf160064dd3c84a170200d2abcff0e2d6dbb8f3 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:01:01 -0700 Subject: [PATCH 089/174] add StdPar impl using std::reduce --- src/algorithm/REDUCE_SUM-StdPar.cpp | 14 ++++++-------- src/algorithm/REDUCE_SUM.cpp | 3 +++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp index 08cbd206b..c2605250a 100644 --- a/src/algorithm/REDUCE_SUM-StdPar.cpp +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -41,10 +41,9 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune Real_type sum = m_sum_init; -#warning needs parallel reduce - for (Index_type i = ibegin; i < iend; ++i ) { - REDUCE_SUM_BODY; - } + sum += std::reduce( std::execution::par_unseq, + x+ibegin, x+iend, + Real_type(0), std::plus() ); m_sum = sum; @@ -65,10 +64,9 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune Real_type sum = m_sum_init; -#warning needs parallel reduce - for (Index_type i = ibegin; i < iend; ++i ) { - sum += reduce_sum_base_lam(i); - } + sum += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), reduce_sum_base_lam); m_sum = sum; diff --git a/src/algorithm/REDUCE_SUM.cpp b/src/algorithm/REDUCE_SUM.cpp index f85b982f6..24cc657e6 100644 --- a/src/algorithm/REDUCE_SUM.cpp +++ b/src/algorithm/REDUCE_SUM.cpp @@ -51,6 +51,9 @@ REDUCE_SUM::REDUCE_SUM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + //setVariantDefined( 
Lambda_StdPar ); // exists but is not interesting } REDUCE_SUM::~REDUCE_SUM() From cc5bc502538bb45b5ce7aad1525bbfd1aa87369c Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:11:28 -0700 Subject: [PATCH 090/174] add comment why GPU disabled --- src/algorithm/SORTPAIRS-StdPar.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index c01a5f093..6650aa2f6 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -50,6 +50,7 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { std::for_each( #ifndef NVCXX_GPU_ENABLED +// GPU implementation crashes std::execution::par_unseq, #endif begin,end, @@ -66,6 +67,7 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { std::for_each( #ifndef NVCXX_GPU_ENABLED +// GPU implementation crashes std::execution::par_unseq, #endif begin,end, From ac4a8f188274e952fec090b27291decef4a91ec9 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:11:47 -0700 Subject: [PATCH 091/174] add SCAN StdPar but wrong on GPU??? 
--- src/algorithm/SCAN-StdPar.cpp | 11 ++++++----- src/algorithm/SCAN.cpp | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index 5e6638e4b..c421c8a65 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -36,11 +36,12 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { -#warning needs parallel scan - SCAN_PROLOGUE; - for (Index_type i = ibegin; i < iend; ++i ) { - SCAN_BODY; - } + std::exclusive_scan( +#ifndef NVCXX_GPU_ENABLED +// GPU implementation is wrong + std::execution::par_unseq, +#endif + x+ibegin, x+iend, y, (Real_type)0 ); } stopTimer(); diff --git a/src/algorithm/SCAN.cpp b/src/algorithm/SCAN.cpp index 7a4d9091c..d9ae2044d 100644 --- a/src/algorithm/SCAN.cpp +++ b/src/algorithm/SCAN.cpp @@ -55,6 +55,8 @@ SCAN::SCAN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); } SCAN::~SCAN() From 252c98c93537bf52a7f01dd0d101ea63149080be Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:18:15 -0700 Subject: [PATCH 092/174] start working on INDEXLIST StdPar --- src/basic/INDEXLIST-StdPar.cpp | 7 ++++++- src/basic/INDEXLIST.cpp | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index f2b8cb828..51f29f220 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -38,10 +38,15 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ Index_type count = 0; +#if 0 #warning needs parallel inscan for (Index_type i = ibegin; i < iend; ++i ) { - INDEXLIST_BODY; + if ( x[i] < 0.0 ) { + list[count++] = i; + } } +#else +#endif m_len = count; diff --git a/src/basic/INDEXLIST.cpp b/src/basic/INDEXLIST.cpp index df523fbf6..c369f5ad8 100644 --- 
a/src/basic/INDEXLIST.cpp +++ b/src/basic/INDEXLIST.cpp @@ -49,6 +49,8 @@ INDEXLIST::INDEXLIST(const RunParams& params) setVariantDefined( Base_CUDA ); setVariantDefined( Base_HIP ); + + //setVariantDefined( Base_StdPar ); } INDEXLIST::~INDEXLIST() From dd4a5e071a7209145d9eef97fd510bfd10862d1e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 01:48:03 -0700 Subject: [PATCH 093/174] implement DAXPY_ATOMIC StdPar using a variety of atomics, because C++20 atomic_ref is not widely available --- src/basic/DAXPY_ATOMIC-StdPar.cpp | 35 ++++++++++++++++++++++++------- src/basic/DAXPY_ATOMIC.cpp | 3 +++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index 911e8de6e..ba7421545 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -13,6 +13,12 @@ #include "common/StdParUtils.hpp" #include +#include + +#if defined(NVCXX_GPU_ENABLED) +// this is required to get NVC++ to compile CUDA atomics in StdPar +#include +#endif namespace rajaperf { @@ -41,9 +47,23 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu for (RepIndex_type irep = 0; irep < run_reps; ++irep) { std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - DAXPY_ATOMIC_BODY; + begin, end, + [=](Index_type i) { +#if __cpp_lib_atomic_ref + auto px = std::atomic_ref(&x[i]); + auto py = std::atomic_ref(&y[i]); + py += a * px; +#elif defined(_OPENMP) + #pragma omp atomic + y[i] += a * x[i]; +#elif defined(_OPENACC) + #pragma acc atomic + y[i] += a * x[i]; +#elif defined(NVCXX_GPU_ENABLED) + atomicaddd(&y[i],a * x[i]); +#else +#error No atomic +#endif }); } @@ -55,15 +75,16 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu case Lambda_StdPar : { auto daxpy_atomic_lam = [=](Index_type i) { - DAXPY_ATOMIC_BODY; - }; + #pragma omp atomic + y[i] += a * x[i] ; + }; startTimer(); for (RepIndex_type irep = 
0; irep < run_reps; ++irep) { std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + begin, end, + [=](Index_type i) { daxpy_atomic_lam(i); }); diff --git a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp index 200df93db..111010c36 100644 --- a/src/basic/DAXPY_ATOMIC.cpp +++ b/src/basic/DAXPY_ATOMIC.cpp @@ -52,6 +52,9 @@ DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } From 2e4a4ef74c7e89cc13c54c0c557a928d0a0dcb99 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:07:14 -0700 Subject: [PATCH 094/174] use std:: not RAJA min/max --- src/basic/REDUCE3_INT-StdPar.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp index b2ada68e7..c1de02c72 100644 --- a/src/basic/REDUCE3_INT-StdPar.cpp +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -44,8 +44,8 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) typedef std::array Reduce_type; Reduce_type result = std::transform_reduce( std::execution::par_unseq, - begin, end, - Reduce_type{m_vsum_init,m_vmin_init,m_vmax_init}, + begin, end, + Reduce_type{m_vsum_init,m_vmin_init,m_vmax_init}, [=](Reduce_type a, Reduce_type b) -> Reduce_type { auto plus = a[0] + b[0]; auto min = std::min(a[1],b[1]); @@ -61,8 +61,8 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) ); m_vsum += result[0]; - m_vmin = RAJA_MIN(m_vmin, result[1]); - m_vmax = RAJA_MAX(m_vmax, result[2]); + m_vmin = std::min(m_vmin, result[1]); + m_vmax = std::max(m_vmax, result[2]); } stopTimer(); @@ -85,13 +85,13 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type i = ibegin; i < iend; ++i ) { vsum += init3_base_lam(i); - vmin = RAJA_MIN(vmin, init3_base_lam(i)); - vmax = 
RAJA_MAX(vmax, init3_base_lam(i)); + vmin = std::min(vmin, init3_base_lam(i)); + vmax = std::max(vmax, init3_base_lam(i)); } m_vsum += vsum; - m_vmin = RAJA_MIN(m_vmin, vmin); - m_vmax = RAJA_MAX(m_vmax, vmax); + m_vmin = std::min(m_vmin, vmin); + m_vmax = std::max(m_vmax, vmax); } stopTimer(); From 34b83245fbd308cf683d0ee712c8ad1427a215c6 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:07:29 -0700 Subject: [PATCH 095/174] implement REDUCE_STRUCT Base_StdPar --- src/basic/REDUCE_STRUCT-StdPar.cpp | 52 ++++++++++++++++++++++++------ src/basic/REDUCE_STRUCT.cpp | 3 ++ 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp index ee94e542c..2be2360ad 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -45,15 +45,47 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t Real_type xmin = m_init_min; Real_type ymin = m_init_min; Real_type xmax = m_init_max; Real_type ymax = m_init_max; +#if 0 #warning needs parallel for (Index_type i = ibegin; i < iend; ++i ) { - xsum += points.x[i] ; \ - xmin = RAJA_MIN(xmin, points.x[i]) ; \ - xmax = RAJA_MAX(xmax, points.x[i]) ; \ - ysum += points.y[i] ; \ - ymin = RAJA_MIN(ymin, points.y[i]) ; \ - ymax = RAJA_MAX(ymax, points.y[i]) ; + xsum += points.x[i] ; + xmin = std::min(xmin, points.x[i]) ; + xmax = std::max(xmax, points.x[i]) ; + ysum += points.y[i] ; + ymin = std::min(ymin, points.y[i]) ; + ymax = std::max(ymax, points.y[i]) ; } +#else + using Reduce_type = std::array; + Reduce_type result = + std::transform_reduce( std::execution::par_unseq, + begin, end, + Reduce_type{ m_init_sum, m_init_min, m_init_max, // x + m_init_sum, m_init_min, m_init_max }, // y + [=](Reduce_type a, Reduce_type b) -> Reduce_type { + auto xsum = a[0] + b[0]; + auto xmin = std::min(a[1],b[1]); + auto xmax = std::max(a[2],b[2]); + auto ysum = a[3] + b[3]; + auto ymin = 
std::min(a[4],b[4]); + auto ymax = std::max(a[5],b[5]); + Reduce_type red{ xsum, xmin, xmax, ysum, ymin, ymax }; + return red; + }, + [=](Index_type i) -> Reduce_type { + Reduce_type val{ points.x[i], points.x[i], points.x[i], + points.y[i], points.y[i], points.y[i] }; + return val; + + } + ); +#endif + xsum = result[0]; + xmin = result[1]; + xmax = result[2]; + ysum = result[3]; + ymin = result[4]; + ymax = result[5]; points.SetCenter(xsum/(points.N), ysum/(points.N)); points.SetXMin(xmin); @@ -88,11 +120,11 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t #warning needs parallel for (Index_type i = ibegin; i < iend; ++i ) { xsum += reduce_struct_x_base_lam(i); - xmin = RAJA_MIN(xmin, reduce_struct_x_base_lam(i)); - xmax = RAJA_MAX(xmax, reduce_struct_x_base_lam(i)); + xmin = std::min(xmin, reduce_struct_x_base_lam(i)); + xmax = std::max(xmax, reduce_struct_x_base_lam(i)); ysum += reduce_struct_y_base_lam(i); - ymin = RAJA_MIN(ymin, reduce_struct_y_base_lam(i)); - ymax = RAJA_MAX(ymax, reduce_struct_y_base_lam(i)); + ymin = std::min(ymin, reduce_struct_y_base_lam(i)); + ymax = std::max(ymax, reduce_struct_y_base_lam(i)); } points.SetCenter(xsum/(points.N), ysum/(points.N)); diff --git a/src/basic/REDUCE_STRUCT.cpp b/src/basic/REDUCE_STRUCT.cpp index d5c33f906..0be2df509 100644 --- a/src/basic/REDUCE_STRUCT.cpp +++ b/src/basic/REDUCE_STRUCT.cpp @@ -56,6 +56,9 @@ REDUCE_STRUCT::REDUCE_STRUCT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); } REDUCE_STRUCT::~REDUCE_STRUCT() From de072c1d28f43a657dc0d95420e23ba4be37d328 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:08:57 -0700 Subject: [PATCH 096/174] remove warning --- src/basic/REDUCE_STRUCT-StdPar.cpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp 
b/src/basic/REDUCE_STRUCT-StdPar.cpp index 2be2360ad..b8e15d033 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -45,17 +45,6 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t Real_type xmin = m_init_min; Real_type ymin = m_init_min; Real_type xmax = m_init_max; Real_type ymax = m_init_max; -#if 0 -#warning needs parallel - for (Index_type i = ibegin; i < iend; ++i ) { - xsum += points.x[i] ; - xmin = std::min(xmin, points.x[i]) ; - xmax = std::max(xmax, points.x[i]) ; - ysum += points.y[i] ; - ymin = std::min(ymin, points.y[i]) ; - ymax = std::max(ymax, points.y[i]) ; - } -#else using Reduce_type = std::array; Reduce_type result = std::transform_reduce( std::execution::par_unseq, @@ -79,7 +68,7 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t } ); -#endif + xsum = result[0]; xmin = result[1]; xmax = result[2]; From 7b05b04471111a457e0e4025ba06bfd8592b72fe Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:44:43 -0700 Subject: [PATCH 097/174] FIRST_MIN Lambda_StdPar unimplemented --- src/lcals/FIRST_MIN-OMP.cpp | 8 ++++---- src/lcals/FIRST_MIN.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lcals/FIRST_MIN-OMP.cpp b/src/lcals/FIRST_MIN-OMP.cpp index ef7791739..2b95528c1 100644 --- a/src/lcals/FIRST_MIN-OMP.cpp +++ b/src/lcals/FIRST_MIN-OMP.cpp @@ -36,12 +36,12 @@ void FIRST_MIN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - #pragma omp declare reduction(minloc : MyMinLoc : \ + //#pragma omp declare reduction(minloc : MyMinLoc : \ omp_out = MinLoc_compare(omp_out, omp_in)) FIRST_MIN_MINLOC_INIT; - #pragma omp parallel for reduction(minloc:mymin) + //#pragma omp parallel for reduction(minloc:mymin) for (Index_type i = ibegin; i < iend; ++i ) { FIRST_MIN_BODY; } @@ -63,12 +63,12 @@ void 
FIRST_MIN::runOpenMPVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - #pragma omp declare reduction(minloc : MyMinLoc : \ + //#pragma omp declare reduction(minloc : MyMinLoc : \ omp_out = MinLoc_compare(omp_out, omp_in)) FIRST_MIN_MINLOC_INIT; - #pragma omp parallel for reduction(minloc:mymin) + //#pragma omp parallel for reduction(minloc:mymin) for (Index_type i = ibegin; i < iend; ++i ) { if ( firstmin_base_lam(i) < mymin.val ) { mymin.val = x[i]; diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp index e8825dd17..69778e263 100644 --- a/src/lcals/FIRST_MIN.cpp +++ b/src/lcals/FIRST_MIN.cpp @@ -59,7 +59,7 @@ FIRST_MIN::FIRST_MIN(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - setVariantDefined( Lambda_StdPar ); + //setVariantDefined( Lambda_StdPar ); } FIRST_MIN::~FIRST_MIN() From 97e534869540b34863cc6dc1ee62e48cb073c69e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 02:46:47 -0700 Subject: [PATCH 098/174] s/RAJA_MAX/std::max/g --- src/lcals/FIRST_MIN-StdPar.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index ef6a11c93..10e579197 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -42,7 +42,7 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) &x[ibegin], &x[iend]); auto loc = std::distance(&x[ibegin], result); - m_minloc = RAJA_MAX(m_minloc, loc); + m_minloc = std::max(m_minloc, loc); } stopTimer(); @@ -63,12 +63,12 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type i = ibegin; i < iend; ++i ) { if ( firstmin_base_lam(i) < mymin.val ) { \ - mymin.val = x[i]; \ - mymin.loc = i; \ + mymin.val = x[i]; + mymin.loc = i; } } - m_minloc = RAJA_MAX(m_minloc, mymin.loc); + m_minloc = std::max(m_minloc, mymin.loc); } stopTimer(); From 
c98264d29bf2b087a6e8deb362c2066c9a7b3e06 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 13:20:10 +0300 Subject: [PATCH 099/174] CPU StdPar --- README.stdpar | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.stdpar b/README.stdpar index 9d9067e90..5efe4a24d 100644 --- a/README.stdpar +++ b/README.stdpar @@ -6,6 +6,8 @@ cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAG cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 +cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_supress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8 + ## CPU ^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves From 15ed8ac6ab7503487479c08cbc5d6a0f46e0823d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 13:40:58 +0300 Subject: [PATCH 100/174] disable atomic_ref --- src/basic/DAXPY_ATOMIC-StdPar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index ba7421545..a2ba4fe28 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -49,7 +49,7 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { -#if __cpp_lib_atomic_ref +#if 0 //__cpp_lib_atomic_ref auto px = std::atomic_ref(&x[i]); auto py = std::atomic_ref(&y[i]); py += a * px; From 0d7545077dd46bac5d615bd170ddffbab5beea44 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 13:41:09 +0300 Subject: [PATCH 101/174] CPU info --- README.stdpar | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.stdpar b/README.stdpar index 5efe4a24d..a1996134c 100644 --- 
a/README.stdpar +++ b/README.stdpar @@ -4,10 +4,11 @@ cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAG # NVC++ -cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 +cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_supress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8 + ## CPU ^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves From e25ec9918b0d79a813b9b0ec8c247b1e15b00873 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 04:49:23 -0700 Subject: [PATCH 102/174] fix no GPU StdPar in SCAN --- src/algorithm/SCAN-StdPar.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index c421c8a65..0c99ae9d9 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -37,8 +37,10 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { std::exclusive_scan( -#ifndef NVCXX_GPU_ENABLED +#ifdef NVCXX_GPU_ENABLED // GPU implementation is wrong + std::execution::seq, +#else std::execution::par_unseq, #endif x+ibegin, x+iend, y, (Real_type)0 ); From f8425554eb699abb6d4511008a9842b417f85f33 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 12 Jul 2022 04:49:43 -0700 Subject: [PATCH 103/174] Lambda_StdPar HEAT_3D added --- src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 36 ++++++++++++++-------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp 
b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index 8f9e1bc54..24d0b9afd 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -93,21 +93,33 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { - for (Index_type i = 1; i < N-1; ++i ) { - for (Index_type j = 1; j < N-1; ++j ) { - for (Index_type k = 1; k < N-1; ++k ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type k) { poly_heat3d_base_lam1(i, j, k); - } - } - } + }); + }); + }); - for (Index_type i = 1; i < N-1; ++i ) { - for (Index_type j = 1; j < N-1; ++j ) { - for (Index_type k = 1; k < N-1; ++k ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type j) { + std::for_each( std::execution::unseq, + begin, end, + [=](Index_type k) { poly_heat3d_base_lam2(i, j, k); - } - } - } + }); + }); + }); } From 7a96a085dff8e6d2bc259da26b5b06c2a3bcfaca Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 31 Aug 2022 11:57:12 +0300 Subject: [PATCH 104/174] fix correctness issues caused by emplace_back in parallel Signed-off-by: Jeff Hammond --- src/algorithm/SORTPAIRS-StdPar.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index 6650aa2f6..844ca6e7b 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -45,18 +45,25 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) using pair_type = std::pair; std::vector vector_of_pairs; + +#if 0 vector_of_pairs.reserve(iend-ibegin); //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { - std::for_each( -#ifndef 
NVCXX_GPU_ENABLED -// GPU implementation crashes - std::execution::par_unseq, -#endif + std::for_each( //std::execution::par, // parallelism leads to incorrectness begin,end, [=,&vector_of_pairs](Index_type iemp) noexcept { vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]); }); +#else + vector_of_pairs.resize(iend-ibegin); + + std::for_each( std::execution::par_unseq, + begin,end, + [=,&vector_of_pairs](Index_type iemp) noexcept { + vector_of_pairs[iemp] = std::make_pair(x[iend*irep + iemp], i[iend*irep + iemp]); + }); +#endif std::sort( std::execution::par_unseq, vector_of_pairs.begin(), vector_of_pairs.end(), @@ -65,11 +72,7 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) }); //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { - std::for_each( -#ifndef NVCXX_GPU_ENABLED -// GPU implementation crashes - std::execution::par_unseq, -#endif + std::for_each( std::execution::par_unseq, begin,end, [=](Index_type iemp) { const pair_type &pair = vector_of_pairs[iemp - ibegin]; From a38b1909c4fd16681237fcec6ec2a7f671c4cb4f Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 31 Aug 2022 12:58:53 +0300 Subject: [PATCH 105/174] deal with exception mess --- src/apps/HALOEXCHANGE-StdPar.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index 8a7f36032..690d0e4f7 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -11,6 +11,9 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" +#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) +static inline void std::__throw_bad_array_new_length() { std::abort(); } +#endif #include @@ -40,7 +43,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each( std::execution::par_unseq, begin, end, - [=](Index_type l) { + [=](Index_type l) noexcept { Real_ptr buffer = buffers[l]; Int_ptr list = pack_index_lists[l]; 
Index_type len = pack_index_list_lengths[l]; @@ -55,7 +58,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each( std::execution::par_unseq, begin, end, - [=](Index_type l) { + [=](Index_type l) noexcept { Real_ptr buffer = buffers[l]; Int_ptr list = unpack_index_lists[l]; Index_type len = unpack_index_list_lengths[l]; @@ -81,7 +84,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each( std::execution::par_unseq, begin, end, - [=](Index_type l) { + [=](Index_type l) noexcept { Real_ptr buffer = buffers[l]; Int_ptr list = pack_index_lists[l]; Index_type len = pack_index_list_lengths[l]; @@ -99,7 +102,7 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each( std::execution::par_unseq, begin, end, - [=](Index_type l) { + [=](Index_type l) noexcept { Real_ptr buffer = buffers[l]; Int_ptr list = unpack_index_lists[l]; Index_type len = unpack_index_list_lengths[l]; From 7b4039ce2425a69a3ed1f3f8112c0293b7fbf2ec Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 31 Aug 2022 12:59:11 +0300 Subject: [PATCH 106/174] fix emplace_back and deal with exceptions --- src/algorithm/SORTPAIRS-StdPar.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index 844ca6e7b..00892e15f 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -11,6 +11,9 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" +#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) +static inline void std::__throw_bad_array_new_length() { std::abort(); } +#endif #include #include @@ -49,7 +52,6 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) #if 0 vector_of_pairs.reserve(iend-ibegin); - //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { std::for_each( //std::execution::par, // parallelism leads to incorrectness begin,end, 
[=,&vector_of_pairs](Index_type iemp) noexcept { @@ -58,24 +60,25 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) #else vector_of_pairs.resize(iend-ibegin); + auto p = vector_of_pairs.data(); std::for_each( std::execution::par_unseq, begin,end, - [=,&vector_of_pairs](Index_type iemp) noexcept { - vector_of_pairs[iemp] = std::make_pair(x[iend*irep + iemp], i[iend*irep + iemp]); + [=](Index_type iemp) noexcept { + p[iemp] = std::make_pair(x[iend*irep + iemp], i[iend*irep + iemp]); }); #endif std::sort( std::execution::par_unseq, vector_of_pairs.begin(), vector_of_pairs.end(), - [](pair_type const& lhs, pair_type const& rhs) { + [](pair_type const& lhs, pair_type const& rhs) noexcept { return lhs.first < rhs.first; }); - //for (Index_type iemp = ibegin; iemp < iend; ++iemp) { std::for_each( std::execution::par_unseq, begin,end, - [=](Index_type iemp) { - const pair_type &pair = vector_of_pairs[iemp - ibegin]; + [=](Index_type iemp) noexcept { + //const pair_type &pair = vector_of_pairs[iemp - ibegin]; + const pair_type &pair = p[iemp - ibegin]; x[iend*irep + iemp] = pair.first; i[iend*irep + iemp] = pair.second; }); From 8f7e804282d3148934a6e0329e27bf4c83ef8d94 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 31 Aug 2022 13:24:24 +0300 Subject: [PATCH 107/174] remove listed issues that have been fixed --- README.stdpar | 48 +++++++++++++++++------------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/README.stdpar b/README.stdpar index a1996134c..392bfa9ad 100644 --- a/README.stdpar +++ b/README.stdpar @@ -4,15 +4,29 @@ cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAG # NVC++ -cmake .. 
-DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 +## Patches + +$ diff /opt/nvidia/hpc_sdk/Linux_x86_64/22.[35]/compilers/include/nvhpc/algorithm_execution.hpp +1066c1066 +< _ASSERT_RANDOM_ACCESS(_FIt); +--- +> //_ASSERT_RANDOM_ACCESS(_FIt); + +$ diff /opt/nvidia/hpc_sdk/Linux_x86_64/22.[35]/compilers/include/nvhpc/numeric_execution.hpp +386c386 +< _ASSERT_RANDOM_ACCESS(_FIt); +--- +> //_ASSERT_RANDOM_ACCESS(_FIt); + +## OpenMP/OpenACC for atomics + +cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=multicore -acc=multicore -mp=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_supress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8 ## CPU -^ SORTPAIRS fails with sizefact 0.1 when _all_ kernels are run but not when SORTPAIRS or Algorithm are run by themselves - Just disable the lambda one I guess... ------------------------------------------------------- @@ -65,22 +79,6 @@ RAJA_Seq-default 268294.10758353886195 1.5483237802982330322e-08 ## GPU -Just disable parallel execution here... - -[ 99%] Linking CXX executable ../bin/raja-perf.exe -nvlink error : Undefined reference to '_ZSt28__throw_bad_array_new_lengthv' in '../lib/libapps.a:HALOEXCHANGE-StdPar.cpp.o' -pgacclnk: child process exit status 2: /opt/nvidia/hpc_sdk/Linux_x86_64/22.5/compilers/bin/tools/nvdd -make[2]: *** [src/CMakeFiles/raja-perf.exe.dir/build.make:109: bin/raja-perf.exe] Error 2 -make[1]: *** [CMakeFiles/Makefile2:1393: src/CMakeFiles/raja-perf.exe.dir/all] Error 2 -make[1]: *** Waiting for unfinished jobs.... - -PI_ATOMIC is fixed by allocating on the heap... 
- - Running Base_StdPar variant -terminate called after throwing an instance of 'thrust::system::system_error' - what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered -Aborted (core dumped) - Lambda_Seq has the bug too so just disable the Lambda versions... ------------------------------------------------------- @@ -92,18 +90,6 @@ RAJA_Seq-default 1136.6199452543779141 0.0000000000000000000 Base_StdPar-default 1136.6199452543779141 0.0000000000000000000 Lambda_StdPar-default -6.0464819976872759102e+32 6.0464819976872759102e+32 -SORTPAIRS emplace_back not supported on GPU... - -nvlink error : Undefined reference to '_ZSt20__throw_length_errorPKc' in '../lib/libalgorithm.a:SORTPAIRS-StdPar.cpp.o' -nvlink error : Undefined reference to '_ZSt20__throw_length_errorPKc' in '../lib/libalgorithm.a:SORTPAIRS-StdPar.cpp.o' - -SORTPAIRS write out to {x,i} bad... - - Running Base_StdPar variant -terminate called after throwing an instance of 'thrust::system::system_error' - what(): for_each: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered -Aborted (core dumped) - # Intel cmake .. 
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 From d7716e58e128821cf9a9d55c650ead1431327621 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 31 Aug 2022 13:58:00 +0300 Subject: [PATCH 108/174] no clue what to do with these --- src/basic/INDEXLIST-StdPar.cpp | 8 ++--- src/basic/INDEXLIST_3LOOP-StdPar.cpp | 52 +++++++++++++++++++--------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index 51f29f220..1f0bf5cd3 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -27,6 +27,9 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + INDEXLIST_DATA_SETUP; switch ( vid ) { @@ -38,15 +41,12 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ Index_type count = 0; -#if 0 -#warning needs parallel inscan +#warning needs parallel something for (Index_type i = ibegin; i < iend; ++i ) { if ( x[i] < 0.0 ) { list[count++] = i; } } -#else -#endif m_len = count; diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index f07bdd583..475623ef0 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -48,24 +48,34 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { -#warning needs parallel for - for (Index_type i = ibegin; i < iend; ++i ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 
1 : 0; - } + }); +#if 0 Index_type count = 0; - -#warning needs parallel scan + for (Index_type i = ibegin; i < iend+1; ++i ) { Index_type inc = counts[i]; counts[i] = count; count += inc; } +#else + // The validation does not notice if the exscan + // is removed, or otherwise forced to be wrong... +#warning This may be incorrect... + std::exclusive_scan( std::execution::par_unseq, + counts+ibegin, counts+iend+1, + counts+ibegin, 0); +#endif -#warning needs parallel for - for (Index_type i = ibegin; i < iend; ++i ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { INDEXLIST_3LOOP_MAKE_LIST; - } + }); m_len = counts[iend]; @@ -92,24 +102,34 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { -#warning needs parallel for - for (Index_type i = ibegin; i < iend; ++i ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { indexlist_conditional_lam(i); - } + }); +#if 0 Index_type count = 0; - -#warning needs parallel scan + for (Index_type i = ibegin; i < iend+1; ++i ) { Index_type inc = counts[i]; counts[i] = count; count += inc; } +#else + // The validation does not notice if the exscan + // is removed, or otherwise forced to be wrong... +#warning This may be incorrect... 
+ std::exclusive_scan( std::execution::par_unseq, + counts+ibegin, counts+iend+1, + counts+ibegin, 0); +#endif -#warning needs parallel for - for (Index_type i = ibegin; i < iend; ++i ) { + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type i) { indexlist_make_list_lam(i); - } + }); m_len = counts[iend]; From ea0feffb751b1c9730a240f06743f01266d35cd4 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 31 Aug 2022 14:27:01 +0300 Subject: [PATCH 109/174] code compiles but is wrong, like Base --- src/basic/PI_ATOMIC-StdPar.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index fe7eeb599..ffb56f016 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -65,12 +65,11 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#if 0 case Lambda_StdPar : { auto piatomic_base_lam = [=](Index_type i, myAtomic * a_pi) { double x = (double(i) + 0.5) * dx; - a_pi = a_pi + dx / (1.0 + x * x); + *a_pi = *a_pi + dx / (1.0 + x * x); }; startTimer(); @@ -91,7 +90,6 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) break; } -#endif default : { getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; From aea0bca791e6a2c368ee67f6ad50fb46e62e7de0 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 31 Aug 2022 14:27:40 +0300 Subject: [PATCH 110/174] code compiles but is wrong, like Base --- src/basic/PI_ATOMIC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 35ee2d502..6a15d4784 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -55,7 +55,7 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Base_StdPar ); - //setVariantDefined( Lambda_StdPar ); + setVariantDefined( Lambda_StdPar ); setVariantDefined( Kokkos_Lambda ); } From 
69ce57cd8257052e4e8b7db877581c5accfd4f0d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 11 Oct 2022 05:14:04 -0700 Subject: [PATCH 111/174] change output --- src/common/Executor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index f3ee040ff..cadda6c54 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -1069,10 +1069,10 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode, if ( (mode == CSVRepMode::Speedup) && (!kern->hasVariantTuningDefined(reference_vid, reference_tune_idx) || !kern->hasVariantTuningDefined(vid, tuning_name)) ) { - file << "Not run"; + file << "NotRun"; } else if ( (mode == CSVRepMode::Timing) && !kern->hasVariantTuningDefined(vid, tuning_name) ) { - file << "Not run"; + file << "NotRun"; } else { file << setprecision(prec) << std::fixed << getReportDataEntry(mode, combiner, kern, vid, From e4248d6cad2f8f33a4efb087dd878bc41d97b6be Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 11 Oct 2022 05:14:14 -0700 Subject: [PATCH 112/174] addd comment --- src/apps/DIFFUSION3DPA-StdPar.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index ae21b8df0..bc6837bfa 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -17,6 +17,9 @@ #include +// This is used below, which is bad for GPU +//#define CPU_FOREACH(i, k, N) for (int i = 0; i < N; i++) + namespace rajaperf { namespace apps { From db398c2fa3d4f8be2d394acd94792ad5235d4375 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 11 Oct 2022 06:17:27 -0700 Subject: [PATCH 113/174] collapse in StdPar is really important for GPU --- src/apps/LTIMES-StdPar.cpp | 37 ++++++++++++++---- src/apps/LTIMES_NOVIEW-StdPar.cpp | 49 ++++++++++++++++------- src/lcals/HYDRO_2D-StdPar.cpp | 65 ++++++++++++++++++++++++++++++- 3 files changed, 128 insertions(+), 23 deletions(-) diff --git 
a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index aab2c55bc..1d6c2bc58 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -28,8 +28,13 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) LTIMES_DATA_SETUP; +#ifdef USE_STDPAR_COLLAPSE + auto begin = counting_iterator(0); + auto end = counting_iterator(num_z*num_g*num_m); +#else auto begin = counting_iterator(0); auto end = counting_iterator(num_z); +#endif switch ( vid ) { @@ -38,16 +43,24 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type zgm) { + const auto z = zgm / (num_g*num_m); + const auto gm = zgm % (num_g*num_m); + const auto g = gm / num_m; + const auto m = gm % num_m; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type z) { - for (Index_type g = 0; g < num_g; ++g ) { - for (Index_type m = 0; m < num_m; ++m ) { + for (Index_type g = 0; g < num_g; ++g ) + for (Index_type m = 0; m < num_m; ++m ) +#endif for (Index_type d = 0; d < num_d; ++d ) { LTIMES_BODY; } - } - } }); } @@ -66,16 +79,24 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type zgm) { + const auto z = zgm / (num_g*num_m); + const auto gm = zgm % (num_g*num_m); + const auto g = gm / num_m; + const auto m = gm % num_m; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type z) { - for (Index_type g = 0; g < num_g; ++g ) { - for (Index_type m = 0; m < num_m; ++m ) { + for (Index_type g = 0; g < num_g; ++g ) + for (Index_type m = 0; m < num_m; ++m ) +#endif for (Index_type d = 0; d < num_d; ++d ) { ltimes_base_lam(d, z, g, m); } - } - } }); } diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp 
b/src/apps/LTIMES_NOVIEW-StdPar.cpp index d35e03a3d..067067084 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp +++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -27,14 +27,14 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type run_reps = getRunReps(); LTIMES_NOVIEW_DATA_SETUP; - + +#ifdef USE_STDPAR_COLLAPSE + auto begin = counting_iterator(0); + auto end = counting_iterator(num_z*num_g*num_m); +#else auto begin = counting_iterator(0); auto end = counting_iterator(num_z); - - auto ltimesnoview_lam = [=](Index_type d, Index_type z, - Index_type g, Index_type m) { - LTIMES_NOVIEW_BODY; - }; +#endif switch ( vid ) { @@ -43,16 +43,24 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type zgm) { + const auto z = zgm / (num_g*num_m); + const auto gm = zgm % (num_g*num_m); + const auto g = gm / num_m; + const auto m = gm % num_m; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type z) { - for (Index_type g = 0; g < num_g; ++g ) { - for (Index_type m = 0; m < num_m; ++m ) { + for (Index_type g = 0; g < num_g; ++g ) + for (Index_type m = 0; m < num_m; ++m ) +#endif for (Index_type d = 0; d < num_d; ++d ) { LTIMES_NOVIEW_BODY; } - } - } }); } @@ -63,19 +71,32 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto ltimesnoview_lam = [=](Index_type d, Index_type z, + Index_type g, Index_type m) { + LTIMES_NOVIEW_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type zgm) { + const auto z = zgm / (num_g*num_m); + const auto gm = zgm % (num_g*num_m); + const auto g = gm / num_m; + const auto m = gm % num_m; +#else std::for_each( std::execution::par_unseq, begin, end, 
[=](Index_type z) { - for (Index_type g = 0; g < num_g; ++g ) { - for (Index_type m = 0; m < num_m; ++m ) { + for (Index_type g = 0; g < num_g; ++g ) + for (Index_type m = 0; m < num_m; ++m ) +#endif for (Index_type d = 0; d < num_d; ++d ) { ltimesnoview_lam(d, z, g, m); } - } - } }); } diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp index 8b8ff5c66..e779012e6 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ b/src/lcals/HYDRO_2D-StdPar.cpp @@ -19,7 +19,6 @@ namespace rajaperf namespace lcals { - void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) { #if defined(RUN_STDPAR) @@ -30,10 +29,19 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type jbeg = 1; const Index_type jend = m_jn - 1; +#ifdef USE_STDPAR_COLLAPSE + // this is going to run from [(0,0),..] + // we will add (1,1) later + const auto nk = kend-1; + const auto nj = jend-1; + auto begin = counting_iterator(0); + auto end = counting_iterator(nk*nj); +#else auto beginK = counting_iterator(kbeg); auto endK = counting_iterator(kend); auto beginJ = counting_iterator(jbeg); auto endJ = counting_iterator(jend); +#endif HYDRO_2D_DATA_SETUP; @@ -44,34 +52,62 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type kj) { + const auto k = 1 + kj / nj; + const auto j = 1 + kj % nj; +#else std::for_each( std::execution::par, beginK, endK, [=](Index_type k) { std::for_each( std::execution::unseq, beginJ, endJ, [=](Index_type j) { +#endif + //std::cerr << "JEFF: " << k << "," << j << "\n"; HYDRO_2D_BODY1; +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type kj) { + const auto k = 1 + kj / nj; + const auto j = 1 + kj % nj; +#else std::for_each( std::execution::par, beginK, endK, 
[=](Index_type k) { std::for_each( std::execution::unseq, beginJ, endJ, [=](Index_type j) { +#endif HYDRO_2D_BODY2; +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type kj) { + const auto k = 1 + kj / nj; + const auto j = 1 + kj % nj; +#else std::for_each( std::execution::par, beginK, endK, [=](Index_type k) { std::for_each( std::execution::unseq, beginJ, endJ, [=](Index_type j) { +#endif HYDRO_2D_BODY3; +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); } @@ -95,34 +131,61 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type kj) { + const auto k = 1 + kj / nj; + const auto j = 1 + kj % nj; +#else std::for_each( std::execution::par, beginK, endK, [=](Index_type k) { std::for_each( std::execution::unseq, beginJ, endJ, [=](Index_type j) { +#endif hydro2d_base_lam1(k, j); +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type kj) { + const auto k = 1 + kj / nj; + const auto j = 1 + kj % nj; +#else std::for_each( std::execution::par, beginK, endK, [=](Index_type k) { std::for_each( std::execution::unseq, beginJ, endJ, [=](Index_type j) { +#endif hydro2d_base_lam2(k, j); +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, [=](Index_type kj) { + const auto k = 1 + kj / nj; + const auto j = 1 + kj % nj; +#else std::for_each( std::execution::par, beginK, endK, [=](Index_type k) { std::for_each( std::execution::unseq, beginJ, endJ, [=](Index_type j) { +#endif hydro2d_base_lam3(k, j); +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); } From 274ad60e37a2c1edaac0a56deb3329048329db21 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 11 Oct 2022 
06:43:05 -0700 Subject: [PATCH 114/174] collapse in StdPar is really important for GPU --- src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 54 ++++++++++++++++++++ src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 46 +++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index 24d0b9afd..f33db4493 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -27,8 +27,14 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_HEAT_3D_DATA_SETUP; +#ifdef USE_STDPAR_COLLAPSE + const auto nn = N-2; + counting_iterator begin(0); + counting_iterator end(nn*nn*nn); +#else counting_iterator begin(1); counting_iterator end(N-1); +#endif switch ( vid ) { @@ -39,6 +45,15 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ijk) { + const auto i = 1 + ijk / (nn*nn); + const auto jk = ijk % (nn*nn); + const auto j = 1 + jk / nn; + const auto k = 1 + jk % nn; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { @@ -48,11 +63,23 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each( std::execution::unseq, begin, end, [=](Index_type k) { +#endif POLYBENCH_HEAT_3D_BODY1; +#ifndef USE_STDPAR_COLLAPSE }); }); +#endif }); +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ijk) { + const auto i = 1 + ijk / (nn*nn); + const auto jk = ijk % (nn*nn); + const auto j = 1 + jk / nn; + const auto k = 1 + jk % nn; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { @@ -62,9 +89,12 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each( std::execution::unseq, begin, end, [=](Index_type 
k) { +#endif POLYBENCH_HEAT_3D_BODY2; +#ifndef USE_STDPAR_COLLAPSE }); }); +#endif }); } @@ -93,6 +123,15 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ijk) { + const auto i = 1 + ijk / (nn*nn); + const auto jk = ijk % (nn*nn); + const auto j = 1 + jk / nn; + const auto k = 1 + jk % nn; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { @@ -102,11 +141,23 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each( std::execution::unseq, begin, end, [=](Index_type k) { +#endif poly_heat3d_base_lam1(i, j, k); +#ifndef USE_STDPAR_COLLAPSE }); }); +#endif }); +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ijk) { + const auto i = 1 + ijk / (nn*nn); + const auto jk = ijk % (nn*nn); + const auto j = 1 + jk / nn; + const auto k = 1 + jk % nn; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { @@ -116,9 +167,12 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each( std::execution::unseq, begin, end, [=](Index_type k) { +#endif poly_heat3d_base_lam2(i, j, k); +#ifndef USE_STDPAR_COLLAPSE }); }); +#endif }); } diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index b5c4ace75..445024820 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -27,8 +27,14 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_JACOBI_2D_DATA_SETUP; +#ifdef USE_STDPAR_COLLAPSE + const auto nn = N-2; + counting_iterator begin(0); + counting_iterator end(nn*nn); +#else counting_iterator begin(1); counting_iterator end(N-1); +#endif switch ( vid ) { @@ -39,23 +45,43 @@ void 
POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ij) { + const auto i = 1 + ij / nn; + const auto j = 1 + ij % nn; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { std::for_each( std::execution::unseq, begin, end, [=](Index_type j) { +#endif POLYBENCH_JACOBI_2D_BODY1; +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ij) { + const auto i = 1 + ij / nn; + const auto j = 1 + ij % nn; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { std::for_each( std::execution::unseq, begin, end, [=](Index_type j) { +#endif POLYBENCH_JACOBI_2D_BODY2; +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); } @@ -82,24 +108,44 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ij) { + const auto i = 1 + ij / nn; + const auto j = 1 + ij % nn; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { std::for_each( std::execution::unseq, begin, end, [=](Index_type j) { +#endif poly_jacobi2d_base_lam1(i, j); +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); +#ifdef USE_STDPAR_COLLAPSE + std::for_each( std::execution::par_unseq, + begin, end, + [=](Index_type ij) { + const auto i = 1 + ij / nn; + const auto j = 1 + ij % nn; +#else std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { std::for_each( std::execution::unseq, begin, end, [=](Index_type j) { +#endif poly_jacobi2d_base_lam2(i, j); +#ifndef USE_STDPAR_COLLAPSE }); +#endif }); } From 076abfbb53cb18010cf1d0b0d0f3510f3aadb713 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 11 Oct 2022 07:15:54 -0700 Subject: 
[PATCH 115/174] fixed lambda collapse - need to do the base version and cleanup --- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 59 +++++++++++++++------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 4742b76f9..8f44e2812 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -54,6 +54,7 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) begin1X, end1X, [=](Index_type i) { for (Index_type j = 0; j < ny; j++) { + //std::cerr << "B2: " << i << "," << j << "\n"; POLYBENCH_FDTD_2D_BODY2; } }); @@ -61,6 +62,7 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) beginX, endX, [=](Index_type i) { for (Index_type j = 1; j < ny; j++) { + //std::cerr << "B3: " << i << "," << j << "\n"; POLYBENCH_FDTD_2D_BODY3; } }); @@ -68,6 +70,7 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) beginXm1, endXm1, [=](Index_type i) { for (Index_type j = 0; j < ny - 1; j++) { + //std::cerr << "B4: " << i << "," << j << "\n"; POLYBENCH_FDTD_2D_BODY4; } }); @@ -87,7 +90,7 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) // scalar variable 't' used in it is updated for each // t-loop iteration. 
// - auto poly_fdtd2d_base_lam1 = [&](Index_type j) { + auto poly_fdtd2d_base_lam1 = [=](Index_type j) { POLYBENCH_FDTD_2D_BODY1; }; auto poly_fdtd2d_base_lam2 = [=](Index_type i, Index_type j) { @@ -105,31 +108,53 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (t = 0; t < tsteps; ++t) { - std::for_each( //std::execution::par_unseq, + //for (Index_type j = 0; j < ny; j++) { + std::for_each( std::execution::par_unseq, beginY, endY, [=](Index_type j) { poly_fdtd2d_base_lam1(j); }); - std::for_each( //std::execution::par_unseq, - begin1X, end1X, - [=](Index_type i) { - for (Index_type j = 0; j < ny; j++) { + + counting_iterator begin2(0); + counting_iterator end2((nx-1)*ny); + + //for (Index_type i = 1; i < nx; i++) { + // for (Index_type j = 0; j < ny; j++) { + std::for_each( std::execution::par_unseq, + begin2, end2, + [=](Index_type ij) { + const auto i = 1 + ij / ny; + const auto j = ij % ny; + //std::cerr << "L2: " << i << "," << j << "\n"; poly_fdtd2d_base_lam2(i, j); - } }); - std::for_each( //std::execution::par_unseq, - beginX, endX, - [=](Index_type i) { - for (Index_type j = 1; j < ny; j++) { + + counting_iterator begin3(0); + counting_iterator end3(nx*(ny-1)); + + //for (Index_type i = 0; i < nx; i++) { + // for (Index_type j = 1; j < ny; j++) { + std::for_each( std::execution::par_unseq, + begin3, end3, + [=](Index_type ij) { + const auto i = ij / (ny-1); + const auto j = 1 + ij % (ny-1); + //std::cerr << "L3: " << i << "," << j << "\n"; poly_fdtd2d_base_lam3(i, j); - } }); - std::for_each( //std::execution::par_unseq, - beginXm1, endXm1, - [=](Index_type i) { - for (Index_type j = 0; j < ny - 1; j++) { + + counting_iterator begin4(0); + counting_iterator end4((nx-1)*(ny-1)); + + //for (Index_type i = 0; i < nx - 1; i++) { + // for (Index_type j = 0; j < ny - 1; j++) { + std::for_each( std::execution::par_unseq, + begin4, end4, + [=](Index_type ij) { + const auto i = ij / (ny-1); + const auto j = ij % (ny-1); + //std::cerr 
<< "L4: " << i << "," << j << "\n"; poly_fdtd2d_base_lam4(i, j); - } }); } // tstep loop From a884cd16c854f8d43c66fe4d2342534517eecb7d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 09:01:14 +0300 Subject: [PATCH 116/174] fix atomic_ref --- src/basic/DAXPY_ATOMIC-StdPar.cpp | 34 ++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index a2ba4fe28..4e4e0f75b 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -15,11 +15,6 @@ #include #include -#if defined(NVCXX_GPU_ENABLED) -// this is required to get NVC++ to compile CUDA atomics in StdPar -#include -#endif - namespace rajaperf { namespace basic @@ -49,9 +44,9 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { -#if 0 //__cpp_lib_atomic_ref - auto px = std::atomic_ref(&x[i]); - auto py = std::atomic_ref(&y[i]); +#if __cpp_lib_atomic_ref + auto px = std::atomic_ref(x[i]); + auto py = std::atomic_ref(y[i]); py += a * px; #elif defined(_OPENMP) #pragma omp atomic @@ -62,7 +57,8 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu #elif defined(NVCXX_GPU_ENABLED) atomicaddd(&y[i],a * x[i]); #else -#error No atomic +#warning No atomic + y[i] += a * x[i]; #endif }); @@ -75,9 +71,23 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu case Lambda_StdPar : { auto daxpy_atomic_lam = [=](Index_type i) { - #pragma omp atomic - y[i] += a * x[i] ; - }; +#if __cpp_lib_atomic_ref + auto px = std::atomic_ref(x[i]); + auto py = std::atomic_ref(y[i]); + py += a * px; +#elif defined(_OPENMP) + #pragma omp atomic + y[i] += a * x[i]; +#elif defined(_OPENACC) + #pragma acc atomic + y[i] += a * x[i]; +#elif defined(NVCXX_GPU_ENABLED) + atomicaddd(&y[i],a * x[i]); +#else +#warning No atomic + y[i] += a * 
x[i]; +#endif + }; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { From 145696220e493c0ea3598b5b9e797003021bdebc Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 09:39:11 +0300 Subject: [PATCH 117/174] move openacc.h header to stdpar common header --- src/common/StdParUtils.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index 7f207e011..48636d9ec 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -33,6 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define NVCXX_GPU_ENABLED #endif +#if defined(NVCXX_GPU_ENABLED) +// this is required to get NVC++ to compile CUDA atomics in StdPar +#include +#endif + // This implementation was authored by David Olsen #include From 00bd052e6ec922ce85fe09fdc0ab9234731bb1f1 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 09:39:37 +0300 Subject: [PATCH 118/174] use for_each everywhere - correct with intel and nvhpc it seems --- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 66 +++++++++++++++------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 8f44e2812..471abf542 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -27,15 +27,6 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_FDTD_2D_DATA_SETUP; - counting_iterator beginX(0); - counting_iterator endX(nx); - counting_iterator beginY(0); - counting_iterator endY(ny); - counting_iterator begin1X(1); - counting_iterator end1X(nx); - counting_iterator beginXm1(0); - counting_iterator endXm1(nx-1); - switch ( vid ) { case Base_StdPar : { @@ -45,34 +36,63 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (t = 0; t < tsteps; ++t) { + counting_iterator beginY(0); + counting_iterator 
endY(ny); + std::for_each( std::execution::par_unseq, beginY, endY, [=](Index_type j) { + //std::cerr << j << "B1\n"; POLYBENCH_FDTD_2D_BODY1; }); + + counting_iterator begin1X(1); + counting_iterator end1X(nx); + std::for_each( std::execution::par_unseq, begin1X, end1X, [=](Index_type i) { - for (Index_type j = 0; j < ny; j++) { - //std::cerr << "B2: " << i << "," << j << "\n"; + //for (Index_type j = 0; j < ny; j++) { + std::for_each( std::execution::unseq, + beginY, endY, + [=](Index_type j) { + //std::cerr << i << "," << j << "B2\n"; POLYBENCH_FDTD_2D_BODY2; - } + }); }); + + counting_iterator beginX(0); + counting_iterator endX(nx); + counting_iterator begin1Y(1); + counting_iterator end1Y(ny); + std::for_each( std::execution::par_unseq, beginX, endX, [=](Index_type i) { - for (Index_type j = 1; j < ny; j++) { - //std::cerr << "B3: " << i << "," << j << "\n"; + //for (Index_type j = 1; j < ny; j++) { + std::for_each( std::execution::unseq, + begin1Y, end1Y, + [=](Index_type j) { + //std::cerr << i << "," << j << "B3\n"; POLYBENCH_FDTD_2D_BODY3; - } + }); }); + + counting_iterator beginXm1(0); + counting_iterator endXm1(nx-1); + counting_iterator beginYm1(0); + counting_iterator endYm1(ny-1); + std::for_each( std::execution::par_unseq, beginXm1, endXm1, [=](Index_type i) { - for (Index_type j = 0; j < ny - 1; j++) { - //std::cerr << "B4: " << i << "," << j << "\n"; + //for (Index_type j = 0; j < ny - 1; j++) { + std::for_each( std::execution::unseq, + beginYm1, endYm1, + [=](Index_type j) { + //std::cerr << i << "," << j << "B4\n"; POLYBENCH_FDTD_2D_BODY4; - } + }); }); } // tstep loop @@ -108,10 +128,14 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (t = 0; t < tsteps; ++t) { + counting_iterator beginY(0); + counting_iterator endY(ny); + //for (Index_type j = 0; j < ny; j++) { std::for_each( std::execution::par_unseq, beginY, endY, [=](Index_type j) { + //std::cerr << j << "L1\n"; poly_fdtd2d_base_lam1(j); }); @@ -125,7 +149,7 
@@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) [=](Index_type ij) { const auto i = 1 + ij / ny; const auto j = ij % ny; - //std::cerr << "L2: " << i << "," << j << "\n"; + //std::cerr << i << "," << j << "L2\n"; poly_fdtd2d_base_lam2(i, j); }); @@ -139,7 +163,7 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) [=](Index_type ij) { const auto i = ij / (ny-1); const auto j = 1 + ij % (ny-1); - //std::cerr << "L3: " << i << "," << j << "\n"; + //std::cerr << i << "," << j << "L3\n"; poly_fdtd2d_base_lam3(i, j); }); @@ -153,7 +177,7 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) [=](Index_type ij) { const auto i = ij / (ny-1); const auto j = ij % (ny-1); - //std::cerr << "L4: " << i << "," << j << "\n"; + //std::cerr << i << "," << j << "L4\n"; poly_fdtd2d_base_lam4(i, j); }); From cfa64d9ccc6b3025c8e9d4ec1e2279ae971b09dc Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 10:08:01 +0300 Subject: [PATCH 119/174] debug FDTD_2D with GCC --- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 92 +++++++--------------- 1 file changed, 28 insertions(+), 64 deletions(-) diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 471abf542..ef6f055bc 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -27,6 +27,18 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_FDTD_2D_DATA_SETUP; + counting_iterator beginY(0); + counting_iterator endY(ny); + + counting_iterator begin2(0); + counting_iterator end2((nx-1)*ny); + + counting_iterator begin3(0); + counting_iterator end3(nx*(ny-1)); + + counting_iterator begin4(0); + counting_iterator end4((nx-1)*(ny-1)); + switch ( vid ) { case Base_StdPar : { @@ -36,63 +48,34 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (t = 0; t < tsteps; ++t) { - counting_iterator beginY(0); - 
counting_iterator endY(ny); - std::for_each( std::execution::par_unseq, beginY, endY, [=](Index_type j) { - //std::cerr << j << "B1\n"; POLYBENCH_FDTD_2D_BODY1; }); - counting_iterator begin1X(1); - counting_iterator end1X(nx); - std::for_each( std::execution::par_unseq, - begin1X, end1X, - [=](Index_type i) { - //for (Index_type j = 0; j < ny; j++) { - std::for_each( std::execution::unseq, - beginY, endY, - [=](Index_type j) { - //std::cerr << i << "," << j << "B2\n"; + begin2, end2, + [=](Index_type ij) { + const auto i = 1 + ij / ny; + const auto j = ij % ny; POLYBENCH_FDTD_2D_BODY2; - }); }); - counting_iterator beginX(0); - counting_iterator endX(nx); - counting_iterator begin1Y(1); - counting_iterator end1Y(ny); - std::for_each( std::execution::par_unseq, - beginX, endX, - [=](Index_type i) { - //for (Index_type j = 1; j < ny; j++) { - std::for_each( std::execution::unseq, - begin1Y, end1Y, - [=](Index_type j) { - //std::cerr << i << "," << j << "B3\n"; + begin3, end3, + [=](Index_type ij) { + const auto i = ij / (ny-1); + const auto j = 1 + ij % (ny-1); POLYBENCH_FDTD_2D_BODY3; - }); }); - counting_iterator beginXm1(0); - counting_iterator endXm1(nx-1); - counting_iterator beginYm1(0); - counting_iterator endYm1(ny-1); - std::for_each( std::execution::par_unseq, - beginXm1, endXm1, - [=](Index_type i) { - //for (Index_type j = 0; j < ny - 1; j++) { - std::for_each( std::execution::unseq, - beginYm1, endYm1, - [=](Index_type j) { - //std::cerr << i << "," << j << "B4\n"; + begin4, end4, + [=](Index_type ij) { + const auto i = ij / (ny-1); + const auto j = ij % (ny-1); POLYBENCH_FDTD_2D_BODY4; - }); }); } // tstep loop @@ -110,7 +93,8 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) // scalar variable 't' used in it is updated for each // t-loop iteration. 
// - auto poly_fdtd2d_base_lam1 = [=](Index_type j) { + // THIS ONE, AND ONLY THIS ONE, NEEDS TO BE [&] TO BE CORRECT, AT LEAST WITH GCC + auto poly_fdtd2d_base_lam1 = [&](Index_type j) { POLYBENCH_FDTD_2D_BODY1; }; auto poly_fdtd2d_base_lam2 = [=](Index_type i, Index_type j) { @@ -128,56 +112,36 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (t = 0; t < tsteps; ++t) { - counting_iterator beginY(0); - counting_iterator endY(ny); - - //for (Index_type j = 0; j < ny; j++) { std::for_each( std::execution::par_unseq, beginY, endY, [=](Index_type j) { - //std::cerr << j << "L1\n"; poly_fdtd2d_base_lam1(j); }); - counting_iterator begin2(0); - counting_iterator end2((nx-1)*ny); - - //for (Index_type i = 1; i < nx; i++) { - // for (Index_type j = 0; j < ny; j++) { std::for_each( std::execution::par_unseq, begin2, end2, [=](Index_type ij) { const auto i = 1 + ij / ny; const auto j = ij % ny; - //std::cerr << i << "," << j << "L2\n"; poly_fdtd2d_base_lam2(i, j); }); - counting_iterator begin3(0); - counting_iterator end3(nx*(ny-1)); - - //for (Index_type i = 0; i < nx; i++) { - // for (Index_type j = 1; j < ny; j++) { std::for_each( std::execution::par_unseq, begin3, end3, [=](Index_type ij) { const auto i = ij / (ny-1); const auto j = 1 + ij % (ny-1); - //std::cerr << i << "," << j << "L3\n"; poly_fdtd2d_base_lam3(i, j); }); - counting_iterator begin4(0); - counting_iterator end4((nx-1)*(ny-1)); + counting_iterator begin4(0); + counting_iterator end4((nx-1)*(ny-1)); - //for (Index_type i = 0; i < nx - 1; i++) { - // for (Index_type j = 0; j < ny - 1; j++) { std::for_each( std::execution::par_unseq, begin4, end4, [=](Index_type ij) { const auto i = ij / (ny-1); const auto j = ij % (ny-1); - //std::cerr << i << "," << j << "L4\n"; poly_fdtd2d_base_lam4(i, j); }); From 2aeb29be1b2053b13cb3bbe9d0de076825ca8712 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 10:15:00 +0300 Subject: [PATCH 120/174] debug the GCC problem more 
--- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index ef6f055bc..41e316603 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -93,8 +93,9 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) // scalar variable 't' used in it is updated for each // t-loop iteration. // - // THIS ONE, AND ONLY THIS ONE, NEEDS TO BE [&] TO BE CORRECT, AT LEAST WITH GCC - auto poly_fdtd2d_base_lam1 = [&](Index_type j) { + // capturing t by reference is required for GCC 11 to generate correct results + auto poly_fdtd2d_base_lam1 = [=,&t](Index_type j) { + //ey[j + 0*ny] = fict[t]; POLYBENCH_FDTD_2D_BODY1; }; auto poly_fdtd2d_base_lam2 = [=](Index_type i, Index_type j) { From eef71d51aca0c3a18a6afd1ae204da3c15139c8b Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 10:35:17 +0300 Subject: [PATCH 121/174] add a note about a bad idea --- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 41e316603..aadba53f4 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -54,6 +54,11 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_FDTD_2D_BODY1; }); + // Note to future developers: + // Do not try to be smart and use more C++ than necessary. + // auto [i,j] = std::div(ij,ny); i++; + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is noticeably slower than below. 
+ std::for_each( std::execution::par_unseq, begin2, end2, [=](Index_type ij) { From 788e2b4c5547486d3691c2e76db4b0b418db6911 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 10:35:34 +0300 Subject: [PATCH 122/174] ugh atomics are such a pain right now --- src/basic/DAXPY_ATOMIC-StdPar.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index 4e4e0f75b..da1e21c36 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -44,18 +44,19 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { -#if __cpp_lib_atomic_ref - auto px = std::atomic_ref(x[i]); - auto py = std::atomic_ref(y[i]); - py += a * px; -#elif defined(_OPENMP) +#if defined(_OPENMP) #pragma omp atomic y[i] += a * x[i]; #elif defined(_OPENACC) #pragma acc atomic y[i] += a * x[i]; #elif defined(NVCXX_GPU_ENABLED) + //atomicAdd(&y[i],a * x[i]); atomicaddd(&y[i],a * x[i]); +#elif __cpp_lib_atomic_ref + auto px = std::atomic_ref(x[i]); + auto py = std::atomic_ref(y[i]); + py += a * px; #else #warning No atomic y[i] += a * x[i]; From 1f10a865397293d0353f93fba18565ba7e1834cf Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 10:56:18 +0300 Subject: [PATCH 123/174] change atomics again --- src/basic/DAXPY_ATOMIC-StdPar.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index da1e21c36..ed632ae33 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -44,15 +44,15 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu std::for_each( std::execution::par_unseq, begin, end, [=](Index_type i) { -#if defined(_OPENMP) +#if defined(NVCXX_GPU_ENABLED) + //atomicAdd(&y[i],a * x[i]); + 
atomicaddd(&y[i],a * x[i]); +#elif defined(_OPENMP) #pragma omp atomic y[i] += a * x[i]; #elif defined(_OPENACC) #pragma acc atomic y[i] += a * x[i]; -#elif defined(NVCXX_GPU_ENABLED) - //atomicAdd(&y[i],a * x[i]); - atomicaddd(&y[i],a * x[i]); #elif __cpp_lib_atomic_ref auto px = std::atomic_ref(x[i]); auto py = std::atomic_ref(y[i]); @@ -72,18 +72,19 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu case Lambda_StdPar : { auto daxpy_atomic_lam = [=](Index_type i) { -#if __cpp_lib_atomic_ref - auto px = std::atomic_ref(x[i]); - auto py = std::atomic_ref(y[i]); - py += a * px; +#if defined(NVCXX_GPU_ENABLED) + //atomicAdd(&y[i],a * x[i]); + atomicaddd(&y[i],a * x[i]); #elif defined(_OPENMP) #pragma omp atomic y[i] += a * x[i]; #elif defined(_OPENACC) #pragma acc atomic y[i] += a * x[i]; -#elif defined(NVCXX_GPU_ENABLED) - atomicaddd(&y[i],a * x[i]); +#elif __cpp_lib_atomic_ref + auto px = std::atomic_ref(x[i]); + auto py = std::atomic_ref(y[i]); + py += a * px; #else #warning No atomic y[i] += a * x[i]; From 3c936373314a2f264c1de374a53ae6f84bd088d7 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 10:56:39 +0300 Subject: [PATCH 124/174] solve lambda capture t issue a diff way --- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index aadba53f4..07a62aefe 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -99,7 +99,9 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) // t-loop iteration. 
// // capturing t by reference is required for GCC 11 to generate correct results - auto poly_fdtd2d_base_lam1 = [=,&t](Index_type j) { + //auto poly_fdtd2d_base_lam1 = [=,&t](Index_type j) { + // but that breaks NVHPC GPU, so we instead make it an explicit parameter + auto poly_fdtd2d_base_lam1 = [=](Index_type j, Index_type t) { //ey[j + 0*ny] = fict[t]; POLYBENCH_FDTD_2D_BODY1; }; @@ -121,7 +123,7 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each( std::execution::par_unseq, beginY, endY, [=](Index_type j) { - poly_fdtd2d_base_lam1(j); + poly_fdtd2d_base_lam1(j,t); }); std::for_each( std::execution::par_unseq, From f64306dfd43598cd453b397a1f90aa818aefabd2 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 15:10:47 +0300 Subject: [PATCH 125/174] switch to for_each_n in algorithm --- src/algorithm/MEMCPY-StdPar.cpp | 18 ++++++------------ src/algorithm/MEMSET-StdPar.cpp | 18 ++++++------------ src/algorithm/SORTPAIRS-StdPar.cpp | 21 +++++++++------------ 3 files changed, 21 insertions(+), 36 deletions(-) diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp index 1d7d74709..4ee637961 100644 --- a/src/algorithm/MEMCPY-StdPar.cpp +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -66,15 +66,12 @@ void MEMCPY::runStdParVariantDefault(VariantID vid) case Base_StdPar : { - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { MEMCPY_BODY; }); @@ -90,15 +87,12 @@ void MEMCPY::runStdParVariantDefault(VariantID vid) MEMCPY_BODY; }; - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, 
- begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { memcpy_lambda(i); }); diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp index 835b27b74..3b4c4edab 100644 --- a/src/algorithm/MEMSET-StdPar.cpp +++ b/src/algorithm/MEMSET-StdPar.cpp @@ -66,15 +66,12 @@ void MEMSET::runStdParVariantDefault(VariantID vid) case Base_StdPar : { - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { MEMSET_BODY; }); @@ -90,15 +87,12 @@ void MEMSET::runStdParVariantDefault(VariantID vid) MEMSET_BODY; }; - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { memset_lambda(i); }); diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index 00892e15f..9cfbf74a8 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -33,9 +33,6 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - SORTPAIRS_DATA_SETUP; switch ( vid ) { @@ -52,18 +49,18 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) #if 0 vector_of_pairs.reserve(iend-ibegin); - std::for_each( //std::execution::par, // parallelism leads to incorrectness - begin,end, - [=,&vector_of_pairs](Index_type iemp) 
noexcept { + std::for_each_n( //std::execution::par, // parallelism leads to incorrectness + counting_iterator(ibegin), iend-ibegin, + [=,&vector_of_pairs](Index_type iemp) noexcept { vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]); }); #else vector_of_pairs.resize(iend-ibegin); auto p = vector_of_pairs.data(); - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type iemp) noexcept { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type iemp) noexcept { p[iemp] = std::make_pair(x[iend*irep + iemp], i[iend*irep + iemp]); }); #endif @@ -74,9 +71,9 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) return lhs.first < rhs.first; }); - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type iemp) noexcept { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type iemp) noexcept { //const pair_type &pair = vector_of_pairs[iemp - ibegin]; const pair_type &pair = p[iemp - ibegin]; x[iend*irep + iemp] = pair.first; From 50a661d997fa4b0f65f3e2fdd43c7bd2df38270e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 15:14:59 +0300 Subject: [PATCH 126/174] switch to for_each_n in stream --- src/stream/ADD-StdPar.cpp | 15 ++++++--------- src/stream/MUL-StdPar.cpp | 15 ++++++--------- src/stream/TRIAD-StdPar.cpp | 16 +++++++--------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index 0a38d1619..d3ab6de8d 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -28,9 +28,6 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - ADD_DATA_SETUP; auto add_lam = [=](Index_type i) { @@ -44,9 +41,9 @@ void ADD::runStdParVariant(VariantID vid, size_t 
tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { ADD_BODY; }); @@ -61,9 +58,9 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { add_lam(i); }); } diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index f8c919e8d..399c62dd7 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -28,9 +28,6 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - MUL_DATA_SETUP; auto mul_lam = [=](Index_type i) { @@ -44,9 +41,9 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { MUL_BODY; }); @@ -61,9 +58,9 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { mul_lam(i); }); } diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index de8118d0e..cb0ab58e5 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -23,13 +23,11 @@ namespace 
stream void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) { #if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - TRIAD_DATA_SETUP; auto triad_lam = [=](Index_type i) { @@ -43,9 +41,9 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { TRIAD_BODY; }); @@ -60,9 +58,9 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin,end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { triad_lam(i); }); } From 3c2368b8eb597ecf852b98d8d16df330cce8c20b Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 15:54:19 +0300 Subject: [PATCH 127/174] switch to for_each_n in basic --- src/basic/DAXPY-StdPar.cpp | 15 ++++----- src/basic/DAXPY_ATOMIC-StdPar.cpp | 16 ++++------ src/basic/IF_QUAD-StdPar.cpp | 16 ++++------ src/basic/INDEXLIST_3LOOP-StdPar.cpp | 28 ++++++++--------- src/basic/INIT3-StdPar.cpp | 15 ++++----- src/basic/INIT_VIEW1D-StdPar.cpp | 16 ++++------ src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 16 ++++------ src/basic/MULADDSUB-StdPar.cpp | 23 ++++++-------- src/basic/NESTED_INIT-StdPar.cpp | 42 ++++++++++--------------- src/basic/PI_ATOMIC-StdPar.cpp | 11 +++---- 10 files changed, 80 insertions(+), 118 deletions(-) diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp index 66d09cd9e..5f255b6e5 100644 --- a/src/basic/DAXPY-StdPar.cpp +++ b/src/basic/DAXPY-StdPar.cpp @@ -28,9 +28,6 @@ void 
DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - DAXPY_DATA_SETUP; switch ( vid ) { @@ -40,9 +37,9 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { DAXPY_BODY; }); @@ -61,9 +58,9 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { daxpy_lam(i); }); } diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index ed632ae33..c952ec895 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -29,9 +29,6 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - DAXPY_ATOMIC_DATA_SETUP; switch ( vid ) { @@ -41,9 +38,9 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { #if defined(NVCXX_GPU_ENABLED) //atomicAdd(&y[i],a * x[i]); atomicaddd(&y[i],a * x[i]); @@ -94,12 +91,11 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t 
RAJAPERF_UNUSED_ARG(tu startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { daxpy_atomic_lam(i); }); - } stopTimer(); diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp index dac40ccab..9cb80cd47 100644 --- a/src/basic/IF_QUAD-StdPar.cpp +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -28,9 +28,6 @@ void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - IF_QUAD_DATA_SETUP; auto ifquad_lam = [=](Index_type i) { @@ -44,9 +41,9 @@ void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { IF_QUAD_BODY; }); @@ -61,12 +58,11 @@ void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { ifquad_lam(i); }); - } stopTimer(); diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index 475623ef0..68da23c56 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -30,13 +30,11 @@ namespace basic void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { #if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = 
getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - INDEXLIST_3LOOP_DATA_SETUP; switch ( vid ) { @@ -48,9 +46,9 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; }); @@ -71,9 +69,9 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG counts+ibegin, 0); #endif - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { INDEXLIST_3LOOP_MAKE_LIST; }); @@ -102,9 +100,9 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { indexlist_conditional_lam(i); }); @@ -125,9 +123,9 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG counts+ibegin, 0); #endif - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { indexlist_make_list_lam(i); }); diff --git a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp index a01964a85..9db2bfa9c 100644 --- a/src/basic/INIT3-StdPar.cpp +++ b/src/basic/INIT3-StdPar.cpp @@ -27,9 +27,6 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = 
counting_iterator(ibegin); - auto end = counting_iterator(iend); - INIT3_DATA_SETUP; switch ( vid ) { @@ -39,9 +36,9 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { INIT3_BODY; }); @@ -60,9 +57,9 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { init3_lam(i); }); diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp index 13cc0fdf5..4f6655e50 100644 --- a/src/basic/INIT_VIEW1D-StdPar.cpp +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -28,9 +28,6 @@ void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - INIT_VIEW1D_DATA_SETUP; switch ( vid ) { @@ -40,9 +37,9 @@ void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { INIT_VIEW1D_BODY; }); @@ -61,12 +58,11 @@ void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + 
[=](Index_type i) { initview1d_base_lam(i); }); - } stopTimer(); diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp index e60db90b2..d5f93350a 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -28,9 +28,6 @@ void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 1; const Index_type iend = getActualProblemSize()+1; - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - INIT_VIEW1D_OFFSET_DATA_SETUP; switch ( vid ) { @@ -40,9 +37,9 @@ void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { INIT_VIEW1D_OFFSET_BODY; }); @@ -61,12 +58,11 @@ void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { initview1doffset_base_lam(i); }); - } stopTimer(); diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp index 9f01a117e..42f558c1a 100644 --- a/src/basic/MULADDSUB-StdPar.cpp +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -28,15 +28,8 @@ void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - MULADDSUB_DATA_SETUP; - auto mas_lam = [=](Index_type i) { - MULADDSUB_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -44,9 +37,9 @@ void MULADDSUB::runStdParVariant(VariantID vid, size_t 
tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { MULADDSUB_BODY; }); @@ -58,12 +51,16 @@ void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto mas_lam = [=](Index_type i) { + MULADDSUB_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { mas_lam(i); }); } diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp index 705ed38bd..7ae128554 100644 --- a/src/basic/NESTED_INIT-StdPar.cpp +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -28,14 +28,6 @@ void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) NESTED_INIT_DATA_SETUP; -#ifdef USE_STDPAR_COLLAPSE - auto begin = counting_iterator(0); - auto end = counting_iterator(ni*nj*nk); -#else - auto begin = counting_iterator(0); - auto end = counting_iterator(nk); -#endif - auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) { NESTED_INIT_BODY; }; @@ -48,23 +40,23 @@ void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type idx) { - const auto k = idx / (nj*ni); - const auto ij = idx % (nj*ni); + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj*nk, + [=](Index_type ijk) { + const auto k = ijk / (nj*ni); + const auto ij = ijk % (nj*ni); const auto j = ij / ni; const auto i = ij % ni; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type k) { + std::for_each_n( std::execution::par_unseq, + 
counting_iterator(0), nk, + [=](Index_type k) { for (Index_type j = 0; j < nj; ++j ) for (Index_type i = 0; i < ni; ++i ) #endif { NESTED_INIT_BODY; - //getCout() << i << "," << j << "," << k << ";" << idx << " PAR\n"; + //getCout() << i << "," << j << "," << k << ";" << ijk << " PAR\n"; } }); @@ -80,17 +72,17 @@ void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type idx) { - const auto k = idx / (nj*ni); - const auto ij = idx % (nj*ni); + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj*nk, + [=](Index_type ijk) { + const auto k = ijk / (nj*ni); + const auto ij = ijk % (nj*ni); const auto j = ij / ni; const auto i = ij % ni; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type k) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nk, + [=](Index_type k) { for (Index_type j = 0; j < nj; ++j ) for (Index_type i = 0; i < ni; ++i ) #endif diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index ffb56f016..06eace9a2 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -36,9 +36,6 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - PI_ATOMIC_DATA_SETUP; switch ( vid ) { @@ -51,8 +48,8 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) //myAtomic a_pi{m_pi_init}; myAtomic * a_pi = new myAtomic; // i hate this *a_pi = m_pi_init; - std::for_each( std::execution::par, - begin, end, + std::for_each_n( std::execution::par, + counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { double x = (double(i) + 0.5) * dx; *a_pi = *a_pi + dx / (1.0 + x * x); @@ -78,8 +75,8 @@ void 
PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) //myAtomic a_pi{m_pi_init}; myAtomic * a_pi = new myAtomic; // i hate this *a_pi = m_pi_init; - std::for_each( std::execution::par, - begin, end, + std::for_each_n( std::execution::par, + counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { piatomic_base_lam(i,a_pi); }); From 0b67f209ecc575648da1da6097648b0397dddd7d Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 16:00:19 +0300 Subject: [PATCH 128/174] for_each_n --- src/polybench/POLYBENCH_MVT-StdPar.cpp | 51 ++++++++++++-------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp index 45da020a8..d94b105dd 100644 --- a/src/polybench/POLYBENCH_MVT-StdPar.cpp +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -25,9 +25,6 @@ void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_MVT_DATA_SETUP; - counting_iterator begin(0); - counting_iterator end(N); - switch ( vid ) { case Base_StdPar : { @@ -35,25 +32,25 @@ void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { POLYBENCH_MVT_BODY1; - std::for_each( std::execution::unseq, - begin, end, - [=,&dot](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { POLYBENCH_MVT_BODY2; }); POLYBENCH_MVT_BODY3; }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { POLYBENCH_MVT_BODY4; - std::for_each( std::execution::unseq, - begin, end, - [=,&dot](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + 
[=,&dot](Index_type j) { POLYBENCH_MVT_BODY5; }); POLYBENCH_MVT_BODY6; @@ -87,25 +84,25 @@ void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { POLYBENCH_MVT_BODY1; - std::for_each( std::execution::unseq, - begin, end, - [=,&dot](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { poly_mvt_base_lam2(i, j, dot); }); poly_mvt_base_lam3(i, dot); }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { POLYBENCH_MVT_BODY4; - std::for_each( std::execution::unseq, - begin, end, - [=,&dot](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { poly_mvt_base_lam5(i, j, dot); }); poly_mvt_base_lam6(i, dot); From c18465dd75e2c01d3d2864519c2648be09b3df52 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 16:11:20 +0300 Subject: [PATCH 129/174] for_each_n --- src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 69 +++++++++----------- 1 file changed, 30 insertions(+), 39 deletions(-) diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index 445024820..a71a5d406 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -27,15 +27,6 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_JACOBI_2D_DATA_SETUP; -#ifdef USE_STDPAR_COLLAPSE - const auto nn = N-2; - counting_iterator begin(0); - counting_iterator end(nn*nn); -#else - counting_iterator begin(1); - counting_iterator end(N-1); -#endif - switch ( vid ) { case Base_StdPar : { @@ -46,18 
+37,18 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (N-2)*(N-2), + [=](Index_type ij) { const auto i = 1 + ij / nn; const auto j = 1 + ij % nn; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { #endif POLYBENCH_JACOBI_2D_BODY1; #ifndef USE_STDPAR_COLLAPSE @@ -71,12 +62,12 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) const auto i = 1 + ij / nn; const auto j = 1 + ij % nn; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { #endif POLYBENCH_JACOBI_2D_BODY2; #ifndef USE_STDPAR_COLLAPSE @@ -109,18 +100,18 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (N-2)*(N-2), + [=](Index_type ij) { const auto i = 1 + ij / nn; const auto j = 1 + ij % nn; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + 
counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { #endif poly_jacobi2d_base_lam1(i, j); #ifndef USE_STDPAR_COLLAPSE @@ -135,12 +126,12 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) const auto i = 1 + ij / nn; const auto j = 1 + ij % nn; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { #endif poly_jacobi2d_base_lam2(i, j); #ifndef USE_STDPAR_COLLAPSE From 1aa331fbf6fe5f4cea02bb3b3a0d59f1f22c3648 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 18:31:07 +0300 Subject: [PATCH 130/174] fix collapsed case --- src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 50 ++++++++++---------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index a71a5d406..36ccba5af 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -27,6 +27,8 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_JACOBI_2D_DATA_SETUP; + const auto n2 = (N-2); + switch ( vid ) { case Base_StdPar : { @@ -38,16 +40,16 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) #ifdef USE_STDPAR_COLLAPSE std::for_each_n( std::execution::par_unseq, - counting_iterator(0), (N-2)*(N-2), + counting_iterator(0), n2*n2, [=](Index_type ij) { - const auto i = 1 + ij / nn; - const auto j = 1 + ij % nn; + const auto i = 1 + ij / n2; + const auto j = 1 + ij % n2; #else std::for_each_n( std::execution::par_unseq, - counting_iterator(1), N-2, + counting_iterator(1), n2, 
[=](Index_type i) { std::for_each_n( std::execution::unseq, - counting_iterator(1), N-2, + counting_iterator(1), n2, [=](Index_type j) { #endif POLYBENCH_JACOBI_2D_BODY1; @@ -56,17 +58,17 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) #endif }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ij) { - const auto i = 1 + ij / nn; - const auto j = 1 + ij % nn; + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n2*n2, + [=](Index_type ij) { + const auto i = 1 + ij / n2; + const auto j = 1 + ij % n2; #else std::for_each_n( std::execution::par_unseq, - counting_iterator(1), N-2, + counting_iterator(1), n2, [=](Index_type i) { std::for_each_n( std::execution::unseq, - counting_iterator(1), N-2, + counting_iterator(1), n2, [=](Index_type j) { #endif POLYBENCH_JACOBI_2D_BODY2; @@ -101,16 +103,16 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) #ifdef USE_STDPAR_COLLAPSE std::for_each_n( std::execution::par_unseq, - counting_iterator(0), (N-2)*(N-2), + counting_iterator(0), n2*n2, [=](Index_type ij) { - const auto i = 1 + ij / nn; - const auto j = 1 + ij % nn; + const auto i = 1 + ij / n2; + const auto j = 1 + ij % n2; #else std::for_each_n( std::execution::par_unseq, - counting_iterator(1), N-2, + counting_iterator(1), n2, [=](Index_type i) { std::for_each_n( std::execution::unseq, - counting_iterator(1), N-2, + counting_iterator(1), n2, [=](Index_type j) { #endif poly_jacobi2d_base_lam1(i, j); @@ -120,17 +122,17 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ij) { - const auto i = 1 + ij / nn; - const auto j = 1 + ij % nn; + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n2*n2, + [=](Index_type ij) { + const auto i = 1 + ij / n2; + const auto j = 1 + ij % n2; #else std::for_each_n( 
std::execution::par_unseq, - counting_iterator(1), N-2, + counting_iterator(1), n2, [=](Index_type i) { std::for_each_n( std::execution::unseq, - counting_iterator(1), N-2, + counting_iterator(1), n2, [=](Index_type j) { #endif poly_jacobi2d_base_lam2(i, j); From 27e59b319dbffbd23f2261bf1dca8687ff3e59fd Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 12 Oct 2022 18:34:09 +0300 Subject: [PATCH 131/174] for_each_n --- src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 27 +++++++++----------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp index 3b95527e9..fa63d259f 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -27,9 +27,6 @@ void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_JACOBI_1D_DATA_SETUP; - counting_iterator begin(1); - counting_iterator end(N-1); - switch ( vid ) { case Base_StdPar : { @@ -39,14 +36,14 @@ void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { POLYBENCH_JACOBI_1D_BODY1; }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { POLYBENCH_JACOBI_1D_BODY2; }); @@ -74,14 +71,14 @@ void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { poly_jacobi1d_lam1(i); }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + 
std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { poly_jacobi1d_lam2(i); }); From 456e72285261096db7ff243a4441c31011aae393 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 09:35:06 +0300 Subject: [PATCH 132/174] for_each_n --- src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 101 ++++++++++----------- 1 file changed, 48 insertions(+), 53 deletions(-) diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index f33db4493..aae0c085b 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -29,11 +29,6 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) #ifdef USE_STDPAR_COLLAPSE const auto nn = N-2; - counting_iterator begin(0); - counting_iterator end(nn*nn*nn); -#else - counting_iterator begin(1); - counting_iterator end(N-1); #endif switch ( vid ) { @@ -46,23 +41,23 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ijk) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nn*nn*nn, + [=](Index_type ijk) { const auto i = 1 + ijk / (nn*nn); const auto jk = ijk % (nn*nn); const auto j = 1 + jk / nn; const auto k = 1 + jk % nn; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type j) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type k) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type k) { #endif POLYBENCH_HEAT_3D_BODY1; #ifndef USE_STDPAR_COLLAPSE @@ -72,23 
+67,23 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ijk) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nn*nn*nn, + [=](Index_type ijk) { const auto i = 1 + ijk / (nn*nn); const auto jk = ijk % (nn*nn); const auto j = 1 + jk / nn; const auto k = 1 + jk % nn; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type j) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type k) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type k) { #endif POLYBENCH_HEAT_3D_BODY2; #ifndef USE_STDPAR_COLLAPSE @@ -124,23 +119,23 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 0; t < tsteps; ++t) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ijk) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nn*nn*nn, + [=](Index_type ijk) { const auto i = 1 + ijk / (nn*nn); const auto jk = ijk % (nn*nn); const auto j = 1 + jk / nn; const auto k = 1 + jk % nn; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type j) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type k) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type k) { 
#endif poly_heat3d_base_lam1(i, j, k); #ifndef USE_STDPAR_COLLAPSE @@ -150,23 +145,23 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ijk) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nn*nn*nn, + [=](Index_type ijk) { const auto i = 1 + ijk / (nn*nn); const auto jk = ijk % (nn*nn); const auto j = 1 + jk / nn; const auto k = 1 + jk % nn; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type j) { - std::for_each( std::execution::unseq, - begin, end, - [=](Index_type k) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type k) { #endif poly_heat3d_base_lam2(i, j, k); #ifndef USE_STDPAR_COLLAPSE From a31da74544743f2f5580189d5bf391b712c40963 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 09:48:33 +0300 Subject: [PATCH 133/174] for_each_n --- src/polybench/POLYBENCH_2MM-StdPar.cpp | 106 +++++++++++++------------ 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index e067a9842..e45235349 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -14,8 +14,6 @@ #include -//#define USE_STDPAR_COLLAPSE 1 - namespace rajaperf { namespace polybench @@ -29,22 +27,6 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_2MM_DATA_SETUP; -#ifdef USE_STDPAR_COLLAPSE - counting_iterator beginIJ(0); - counting_iterator endIJ(ni*nj); - counting_iterator beginIL(0); - counting_iterator endIL(ni*nl); -#else - counting_iterator 
beginI(0); - counting_iterator endI(ni); - counting_iterator beginL(0); - counting_iterator endL(nl); -#endif - counting_iterator beginJ(0); - counting_iterator endJ(nj); - counting_iterator beginK(0); - counting_iterator endK(nk); - switch ( vid ) { case Base_StdPar : { @@ -53,17 +35,23 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIJ, endIJ, [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { const auto i = ij / nj; const auto j = ij % nj; #else - std::for_each( std::execution::par_unseq, - beginI, endI, [=](Index_type i) { - std::for_each(beginJ, endJ, [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=](Index_type j) { #endif POLYBENCH_2MM_BODY1; - std::for_each(beginK, endK, [=,&dot](Index_type k) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, + [=,&dot](Index_type k) { POLYBENCH_2MM_BODY2; }); POLYBENCH_2MM_BODY3; @@ -73,17 +61,23 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIL, endIL, [=](Index_type il) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nl, + [=](Index_type il) { const auto i = il / nl; const auto l = il % nl; #else - std::for_each( std::execution::par_unseq, - beginI, endI, [=](Index_type i) { - std::for_each(beginL, endL, [=](Index_type l) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, + [=](Index_type l) { #endif POLYBENCH_2MM_BODY4; - std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + 
std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=,&dot](Index_type j) { POLYBENCH_2MM_BODY5; }); POLYBENCH_2MM_BODY6; @@ -100,20 +94,20 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { - auto poly_2mm_base_lam2 = [=](Index_type i, Index_type j, - Index_type k, Real_type &dot) { + auto poly_2mm_base_lam2 = + [=](Index_type i, Index_type j, Index_type k, Real_type &dot) { POLYBENCH_2MM_BODY2; }; - auto poly_2mm_base_lam3 = [=](Index_type i, Index_type j, - Real_type &dot) { + auto poly_2mm_base_lam3 = + [=](Index_type i, Index_type j, Real_type &dot) { POLYBENCH_2MM_BODY3; }; - auto poly_2mm_base_lam5 = [=](Index_type i, Index_type l, - Index_type j, Real_type &dot) { + auto poly_2mm_base_lam5 = + [=](Index_type i, Index_type l, Index_type j, Real_type &dot) { POLYBENCH_2MM_BODY5; }; - auto poly_2mm_base_lam6 = [=](Index_type i, Index_type l, - Real_type &dot) { + auto poly_2mm_base_lam6 = + [=](Index_type i, Index_type l, Real_type &dot) { POLYBENCH_2MM_BODY6; }; @@ -121,17 +115,23 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIJ, endIJ, [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { const auto i = ij / nj; const auto j = ij % nj; #else - std::for_each( std::execution::par_unseq, - beginI, endI, [=](Index_type i) { - std::for_each(beginJ, endJ, [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=](Index_type j) { #endif POLYBENCH_2MM_BODY1; - std::for_each(beginK, endK, [=,&dot](Index_type k) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, + [=,&dot](Index_type k) { poly_2mm_base_lam2(i, j, k, dot); }); 
poly_2mm_base_lam3(i, j, dot); @@ -141,17 +141,23 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIL, endIL, [=](Index_type il) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nl, + [=](Index_type il) { const auto i = il / nl; const auto l = il % nl; #else - std::for_each( std::execution::par_unseq, - beginI, endI, [=](Index_type i) { - std::for_each(beginL, endL, [=](Index_type l) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, + [=](Index_type l) { #endif POLYBENCH_2MM_BODY4; - std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=,&dot](Index_type j) { poly_2mm_base_lam5(i, l, j, dot); }); poly_2mm_base_lam6(i, l, dot); From 7fbae76c86ff221822dd26965ca90ea76580e250 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 10:45:31 +0300 Subject: [PATCH 134/174] add failed experiment w xform red --- src/polybench/POLYBENCH_2MM-StdPar.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index e45235349..8c4a85741 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -27,6 +27,11 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_2MM_DATA_SETUP; +#if 0 + auto begin = counting_iterator(0); + auto end = counting_iterator(nk); +#endif + switch ( vid ) { case Base_StdPar : { @@ -48,6 +53,7 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) counting_iterator(0), nj, [=](Index_type j) { #endif +#if 1 POLYBENCH_2MM_BODY1; std::for_each_n( std::execution::unseq, counting_iterator(0), nk, @@ -55,6 +61,14 @@ void POLYBENCH_2MM::runStdParVariant(VariantID 
vid, size_t tune_idx) POLYBENCH_2MM_BODY2; }); POLYBENCH_2MM_BODY3; +#else + tmp[j + i*nj] = std::transform_reduce( std::execution::unseq, + begin, end, + (Real_type)0, std::plus(), + [=] (Index_type k) { + return alpha * A[k + i*nk] * B[j + k*nj]; + }); +#endif #ifndef USE_STDPAR_COLLAPSE }); #endif From 6853072cf276d45697112a80f5cb1b52aa053831 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 10:53:45 +0300 Subject: [PATCH 135/174] for_each_n --- src/polybench/POLYBENCH_3MM-StdPar.cpp | 142 +++++++++++++------------ 1 file changed, 72 insertions(+), 70 deletions(-) diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index 00f431291..cb758fe5b 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -14,8 +14,6 @@ #include -//#define USE_STDPAR_COLLAPSE 1 - namespace rajaperf { namespace polybench @@ -29,26 +27,6 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_3MM_DATA_SETUP; -#ifdef USE_STDPAR_COLLAPSE - counting_iterator beginIJ(0); - counting_iterator endIJ(ni*nj); - counting_iterator beginIL(0); - counting_iterator endIL(ni*nl); - counting_iterator beginJL(0); - counting_iterator endJL(nj*nl); -#else - counting_iterator beginI(0); - counting_iterator endI(ni); - counting_iterator beginL(0); - counting_iterator endL(nl); -#endif - counting_iterator beginJ(0); - counting_iterator endJ(nj); - counting_iterator beginK(0); - counting_iterator endK(nk); - counting_iterator beginM(0); - counting_iterator endM(nm); - switch ( vid ) { case Base_StdPar : { @@ -57,17 +35,21 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIJ, endIJ, [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { const auto i = ij / nj; const 
auto j = ij % nj; #else - std::for_each( std::execution::par_unseq, - beginI, endI, [=](Index_type i) { - std::for_each(beginJ, endJ, [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), nj, + [=](Index_type j) { #endif POLYBENCH_3MM_BODY1; - std::for_each(beginK, endK, [=,&dot](Index_type k) { + std::for_each_n( counting_iterator(0), nk, + [=,&dot](Index_type k) { POLYBENCH_3MM_BODY2; }); POLYBENCH_3MM_BODY3; @@ -77,17 +59,21 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginJL, endJL, [=](Index_type jl) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nj*nl, + [=](Index_type jl) { const auto j = jl / nl; const auto l = jl % nl; #else - std::for_each( std::execution::par_unseq, - beginJ, endJ, [=](Index_type j) { - std::for_each(beginL, endL, [=](Index_type l) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nj, + [=](Index_type j) { + std::for_each_n( counting_iterator(0), nl, + [=](Index_type l) { #endif POLYBENCH_3MM_BODY4; - std::for_each(beginM, endM, [=,&dot](Index_type m) { + std::for_each_n( counting_iterator(0), nm, + [=,&dot](Index_type m) { POLYBENCH_3MM_BODY5; }); POLYBENCH_3MM_BODY6; @@ -97,17 +83,21 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIL, endIL, [=](Index_type il) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nl, + [=](Index_type il) { const auto i = il / nl; const auto l = il % nl; #else - std::for_each( std::execution::par_unseq, - beginI, endI, [=](Index_type i) { - std::for_each(beginL, endL, [=](Index_type l) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), nl, + 
[=](Index_type l) { #endif POLYBENCH_3MM_BODY7; - std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + std::for_each_n( counting_iterator(0), nj, + [=,&dot](Index_type j) { POLYBENCH_3MM_BODY8; }); POLYBENCH_3MM_BODY9; @@ -124,28 +114,28 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { - auto poly_3mm_base_lam2 = [=] (Index_type i, Index_type j, Index_type k, - Real_type &dot) { + auto poly_3mm_base_lam2 = + [=] (Index_type i, Index_type j, Index_type k, Real_type &dot) { POLYBENCH_3MM_BODY2; }; - auto poly_3mm_base_lam3 = [=] (Index_type i, Index_type j, - Real_type &dot) { + auto poly_3mm_base_lam3 = + [=] (Index_type i, Index_type j, Real_type &dot) { POLYBENCH_3MM_BODY3; }; - auto poly_3mm_base_lam5 = [=] (Index_type j, Index_type l, Index_type m, - Real_type &dot) { + auto poly_3mm_base_lam5 = + [=] (Index_type j, Index_type l, Index_type m, Real_type &dot) { POLYBENCH_3MM_BODY5; }; - auto poly_3mm_base_lam6 = [=] (Index_type j, Index_type l, - Real_type &dot) { + auto poly_3mm_base_lam6 = + [=] (Index_type j, Index_type l, Real_type &dot) { POLYBENCH_3MM_BODY6; }; - auto poly_3mm_base_lam8 = [=] (Index_type i, Index_type l, Index_type j, - Real_type &dot) { + auto poly_3mm_base_lam8 = + [=] (Index_type i, Index_type l, Index_type j, Real_type &dot) { POLYBENCH_3MM_BODY8; }; - auto poly_3mm_base_lam9 = [=] (Index_type i, Index_type l, - Real_type &dot) { + auto poly_3mm_base_lam9 = + [=] (Index_type i, Index_type l, Real_type &dot) { POLYBENCH_3MM_BODY9; }; @@ -153,17 +143,21 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIJ, endIJ, [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { const auto i = ij / nj; const auto j = ij % nj; #else - std::for_each( std::execution::par_unseq, - beginI, 
endI, [=](Index_type i) { - std::for_each(beginJ, endJ, [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), nj, + [=](Index_type j) { #endif POLYBENCH_3MM_BODY1; - std::for_each(beginK, endK, [=,&dot](Index_type k) { + std::for_each_n( counting_iterator(0), nk, + [=,&dot](Index_type k) { poly_3mm_base_lam2(i, j, k, dot); }); poly_3mm_base_lam3(i, j, dot); @@ -173,17 +167,21 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginJL, endJL, [=](Index_type jl) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nj*nl, + [=](Index_type jl) { const auto j = jl / nl; const auto l = jl % nl; #else - std::for_each( std::execution::par_unseq, - beginJ, endJ, [=](Index_type j) { - std::for_each(beginL, endL, [=](Index_type l) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nj, + [=](Index_type j) { + std::for_each_n( counting_iterator(0), nl, + [=](Index_type l) { #endif POLYBENCH_3MM_BODY4; - std::for_each(beginM, endM, [=,&dot](Index_type m) { + std::for_each_n( counting_iterator(0), nm, + [=,&dot](Index_type m) { poly_3mm_base_lam5(j, l, m, dot); }); poly_3mm_base_lam6(j, l, dot); @@ -193,17 +191,21 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIL, endIL, [=](Index_type il) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nl, + [=](Index_type il) { const auto i = il / nl; const auto l = il % nl; #else - std::for_each( std::execution::par_unseq, - beginI, endI, [=](Index_type i) { - std::for_each(beginL, endL, [=](Index_type l) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), nl, + [=](Index_type l) { #endif 
POLYBENCH_3MM_BODY7; - std::for_each(beginJ, endJ, [=,&dot](Index_type j) { + std::for_each_n( counting_iterator(0), nj, + [=,&dot](Index_type j) { poly_3mm_base_lam8(i, l, j, dot); }); poly_3mm_base_lam9(i, l, dot); From 4a82e0e97235a43dfb49df05f1f61cdf4e06d434 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 10:57:38 +0300 Subject: [PATCH 136/174] for_each_n --- src/polybench/POLYBENCH_GEMM-StdPar.cpp | 52 +++++++++++-------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index 58a7f11ec..7c689c497 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -14,8 +14,6 @@ #include -//#define USE_STDPAR_COLLAPSE 1 - namespace rajaperf { namespace polybench @@ -29,18 +27,6 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_GEMM_DATA_SETUP; -#ifdef USE_STDPAR_COLLAPSE - counting_iterator beginIJ(0); - counting_iterator endIJ(ni*nj); -#else - counting_iterator beginI(0); - counting_iterator beginJ(0); - counting_iterator endJ(nj); - counting_iterator endI(ni); -#endif - counting_iterator beginK(0); - counting_iterator endK(nk); - switch ( vid ) { case Base_StdPar : { @@ -49,18 +35,22 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIJ, endIJ, [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { const auto i = ij / nj; const auto j = ij % nj; #else - std::for_each( std::execution::par_unseq, - beginI, endI, [=](Index_type i) { - std::for_each(beginJ, endJ, [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), nj, + [=](Index_type j) { #endif 
POLYBENCH_GEMM_BODY1; POLYBENCH_GEMM_BODY2; - std::for_each(beginK, endK, [=,&dot](Index_type k) { + std::for_each_n( counting_iterator(0), nk, + [=,&dot](Index_type k) { POLYBENCH_GEMM_BODY3; }); POLYBENCH_GEMM_BODY4; @@ -80,12 +70,10 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) auto poly_gemm_base_lam2 = [=](Index_type i, Index_type j) { POLYBENCH_GEMM_BODY2; }; - auto poly_gemm_base_lam3 = [=](Index_type i, Index_type j, Index_type k, - Real_type& dot) { + auto poly_gemm_base_lam3 = [=](Index_type i, Index_type j, Index_type k, Real_type& dot) { POLYBENCH_GEMM_BODY3; }; - auto poly_gemm_base_lam4 = [=](Index_type i, Index_type j, - Real_type& dot) { + auto poly_gemm_base_lam4 = [=](Index_type i, Index_type j, Real_type& dot) { POLYBENCH_GEMM_BODY4; }; @@ -93,18 +81,22 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - beginIJ, endIJ, [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { const auto i = ij / nj; const auto j = ij % nj; #else - std::for_each( std::execution::par_unseq, - beginI, endI, [=](Index_type i) { - std::for_each(beginJ, endJ, [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), nj, + [=](Index_type j) { #endif POLYBENCH_GEMM_BODY1; poly_gemm_base_lam2(i, j); - std::for_each(beginK, endK, [=,&dot](Index_type k) { + std::for_each_n( counting_iterator(0), nk, + [=,&dot](Index_type k) { poly_gemm_base_lam3(i, j, k, dot); }); poly_gemm_base_lam4(i, j, dot); From 8207cb1d8881957d918620af4b0a9c9c448847cf Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 11:22:54 +0300 Subject: [PATCH 137/174] for_each_n --- src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 73 ++++++++++++---------- 
src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 29 +++++---- 2 files changed, 54 insertions(+), 48 deletions(-) diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index ed94308d8..4fa031e46 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -14,8 +14,6 @@ #include -//#define USE_STDPAR_COLLAPSE 1 - namespace rajaperf { namespace polybench @@ -29,9 +27,6 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_GEMVER_DATA_SETUP; - counting_iterator begin(0); - counting_iterator end(n); - switch ( vid ) { case Base_StdPar : { @@ -39,31 +34,38 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { - std::for_each(begin, end, [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), n, + [=](Index_type j) { POLYBENCH_GEMVER_BODY1; }); }); - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { POLYBENCH_GEMVER_BODY2; - std::for_each(begin, end, [=,&dot](Index_type j) { + std::for_each_n( counting_iterator(0), n, + [=,&dot](Index_type j) { POLYBENCH_GEMVER_BODY3; }); POLYBENCH_GEMVER_BODY4; }); - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { POLYBENCH_GEMVER_BODY5; }); - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { POLYBENCH_GEMVER_BODY6; - std::for_each(begin, end, [=,&dot](Index_type j) { + std::for_each_n( 
counting_iterator(0), n, + [=,&dot](Index_type j) { POLYBENCH_GEMVER_BODY7; }); POLYBENCH_GEMVER_BODY8; @@ -80,8 +82,7 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) auto poly_gemver_base_lam1 = [=](Index_type i, Index_type j) { POLYBENCH_GEMVER_BODY1; }; - auto poly_gemver_base_lam3 = [=](Index_type i, Index_type j, - Real_type &dot) { + auto poly_gemver_base_lam3 = [=](Index_type i, Index_type j, Real_type &dot) { POLYBENCH_GEMVER_BODY3; }; auto poly_gemver_base_lam4 = [=](Index_type i, Real_type &dot) { @@ -90,8 +91,7 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) auto poly_gemver_base_lam5 = [=](Index_type i) { POLYBENCH_GEMVER_BODY5; }; - auto poly_gemver_base_lam7 = [=](Index_type i, Index_type j, - Real_type &dot) { + auto poly_gemver_base_lam7 = [=](Index_type i, Index_type j, Real_type &dot) { POLYBENCH_GEMVER_BODY7; }; auto poly_gemver_base_lam8 = [=](Index_type i, Real_type &dot) { @@ -101,31 +101,38 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { - std::for_each(begin, end, [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), n, + [=](Index_type j) { poly_gemver_base_lam1(i, j); }); }); - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { POLYBENCH_GEMVER_BODY2; - std::for_each(begin, end, [=,&dot](Index_type j) { + std::for_each_n( counting_iterator(0), n, + [=,&dot](Index_type j) { poly_gemver_base_lam3(i, j, dot); }); poly_gemver_base_lam4(i, dot); }); - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + 
[=](Index_type i) { poly_gemver_base_lam5(i); }); - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { POLYBENCH_GEMVER_BODY6; - std::for_each(begin, end, [=,&dot](Index_type j) { + std::for_each_n( counting_iterator(0), n, + [=,&dot](Index_type j) { poly_gemver_base_lam7(i, j, dot); }); poly_gemver_base_lam8(i, dot); @@ -146,5 +153,5 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // end namespace basic -} // end namespace rajaperf +} // n namespace basic +} // n namespace rajaperf diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index 7ad6e101b..574916e3b 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -27,9 +27,6 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_GESUMMV_DATA_SETUP; - counting_iterator begin(0); - counting_iterator end(N); - switch ( vid ) { case Base_StdPar : { @@ -37,10 +34,12 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { POLYBENCH_GESUMMV_BODY1; - std::for_each(begin, end, [=,&tmpdot,&ydot](Index_type j) { + std::for_each_n( counting_iterator(0), N, + [=,&tmpdot,&ydot](Index_type j) { POLYBENCH_GESUMMV_BODY2; }); POLYBENCH_GESUMMV_BODY3; @@ -54,22 +53,22 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { - auto poly_gesummv_base_lam2 = [=](Index_type i, Index_type j, - Real_type& tmpdot, Real_type& ydot) { + auto poly_gesummv_base_lam2 = [=](Index_type i, Index_type j, Real_type& tmpdot, Real_type& ydot) { 
POLYBENCH_GESUMMV_BODY2; }; - auto poly_gesummv_base_lam3 = [=](Index_type i, - Real_type& tmpdot, Real_type& ydot) { + auto poly_gesummv_base_lam3 = [=](Index_type i, Real_type& tmpdot, Real_type& ydot) { POLYBENCH_GESUMMV_BODY3; }; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { POLYBENCH_GESUMMV_BODY1; - std::for_each(begin, end, [=,&tmpdot,&ydot](Index_type j) { + std::for_each_n( counting_iterator(0), N, + [=,&tmpdot,&ydot](Index_type j) { poly_gesummv_base_lam2(i, j, tmpdot, ydot); }); poly_gesummv_base_lam3(i, tmpdot, ydot); @@ -90,5 +89,5 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // end namespace polybench -} // end namespace rajaperf +} // N namespace polybench +} // N namespace rajaperf From 02d54664bc1cf4c5cc71b8893c41cc9ac6872695 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 11:33:31 +0300 Subject: [PATCH 138/174] for_each_n --- src/polybench/POLYBENCH_ADI-StdPar.cpp | 31 ++++----- src/polybench/POLYBENCH_ATAX-StdPar.cpp | 67 +++++++++---------- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 64 +++++++----------- .../POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 45 ++++++------- 4 files changed, 87 insertions(+), 120 deletions(-) diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index 0151f931b..b8a75b305 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -27,9 +27,6 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_ADI_DATA_SETUP; - counting_iterator begin(1); - counting_iterator end(n-1); - switch ( vid ) { case Base_StdPar : { @@ -39,9 +36,9 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 1; t <= tsteps; ++t) { - std::for_each( 
std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n-2, + [=](Index_type i) { POLYBENCH_ADI_BODY2; for (Index_type j = 1; j < n-1; ++j) { POLYBENCH_ADI_BODY3; @@ -52,9 +49,9 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n-2, + [=](Index_type i) { POLYBENCH_ADI_BODY6; for (Index_type j = 1; j < n-1; ++j) { POLYBENCH_ADI_BODY7; @@ -105,9 +102,9 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type t = 1; t <= tsteps; ++t) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n-2, + [=](Index_type i) { poly_adi_base_lam2(i); for (Index_type j = 1; j < n-1; ++j) { poly_adi_base_lam3(i, j); @@ -118,9 +115,9 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n-2, + [=](Index_type i) { poly_adi_base_lam6(i); for (Index_type j = 1; j < n-1; ++j) { poly_adi_base_lam7(i, j); @@ -148,5 +145,5 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // end namespace polybench -} // end namespace rajaperf +} // n-2 namespace polybench +} // n-2 namespace rajaperf diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp index 6137fcb70..8aa00c5a0 100644 --- a/src/polybench/POLYBENCH_ATAX-StdPar.cpp +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -27,9 +27,6 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_ATAX_DATA_SETUP; - counting_iterator begin(0); - counting_iterator end(N); - switch ( vid ) { case Base_StdPar : { 
@@ -37,25 +34,25 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { POLYBENCH_ATAX_BODY1; - std::for_each( std::execution::unseq, - begin, end, - [=,&dot](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { POLYBENCH_ATAX_BODY2; }); POLYBENCH_ATAX_BODY3; }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type j) { POLYBENCH_ATAX_BODY4; - std::for_each( std::execution::unseq, - begin, end, - [=,&dot](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type i) { POLYBENCH_ATAX_BODY5; }); POLYBENCH_ATAX_BODY6; @@ -69,45 +66,41 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { - auto poly_atax_base_lam2 = [=] (Index_type i, Index_type j, - Real_type &dot) { + auto poly_atax_base_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { POLYBENCH_ATAX_BODY2; }; - auto poly_atax_base_lam3 = [=] (Index_type i, - Real_type &dot) { + auto poly_atax_base_lam3 = [=] (Index_type i, Real_type &dot) { POLYBENCH_ATAX_BODY3; }; - auto poly_atax_base_lam5 = [=] (Index_type i, Index_type j , - Real_type &dot) { + auto poly_atax_base_lam5 = [=] (Index_type i, Index_type j , Real_type &dot) { POLYBENCH_ATAX_BODY5; }; - auto poly_atax_base_lam6 = [=] (Index_type j, - Real_type &dot) { + auto poly_atax_base_lam6 = [=] (Index_type j, Real_type &dot) { POLYBENCH_ATAX_BODY6; }; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( 
std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { POLYBENCH_ATAX_BODY1; - std::for_each( std::execution::unseq, - begin, end, - [=,&dot](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { poly_atax_base_lam2(i, j, dot); }); poly_atax_base_lam3(i, dot); }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type j) { POLYBENCH_ATAX_BODY4; - std::for_each( std::execution::unseq, - begin, end, - [=,&dot](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type i) { poly_atax_base_lam5(i, j, dot); }); poly_atax_base_lam6(j, dot); @@ -128,5 +121,5 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // end namespace polybench -} // end namespace rajaperf +} // N namespace polybench +} // N namespace rajaperf diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 07a62aefe..64b2ae0d2 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -27,18 +27,6 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_FDTD_2D_DATA_SETUP; - counting_iterator beginY(0); - counting_iterator endY(ny); - - counting_iterator begin2(0); - counting_iterator end2((nx-1)*ny); - - counting_iterator begin3(0); - counting_iterator end3(nx*(ny-1)); - - counting_iterator begin4(0); - counting_iterator end4((nx-1)*(ny-1)); - switch ( vid ) { case Base_StdPar : { @@ -48,9 +36,9 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (t = 0; t < tsteps; ++t) { - std::for_each( std::execution::par_unseq, - beginY, endY, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ny, + [=](Index_type j) { POLYBENCH_FDTD_2D_BODY1; }); @@ 
-59,25 +47,25 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) // auto [i,j] = std::div(ij,ny); i++; // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is noticeably slower than below. - std::for_each( std::execution::par_unseq, - begin2, end2, - [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (nx-1)*ny, + [=](Index_type ij) { const auto i = 1 + ij / ny; const auto j = ij % ny; POLYBENCH_FDTD_2D_BODY2; }); - std::for_each( std::execution::par_unseq, - begin3, end3, - [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nx*(ny-1), + [=](Index_type ij) { const auto i = ij / (ny-1); const auto j = 1 + ij % (ny-1); POLYBENCH_FDTD_2D_BODY3; }); - std::for_each( std::execution::par_unseq, - begin4, end4, - [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (nx-1)*(ny-1), + [=](Index_type ij) { const auto i = ij / (ny-1); const auto j = ij % (ny-1); POLYBENCH_FDTD_2D_BODY4; @@ -99,7 +87,6 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) // t-loop iteration. 
// // capturing t by reference is required for GCC 11 to generate correct results - //auto poly_fdtd2d_base_lam1 = [=,&t](Index_type j) { // but that breaks NVHPC GPU, so we instead make it an explicit parameter auto poly_fdtd2d_base_lam1 = [=](Index_type j, Index_type t) { //ey[j + 0*ny] = fict[t]; @@ -120,34 +107,31 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (t = 0; t < tsteps; ++t) { - std::for_each( std::execution::par_unseq, - beginY, endY, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ny, + [=](Index_type j) { poly_fdtd2d_base_lam1(j,t); }); - std::for_each( std::execution::par_unseq, - begin2, end2, - [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (nx-1)*ny, + [=](Index_type ij) { const auto i = 1 + ij / ny; const auto j = ij % ny; poly_fdtd2d_base_lam2(i, j); }); - std::for_each( std::execution::par_unseq, - begin3, end3, - [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nx*(ny-1), + [=](Index_type ij) { const auto i = ij / (ny-1); const auto j = 1 + ij % (ny-1); poly_fdtd2d_base_lam3(i, j); }); - counting_iterator begin4(0); - counting_iterator end4((nx-1)*(ny-1)); - - std::for_each( std::execution::par_unseq, - begin4, end4, - [=](Index_type ij) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (nx-1)*(ny-1), + [=](Index_type ij) { const auto i = ij / (ny-1); const auto j = ij % (ny-1); poly_fdtd2d_base_lam4(i, j); diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index c3d1e8b15..a4c76d7fb 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -29,14 +29,6 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) POLYBENCH_FLOYD_WARSHALL_DATA_SETUP; -#ifdef USE_STDPAR_COLLAPSE - counting_iterator begin2(0); - 
counting_iterator end2(N*N); -#else - counting_iterator begin(0); - counting_iterator end(N); -#endif - switch ( vid ) { case Base_StdPar : { @@ -46,16 +38,17 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type k = 0; k < N; ++k) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin2, end2, [=](Index_type ji) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N*N, + [=](Index_type ji) { const auto j = ji / N; const auto i = ji % N; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), N, + [=](Index_type j) { #endif POLYBENCH_FLOYD_WARSHALL_BODY; }); @@ -72,8 +65,7 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { - auto poly_floydwarshall_base_lam = [=](Index_type k, Index_type i, - Index_type j) { + auto poly_floydwarshall_base_lam = [=](Index_type k, Index_type i, Index_type j) { POLYBENCH_FLOYD_WARSHALL_BODY; }; @@ -82,16 +74,17 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) for (Index_type k = 0; k < N; ++k) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin2, end2, [=](Index_type ji) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N*N, + [=](Index_type ji) { const auto j = ji / N; const auto i = ji % N; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { - std::for_each( begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + std::for_each_n( counting_iterator(0), N, + [=](Index_type j) { #endif poly_floydwarshall_base_lam(k, i, j); }); @@ -115,5 +108,5 @@ void 
POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // end namespace polybench -} // end namespace rajaperf +} // N namespace polybench +} // N namespace rajaperf From e0a0331bf2b9df85950d3804d06bcf90163cee65 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 11:44:02 +0300 Subject: [PATCH 139/174] for_each_n --- src/lcals/GEN_LIN_RECUR-StdPar.cpp | 47 ++++++++++++------------------ src/lcals/INT_PREDICT-StdPar.cpp | 15 ++++------ src/lcals/PLANCKIAN-StdPar.cpp | 15 ++++------ src/lcals/TRIDIAG_ELIM-StdPar.cpp | 23 +++++++-------- 4 files changed, 41 insertions(+), 59 deletions(-) diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp index fd4cf6ed3..991b01b26 100644 --- a/src/lcals/GEN_LIN_RECUR-StdPar.cpp +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -28,18 +28,6 @@ void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) GEN_LIN_RECUR_DATA_SETUP; - auto beginK = counting_iterator(0); - auto endK = counting_iterator(N); - auto beginI = counting_iterator(1); - auto endI = counting_iterator(N+1); - - auto genlinrecur_lam1 = [=](Index_type k) { - GEN_LIN_RECUR_BODY1; - }; - auto genlinrecur_lam2 = [=](Index_type i) { - GEN_LIN_RECUR_BODY2; - }; - switch ( vid ) { case Base_StdPar : { @@ -47,17 +35,15 @@ void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - //for (Index_type k = 0; k < N; ++k ) { - std::for_each( std::execution::par_unseq, - beginK, endK, - [=](Index_type k) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type k) { GEN_LIN_RECUR_BODY1; }); - //for (Index_type i = 1; i < N+1; ++i ) { - std::for_each( std::execution::par_unseq, - beginI, endI, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N, + [=](Index_type i) { GEN_LIN_RECUR_BODY2; }); @@ -69,20 +55,25 @@ void 
GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto genlinrecur_lam1 = [=](Index_type k) { + GEN_LIN_RECUR_BODY1; + }; + auto genlinrecur_lam2 = [=](Index_type i) { + GEN_LIN_RECUR_BODY2; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - //for (Index_type k = 0; k < N; ++k ) { - std::for_each( std::execution::par_unseq, - beginK, endK, - [=](Index_type k) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type k) { genlinrecur_lam1(k); }); - //for (Index_type i = 1; i < N+1; ++i ) { - std::for_each( std::execution::par_unseq, - beginI, endI, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N, + [=](Index_type i) { genlinrecur_lam2(i); }); diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp index 2df532913..095662ac6 100644 --- a/src/lcals/INT_PREDICT-StdPar.cpp +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -28,9 +28,6 @@ void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - INT_PREDICT_DATA_SETUP; auto intpredict_lam = [=](Index_type i) { @@ -44,9 +41,9 @@ void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { INT_PREDICT_BODY; }); @@ -61,9 +58,9 @@ void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { 
intpredict_lam(i); }); diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp index e6378319c..1a278765d 100644 --- a/src/lcals/PLANCKIAN-StdPar.cpp +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -29,9 +29,6 @@ void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - PLANCKIAN_DATA_SETUP; auto planckian_lam = [=](Index_type i) { @@ -45,9 +42,9 @@ void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { PLANCKIAN_BODY; }); @@ -62,9 +59,9 @@ void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { planckian_lam(i); }); diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index c18df1303..ea06d7b7a 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -28,15 +28,8 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 1; const Index_type iend = m_N; - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - TRIDIAG_ELIM_DATA_SETUP; - auto tridiag_elim_lam = [=](Index_type i) { - TRIDIAG_ELIM_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -44,9 +37,9 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, 
end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { TRIDIAG_ELIM_BODY; }); @@ -58,12 +51,16 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto tridiag_elim_lam = [=](Index_type i) { + TRIDIAG_ELIM_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { tridiag_elim_lam(i); }); From 7af9e3dcc8e02e7606f03e7522c1b6cf910b7d3a Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 12:01:21 +0300 Subject: [PATCH 140/174] for_each_n --- src/common/StdParUtils.hpp | 5 ++ src/lcals/DIFF_PREDICT-StdPar.cpp | 24 +++--- src/lcals/EOS-StdPar.cpp | 24 +++--- src/lcals/FIRST_DIFF-StdPar.cpp | 24 +++--- src/lcals/FIRST_SUM-StdPar.cpp | 24 +++--- src/lcals/HYDRO_1D-StdPar.cpp | 23 +++-- src/lcals/HYDRO_2D-StdPar.cpp | 137 +++++++++++++++--------------- src/lcals/TRIDIAG_ELIM-StdPar.cpp | 4 +- 8 files changed, 129 insertions(+), 136 deletions(-) diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index 48636d9ec..e3543ad03 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -33,6 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define NVCXX_GPU_ENABLED #endif +#if defined(_NVHPC_STDPAR_MULTICORE) +#warning COLLAPSE +#define USE_STDPAR_COLLAPSE +#endif + #if defined(NVCXX_GPU_ENABLED) // this is required to get NVC++ to compile CUDA atomics in StdPar #include diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp index c38b3936c..fd2d786ce 100644 --- a/src/lcals/DIFF_PREDICT-StdPar.cpp +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -28,15 +28,8 @@ void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - DIFF_PREDICT_DATA_SETUP; - auto diffpredict_lam = [=](Index_type i) { - DIFF_PREDICT_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -44,9 +37,9 @@ void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { DIFF_PREDICT_BODY; }); @@ -58,14 +51,19 @@ void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto diffpredict_lam = [=](Index_type i) { + DIFF_PREDICT_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { diffpredict_lam(i); }); + } stopTimer(); diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp index f9281b86e..21484dc6e 100644 --- a/src/lcals/EOS-StdPar.cpp +++ b/src/lcals/EOS-StdPar.cpp @@ -28,15 +28,8 @@ void EOS::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = 
counting_iterator(ibegin); - auto end = counting_iterator(iend); - EOS_DATA_SETUP; - auto eos_lam = [=](Index_type i) { - EOS_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -44,9 +37,9 @@ void EOS::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { EOS_BODY; }); @@ -58,14 +51,19 @@ void EOS::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto eos_lam = [=](Index_type i) { + EOS_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { eos_lam(i); }); + } stopTimer(); diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp index 720371793..ca3c851dd 100644 --- a/src/lcals/FIRST_DIFF-StdPar.cpp +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -28,15 +28,8 @@ void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - FIRST_DIFF_DATA_SETUP; - auto firstdiff_lam = [=](Index_type i) { - FIRST_DIFF_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -44,9 +37,9 @@ void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { FIRST_DIFF_BODY; }); @@ -58,14 +51,19 @@ void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + 
auto firstdiff_lam = [=](Index_type i) { + FIRST_DIFF_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { firstdiff_lam(i); }); + } stopTimer(); diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp index c00a7c062..7f99509de 100644 --- a/src/lcals/FIRST_SUM-StdPar.cpp +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -28,15 +28,8 @@ void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 1; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - FIRST_SUM_DATA_SETUP; - auto firstsum_lam = [=](Index_type i) { - FIRST_SUM_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -44,9 +37,9 @@ void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { FIRST_SUM_BODY; }); @@ -58,14 +51,19 @@ void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto firstsum_lam = [=](Index_type i) { + FIRST_SUM_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { firstsum_lam(i); }); + } stopTimer(); diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp index 7d02aaa5d..cdd2a43bc 100644 --- a/src/lcals/HYDRO_1D-StdPar.cpp +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -28,15 +28,8 @@ void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) 
const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - HYDRO_1D_DATA_SETUP; - auto hydro1d_lam = [=](Index_type i) { - HYDRO_1D_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -44,9 +37,9 @@ void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { HYDRO_1D_BODY; }); @@ -58,12 +51,16 @@ void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto hydro1d_lam = [=](Index_type i) { + HYDRO_1D_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { hydro1d_lam(i); }); diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp index e779012e6..0fc0a48e7 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ b/src/lcals/HYDRO_2D-StdPar.cpp @@ -32,15 +32,8 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) #ifdef USE_STDPAR_COLLAPSE // this is going to run from [(0,0),..] 
// we will add (1,1) later - const auto nk = kend-1; - const auto nj = jend-1; - auto begin = counting_iterator(0); - auto end = counting_iterator(nk*nj); -#else - auto beginK = counting_iterator(kbeg); - auto endK = counting_iterator(kend); - auto beginJ = counting_iterator(jbeg); - auto endJ = counting_iterator(jend); + const auto nk = kend-kbeg; + const auto nj = jend-jbeg; #endif HYDRO_2D_DATA_SETUP; @@ -53,17 +46,18 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type kj) { - const auto k = 1 + kj / nj; - const auto j = 1 + kj % nj; + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nk*nj, + [=](Index_type kj) { + const auto k = kbeg + kj / nj; + const auto j = jbeg + kj % nj; #else - std::for_each( std::execution::par, - beginK, endK, - [=](Index_type k) { - std::for_each( std::execution::unseq, - beginJ, endJ, - [=](Index_type j) { + std::for_each_n( std::execution::par, + counting_iterator(kbeg), kend-kbeg, + [=](Index_type k) { + std::for_each_n( std::execution::unseq, + counting_iterator(jbeg), jend-jbeg, + [=](Index_type j) { #endif //std::cerr << "JEFF: " << k << "," << j << "\n"; HYDRO_2D_BODY1; @@ -73,17 +67,18 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type kj) { - const auto k = 1 + kj / nj; - const auto j = 1 + kj % nj; + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nk*nj, + [=](Index_type kj) { + const auto k = kbeg + kj / nj; + const auto j = jbeg + kj % nj; #else - std::for_each( std::execution::par, - beginK, endK, - [=](Index_type k) { - std::for_each( std::execution::unseq, - beginJ, endJ, - [=](Index_type j) { + std::for_each_n( std::execution::par, + counting_iterator(kbeg), kend-kbeg, + [=](Index_type k) { + 
std::for_each_n( std::execution::unseq, + counting_iterator(jbeg), jend-jbeg, + [=](Index_type j) { #endif HYDRO_2D_BODY2; #ifndef USE_STDPAR_COLLAPSE @@ -92,17 +87,18 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type kj) { - const auto k = 1 + kj / nj; - const auto j = 1 + kj % nj; + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nk*nj, + [=](Index_type kj) { + const auto k = kbeg + kj / nj; + const auto j = jbeg + kj % nj; #else - std::for_each( std::execution::par, - beginK, endK, - [=](Index_type k) { - std::for_each( std::execution::unseq, - beginJ, endJ, - [=](Index_type j) { + std::for_each_n( std::execution::par, + counting_iterator(kbeg), kend-kbeg, + [=](Index_type k) { + std::for_each_n( std::execution::unseq, + counting_iterator(jbeg), jend-jbeg, + [=](Index_type j) { #endif HYDRO_2D_BODY3; #ifndef USE_STDPAR_COLLAPSE @@ -132,17 +128,18 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type kj) { - const auto k = 1 + kj / nj; - const auto j = 1 + kj % nj; + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nk*nj, + [=](Index_type kj) { + const auto k = kbeg + kj / nj; + const auto j = jbeg + kj % nj; #else - std::for_each( std::execution::par, - beginK, endK, - [=](Index_type k) { - std::for_each( std::execution::unseq, - beginJ, endJ, - [=](Index_type j) { + std::for_each_n( std::execution::par, + counting_iterator(kbeg), kend-kbeg, + [=](Index_type k) { + std::for_each_n( std::execution::unseq, + counting_iterator(jbeg), jend-jbeg, + [=](Index_type j) { #endif hydro2d_base_lam1(k, j); #ifndef USE_STDPAR_COLLAPSE @@ -151,17 +148,18 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - 
std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type kj) { - const auto k = 1 + kj / nj; - const auto j = 1 + kj % nj; + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nk*nj, + [=](Index_type kj) { + const auto k = kbeg + kj / nj; + const auto j = jbeg + kj % nj; #else - std::for_each( std::execution::par, - beginK, endK, - [=](Index_type k) { - std::for_each( std::execution::unseq, - beginJ, endJ, - [=](Index_type j) { + std::for_each_n( std::execution::par, + counting_iterator(kbeg), kend-kbeg, + [=](Index_type k) { + std::for_each_n( std::execution::unseq, + counting_iterator(jbeg), jend-jbeg, + [=](Index_type j) { #endif hydro2d_base_lam2(k, j); #ifndef USE_STDPAR_COLLAPSE @@ -170,17 +168,18 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) }); #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, [=](Index_type kj) { - const auto k = 1 + kj / nj; - const auto j = 1 + kj % nj; + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nk*nj, + [=](Index_type kj) { + const auto k = kbeg + kj / nj; + const auto j = jbeg + kj % nj; #else - std::for_each( std::execution::par, - beginK, endK, - [=](Index_type k) { - std::for_each( std::execution::unseq, - beginJ, endJ, - [=](Index_type j) { + std::for_each_n( std::execution::par, + counting_iterator(kbeg), kend-kbeg, + [=](Index_type k) { + std::for_each_n( std::execution::unseq, + counting_iterator(jbeg), jend-jbeg, + [=](Index_type j) { #endif hydro2d_base_lam3(k, j); #ifndef USE_STDPAR_COLLAPSE diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index ea06d7b7a..edeba5a30 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -59,8 +59,8 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { std::for_each_n( std::execution::par_unseq, - counting_iterator(ibegin), iend, - 
[=](Index_type i) { + counting_iterator(ibegin), iend, + [=](Index_type i) { tridiag_elim_lam(i); }); From f70e3c9f22295b2eaf2afb15355b88e726f56047 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 12:28:26 +0300 Subject: [PATCH 141/174] for_each_n --- src/apps/CONVECTION3DPA-StdPar.cpp | 88 +++++++------------ src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 14 ++- src/apps/DIFFUSION3DPA-StdPar.cpp | 13 ++- src/apps/ENERGY-StdPar.cpp | 117 ++++++++++++------------- src/apps/FIR-StdPar.cpp | 25 +++--- src/apps/HALOEXCHANGE-StdPar.cpp | 32 +++---- src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 33 +++---- src/apps/LTIMES-StdPar.cpp | 35 +++----- src/apps/LTIMES_NOVIEW-StdPar.cpp | 35 +++----- src/apps/MASS3DPA-StdPar.cpp | 14 +-- src/apps/PRESSURE-StdPar.cpp | 37 ++++---- src/apps/VOL3D-StdPar.cpp | 26 +++--- src/common/StdParUtils.hpp | 2 +- src/lcals/TRIDIAG_ELIM-StdPar.cpp | 2 + 14 files changed, 205 insertions(+), 268 deletions(-) diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp index b8c36646e..9405b81f8 100644 --- a/src/apps/CONVECTION3DPA-StdPar.cpp +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -24,9 +24,6 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( CONVECTION3DPA_DATA_SETUP; - auto begin = counting_iterator(0); - auto end = counting_iterator(NE); - switch (vid) { case Base_StdPar: { @@ -34,99 +31,76 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](int e) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), NE, + [=](int e) { CONVECTION3DPA_0_CPU; - CPU_FOREACH(dz,z,CPA_D1D) - { - CPU_FOREACH(dy,y,CPA_D1D) - { - CPU_FOREACH(dx,x,CPA_D1D) - { + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(dy,y,CPA_D1D) { + CPU_FOREACH(dx,x,CPA_D1D) { CONVECTION3DPA_1; } } } - CPU_FOREACH(dz,z,CPA_D1D) - { - 
CPU_FOREACH(dy,y,CPA_D1D) - { - CPU_FOREACH(qx,x,CPA_Q1D) - { + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(dy,y,CPA_D1D) { + CPU_FOREACH(qx,x,CPA_Q1D) { CONVECTION3DPA_2; } } } - CPU_FOREACH(dz,z,CPA_D1D) - { - CPU_FOREACH(qx,x,CPA_Q1D) - { - CPU_FOREACH(qy,y,CPA_Q1D) - { + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(qx,x,CPA_Q1D) { + CPU_FOREACH(qy,y,CPA_Q1D) { CONVECTION3DPA_3; } } } - CPU_FOREACH(qx,x,CPA_Q1D) - { - CPU_FOREACH(qy,y,CPA_Q1D) - { - CPU_FOREACH(qz,z,CPA_Q1D) - { + CPU_FOREACH(qx,x,CPA_Q1D) { + CPU_FOREACH(qy,y,CPA_Q1D) { + CPU_FOREACH(qz,z,CPA_Q1D) { CONVECTION3DPA_4; } } } - CPU_FOREACH(qz,z,CPA_Q1D) - { - CPU_FOREACH(qy,y,CPA_Q1D) - { - CPU_FOREACH(qx,x,CPA_Q1D) - { + CPU_FOREACH(qz,z,CPA_Q1D) { + CPU_FOREACH(qy,y,CPA_Q1D) { + CPU_FOREACH(qx,x,CPA_Q1D) { CONVECTION3DPA_5; } } } - CPU_FOREACH(qx,x,CPA_Q1D) - { - CPU_FOREACH(qy,y,CPA_Q1D) - { - CPU_FOREACH(dz,z,CPA_D1D) - { + CPU_FOREACH(qx,x,CPA_Q1D) { + CPU_FOREACH(qy,y,CPA_Q1D) { + CPU_FOREACH(dz,z,CPA_D1D) { CONVECTION3DPA_6; } } } - CPU_FOREACH(dz,z,CPA_D1D) - { - CPU_FOREACH(qx,x,CPA_Q1D) - { - CPU_FOREACH(dy,y,CPA_D1D) - { + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(qx,x,CPA_Q1D) { + CPU_FOREACH(dy,y,CPA_D1D) { CONVECTION3DPA_7; } } } - CPU_FOREACH(dz,z,CPA_D1D) - { - CPU_FOREACH(dy,y,CPA_D1D) - { - CPU_FOREACH(dx,x,CPA_D1D) - { + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(dy,y,CPA_D1D) { + CPU_FOREACH(dx,x,CPA_D1D) { CONVECTION3DPA_8; } } } + }); // element loop } @@ -136,9 +110,11 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( } default: - getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid - << std::endl; + getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid << std::endl; } + +#else + RAJA_UNUSED_VAR(vid); #endif } diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp index 7b9216949..e10ea9eff 100644 --- a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ 
-32,8 +32,6 @@ void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = m_domain->n_real_zones; - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); DEL_DOT_VEC_2D_DATA_SETUP; @@ -49,9 +47,9 @@ void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ii) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type ii) { DEL_DOT_VEC_2D_BODY_INDEX; DEL_DOT_VEC_2D_BODY; }); @@ -72,9 +70,9 @@ void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type ii) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type ii) { deldotvec2d_base_lam(ii); }); diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index bc6837bfa..2b4cf5797 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -30,9 +30,6 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) DIFFUSION3DPA_DATA_SETUP; - auto begin = counting_iterator(0); - auto end = counting_iterator(NE); - switch (vid) { case Base_StdPar: { @@ -40,9 +37,9 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](int e) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), NE, + [=](int e) { DIFFUSION3DPA_0_CPU; @@ -115,6 +112,7 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) } }); // element loop + } stopTimer(); @@ -122,8 +120,7 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, 
size_t tune_idx) } default: - getCout() << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid - << std::endl; + getCout() << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid << std::endl; } #else diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp index 7c353618e..4c2c15456 100644 --- a/src/apps/ENERGY-StdPar.cpp +++ b/src/apps/ENERGY-StdPar.cpp @@ -28,30 +28,8 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - ENERGY_DATA_SETUP; - auto energy_lam1 = [=](Index_type i) { - ENERGY_BODY1; - }; - auto energy_lam2 = [=](Index_type i) { - ENERGY_BODY2; - }; - auto energy_lam3 = [=](Index_type i) { - ENERGY_BODY3; - }; - auto energy_lam4 = [=](Index_type i) { - ENERGY_BODY4; - }; - auto energy_lam5 = [=](Index_type i) { - ENERGY_BODY5; - }; - auto energy_lam6 = [=](Index_type i) { - ENERGY_BODY6; - }; - switch ( vid ) { case Base_StdPar : { @@ -59,39 +37,39 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { ENERGY_BODY1; }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { ENERGY_BODY2; }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { ENERGY_BODY3; }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + 
[=](Index_type i) { ENERGY_BODY4; }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { ENERGY_BODY5; }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { ENERGY_BODY6; }); @@ -103,42 +81,61 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto energy_lam1 = [=](Index_type i) { + ENERGY_BODY1; + }; + auto energy_lam2 = [=](Index_type i) { + ENERGY_BODY2; + }; + auto energy_lam3 = [=](Index_type i) { + ENERGY_BODY3; + }; + auto energy_lam4 = [=](Index_type i) { + ENERGY_BODY4; + }; + auto energy_lam5 = [=](Index_type i) { + ENERGY_BODY5; + }; + auto energy_lam6 = [=](Index_type i) { + ENERGY_BODY6; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { energy_lam1(i); }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { energy_lam2(i); }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { energy_lam3(i); }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { energy_lam4(i); }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), 
iend-ibegin, + [=](Index_type i) { energy_lam5(i); }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { energy_lam6(i); }); @@ -157,5 +154,5 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // end namespace apps -} // end namespace rajaperf +} // iend-ibegin namespace apps +} // iend-ibegin namespace rajaperf diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp index 4a7cc4235..a7d688df7 100644 --- a/src/apps/FIR-StdPar.cpp +++ b/src/apps/FIR-StdPar.cpp @@ -28,9 +28,6 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize() - m_coefflen; - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - FIR_COEFF; FIR_DATA_SETUP; @@ -38,10 +35,6 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) Real_type coeff[FIR_COEFFLEN]; std::copy(std::begin(coeff_array), std::end(coeff_array), std::begin(coeff)); - auto fir_lam = [=](Index_type i) { - FIR_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -49,9 +42,9 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { FIR_BODY; }); @@ -63,12 +56,16 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto fir_lam = [=](Index_type i) { + FIR_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { fir_lam(i); }); @@ -84,6 +81,8 @@ void 
FIR::runStdParVariant(VariantID vid, size_t tune_idx) } +#else + RAJA_UNUSED_VAR(vid); #endif } diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index 690d0e4f7..3318e9612 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -31,8 +31,8 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) HALOEXCHANGE_DATA_SETUP; - auto begin = counting_iterator(0); - auto end = counting_iterator(num_neighbors); + auto ibegin = 0; + auto iend = num_neighbors; switch ( vid ) { @@ -41,9 +41,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type l) noexcept { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type l) noexcept { Real_ptr buffer = buffers[l]; Int_ptr list = pack_index_lists[l]; Index_type len = pack_index_list_lengths[l]; @@ -56,9 +56,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type l) noexcept { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type l) noexcept { Real_ptr buffer = buffers[l]; Int_ptr list = unpack_index_lists[l]; Index_type len = unpack_index_list_lengths[l]; @@ -82,9 +82,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type l) noexcept { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type l) noexcept { Real_ptr buffer = buffers[l]; Int_ptr list = pack_index_lists[l]; Index_type len = pack_index_list_lengths[l]; @@ -100,9 +100,9 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t 
tune_idx) } }); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type l) noexcept { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type l) noexcept { Real_ptr buffer = buffers[l]; Int_ptr list = unpack_index_lists[l]; Index_type len = unpack_index_list_lengths[l]; @@ -132,5 +132,5 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // end namespace apps -} // end namespace rajaperf +} // iend-ibegin namespace apps +} // iend-ibegin namespace rajaperf diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp index c89f014e7..db96fb3b2 100644 --- a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -52,11 +52,9 @@ void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) } } - auto begin = counting_iterator(0); - auto end = counting_iterator(pack_index); - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), pack_index, + [=](Index_type j) { Real_ptr buffer = pack_ptr_holders[j].buffer; Int_ptr list = pack_ptr_holders[j].list; Real_ptr var = pack_ptr_holders[j].var; @@ -81,11 +79,9 @@ void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) } } - auto begin2 = counting_iterator(0); - auto end2 = counting_iterator(unpack_index); - std::for_each( std::execution::par_unseq, - begin2, end2, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), unpack_index, + [=](Index_type j) { Real_ptr buffer = unpack_ptr_holders[j].buffer; Int_ptr list = unpack_ptr_holders[j].list; Real_ptr var = unpack_ptr_holders[j].var; @@ -124,11 +120,9 @@ void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) buffer += len; } } - auto begin = counting_iterator(0); - auto end = counting_iterator(pack_index); - std::for_each( 
std::execution::par_unseq, - begin, end, - [=](Index_type j) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), pack_index, + [=](Index_type j) { auto pack_lambda = pack_lambdas[j]; Index_type len = pack_lens[j]; for (Index_type i = 0; i < len; i++) { @@ -150,12 +144,9 @@ void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) buffer += len; } } - auto begin2 = counting_iterator(0); - auto end2 = counting_iterator(unpack_index); - std::for_each( std::execution::par_unseq, - begin2, end2, - [=](Index_type j) { - //for (Index_type j = 0; j < unpack_index; j++) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), unpack_index, + [=](Index_type j) { auto unpack_lambda = unpack_lambdas[j]; Index_type len = unpack_lens[j]; for (Index_type i = 0; i < len; i++) { diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index 1d6c2bc58..ce37f3bf1 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -28,14 +28,6 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) LTIMES_DATA_SETUP; -#ifdef USE_STDPAR_COLLAPSE - auto begin = counting_iterator(0); - auto end = counting_iterator(num_z*num_g*num_m); -#else - auto begin = counting_iterator(0); - auto end = counting_iterator(num_z); -#endif - switch ( vid ) { case Base_StdPar : { @@ -44,17 +36,17 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type zgm) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z*num_g*num_m, + [=](Index_type zgm) { const auto z = zgm / (num_g*num_m); const auto gm = zgm % (num_g*num_m); const auto g = gm / num_m; const auto m = gm % num_m; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type z) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z, + 
[=](Index_type z) { for (Index_type g = 0; g < num_g; ++g ) for (Index_type m = 0; m < num_m; ++m ) #endif @@ -71,8 +63,7 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { - auto ltimes_base_lam = [=](Index_type d, Index_type z, - Index_type g, Index_type m) { + auto ltimes_base_lam = [=](Index_type d, Index_type z, Index_type g, Index_type m) { LTIMES_BODY; }; @@ -80,17 +71,17 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type zgm) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z*num_g*num_m, + [=](Index_type zgm) { const auto z = zgm / (num_g*num_m); const auto gm = zgm % (num_g*num_m); const auto g = gm / num_m; const auto m = gm % num_m; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type z) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z, + [=](Index_type z) { for (Index_type g = 0; g < num_g; ++g ) for (Index_type m = 0; m < num_m; ++m ) #endif diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp index 067067084..7d6102d60 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp +++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -28,14 +28,6 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) LTIMES_NOVIEW_DATA_SETUP; -#ifdef USE_STDPAR_COLLAPSE - auto begin = counting_iterator(0); - auto end = counting_iterator(num_z*num_g*num_m); -#else - auto begin = counting_iterator(0); - auto end = counting_iterator(num_z); -#endif - switch ( vid ) { case Base_StdPar : { @@ -44,17 +36,17 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type zgm) { + std::for_each_n( 
std::execution::par_unseq, + counting_iterator(0), num_z*num_g*num_m, + [=](Index_type zgm) { const auto z = zgm / (num_g*num_m); const auto gm = zgm % (num_g*num_m); const auto g = gm / num_m; const auto m = gm % num_m; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type z) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z, + [=](Index_type z) { for (Index_type g = 0; g < num_g; ++g ) for (Index_type m = 0; m < num_m; ++m ) #endif @@ -71,8 +63,7 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { - auto ltimesnoview_lam = [=](Index_type d, Index_type z, - Index_type g, Index_type m) { + auto ltimesnoview_lam = [=](Index_type d, Index_type z, Index_type g, Index_type m) { LTIMES_NOVIEW_BODY; }; @@ -80,17 +71,17 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { #ifdef USE_STDPAR_COLLAPSE - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type zgm) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z*num_g*num_m, + [=](Index_type zgm) { const auto z = zgm / (num_g*num_m); const auto gm = zgm % (num_g*num_m); const auto g = gm / num_m; const auto m = gm % num_m; #else - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type z) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z, + [=](Index_type z) { for (Index_type g = 0; g < num_g; ++g ) for (Index_type m = 0; m < num_m; ++m ) #endif diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index 2fdbdcdcf..1ca1e6d6e 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -21,7 +21,6 @@ namespace apps { void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { #if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); MASS3DPA_DATA_SETUP; @@ -30,15 +29,12 @@ void MASS3DPA::runStdParVariant(VariantID 
vid, size_t tune_idx) { case Base_StdPar: { - auto begin = counting_iterator(0); - auto end = counting_iterator((int)NE); - startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](int e) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), NE, + [=](int e) { MASS3DPA_0_CPU @@ -94,6 +90,7 @@ void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { } }); // element loop + } stopTimer(); @@ -103,6 +100,9 @@ void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { default: getCout() << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; } + +#else + RAJA_UNUSED_VAR(vid); #endif } diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp index 17fc0eedf..637db4904 100644 --- a/src/apps/PRESSURE-StdPar.cpp +++ b/src/apps/PRESSURE-StdPar.cpp @@ -28,18 +28,8 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - PRESSURE_DATA_SETUP; - auto pressure_lam1 = [=](Index_type i) { - PRESSURE_BODY1; - }; - auto pressure_lam2 = [=](Index_type i) { - PRESSURE_BODY2; - }; - switch ( vid ) { case Base_StdPar : { @@ -47,14 +37,14 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { PRESSURE_BODY1; }); - std::for_each( std::execution::par_unseq, - begin, end, + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { PRESSURE_BODY2; }); @@ -67,17 +57,24 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { + auto pressure_lam1 = [=](Index_type i) { 
+ PRESSURE_BODY1; + }; + auto pressure_lam2 = [=](Index_type i) { + PRESSURE_BODY2; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { pressure_lam1(i); }); - std::for_each( std::execution::par_unseq, - begin, end, + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { pressure_lam2(i); }); @@ -97,5 +94,5 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // end namespace apps -} // end namespace rajaperf +} // iend-ibegin namespace apps +} // iend-ibegin namespace rajaperf diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp index 324d2bbcc..1aa9dfdd0 100644 --- a/src/apps/VOL3D-StdPar.cpp +++ b/src/apps/VOL3D-StdPar.cpp @@ -30,19 +30,12 @@ void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) const Index_type ibegin = m_domain->fpz; const Index_type iend = m_domain->lpz+1; - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - VOL3D_DATA_SETUP; NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ; NDPTRSET(m_domain->jp, m_domain->kp, y,y0,y1,y2,y3,y4,y5,y6,y7) ; NDPTRSET(m_domain->jp, m_domain->kp, z,z0,z1,z2,z3,z4,z5,z6,z7) ; - auto vol3d_lam = [=](Index_type i) { - VOL3D_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -50,9 +43,9 @@ void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { VOL3D_BODY; }); @@ -60,16 +53,20 @@ void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) stopTimer(); break; - } + } case Lambda_StdPar : { + auto vol3d_lam = [=](Index_type i) 
{ + VOL3D_BODY; + }; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - std::for_each( std::execution::par_unseq, - begin, end, - [=](Index_type i) { + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { vol3d_lam(i); }); @@ -82,6 +79,7 @@ void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) default : { getCout() << "\n VOL3D : Unknown variant id = " << vid << std::endl; } + } #else diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index e3543ad03..514dc56e2 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -34,7 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #if defined(_NVHPC_STDPAR_MULTICORE) -#warning COLLAPSE +#warning COLLAPSE (TESTING ONLY - DISABLE IN PRODUCTION) #define USE_STDPAR_COLLAPSE #endif diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index edeba5a30..08c3e13e9 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -76,6 +76,8 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) } +#else + RAJA_UNUSED_VAR(vid); #endif } From 45fc93b77b2209b90c2cb5be104c2cb33edece11 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 12:40:06 +0300 Subject: [PATCH 142/174] fixed unused warnings --- src/basic/INDEXLIST-StdPar.cpp | 3 --- src/lcals/HYDRO_2D.hpp | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index 1f0bf5cd3..b48bb3c7d 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -27,9 +27,6 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - auto begin = counting_iterator(ibegin); - auto end = counting_iterator(iend); - INDEXLIST_DATA_SETUP; switch ( vid ) { diff --git 
a/src/lcals/HYDRO_2D.hpp b/src/lcals/HYDRO_2D.hpp index 93cce3305..66f5d12cc 100644 --- a/src/lcals/HYDRO_2D.hpp +++ b/src/lcals/HYDRO_2D.hpp @@ -60,7 +60,7 @@ const Real_type s = m_s; \ const Real_type t = m_t; \ \ - const Index_type kn = m_kn; \ + const Index_type kn = m_kn; (void)kn; \ const Index_type jn = m_jn; #define HYDRO_2D_BODY1 \ From 8074f417d69ac96a51e6b11e3c29881f2f0be1ea Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 12:44:25 +0300 Subject: [PATCH 143/174] disable this, since it requires SLM --- src/basic/MAT_MAT_SHARED.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 454bb2eed..2f069d3e0 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -61,8 +61,9 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams ¶ms) setVariantDefined(Lambda_HIP); setVariantDefined(RAJA_HIP); - setVariantDefined( Base_StdPar ); - setVariantDefined( Lambda_StdPar ); + // MAT_MAT_SHARED is not implementable in StdPar, nor should it be. 
+ //setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); } MAT_MAT_SHARED::~MAT_MAT_SHARED() {} From eb3ecee39481321d88291fc999ea5b170f3ddf05 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 12:47:23 +0300 Subject: [PATCH 144/174] remove lambda version - will not bother with this --- src/basic/REDUCE_STRUCT-StdPar.cpp | 40 ------------------------------ 1 file changed, 40 deletions(-) diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp index b8e15d033..6dea32346 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -89,46 +89,6 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t break; } - case Lambda_StdPar : { - - auto reduce_struct_x_base_lam = [=](Index_type i) -> Real_type { - return points.x[i]; - }; - - auto reduce_struct_y_base_lam = [=](Index_type i) -> Real_type { - return points.y[i]; - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - Real_type xsum = m_init_sum; Real_type ysum = m_init_sum; - Real_type xmin = m_init_min; Real_type ymin = m_init_min; - Real_type xmax = m_init_max; Real_type ymax = m_init_max; - -#warning needs parallel - for (Index_type i = ibegin; i < iend; ++i ) { - xsum += reduce_struct_x_base_lam(i); - xmin = std::min(xmin, reduce_struct_x_base_lam(i)); - xmax = std::max(xmax, reduce_struct_x_base_lam(i)); - ysum += reduce_struct_y_base_lam(i); - ymin = std::min(ymin, reduce_struct_y_base_lam(i)); - ymax = std::max(ymax, reduce_struct_y_base_lam(i)); - } - - points.SetCenter(xsum/(points.N), ysum/(points.N)); - points.SetXMin(xmin); - points.SetXMax(xmax); - points.SetYMin(ymin); - points.SetYMax(ymax); - m_points=points; - - } - stopTimer(); - - break; - } - default : { getCout() << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; } From c1b4034aa1fe999178523493eb2ca17a867a56c6 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 
12:48:31 +0300 Subject: [PATCH 145/174] remove since not implementing this --- src/basic/CMakeLists.txt | 1 - src/basic/MAT_MAT_SHARED-StdPar.cpp | 165 ---------------------------- 2 files changed, 166 deletions(-) delete mode 100644 src/basic/MAT_MAT_SHARED-StdPar.cpp diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index 24af1abae..a7c63be89 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -65,7 +65,6 @@ blt_add_library( INIT_VIEW1D_OFFSET-OMPTarget.cpp MAT_MAT_SHARED.cpp MAT_MAT_SHARED-Seq.cpp - MAT_MAT_SHARED-StdPar.cpp MAT_MAT_SHARED-Hip.cpp MAT_MAT_SHARED-Cuda.cpp MAT_MAT_SHARED-OMP.cpp diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp deleted file mode 100644 index cc211b719..000000000 --- a/src/basic/MAT_MAT_SHARED-StdPar.cpp +++ /dev/null @@ -1,165 +0,0 @@ -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC -// and RAJA Performance Suite project contributors. -// See the RAJAPerf/LICENSE file for details. 
-// -// SPDX-License-Identifier: (BSD-3-Clause) -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - -#include "MAT_MAT_SHARED.hpp" - -#include "common/StdParUtils.hpp" - -#include - -namespace rajaperf { -namespace basic { - -void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t tune_idx) -{ -#if defined(RUN_STDPAR) - const Index_type run_reps = getRunReps(); - const Index_type N = m_N; - - MAT_MAT_SHARED_DATA_SETUP; - const Index_type Nx = RAJA_DIVIDE_CEILING_INT(N, TL_SZ); - const Index_type Ny = RAJA_DIVIDE_CEILING_INT(N, TL_SZ); - - switch (vid) { - - case Base_StdPar: { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - -#warning need parallel for - for (Index_type by = 0; by < Ny; ++by) { - for (Index_type bx = 0; bx < Nx; ++bx) { - - MAT_MAT_SHARED_BODY_0(TL_SZ) - - for (Index_type ty = 0; ty < TL_SZ; ++ty) { - for (Index_type tx = 0; tx < TL_SZ; ++tx) { - MAT_MAT_SHARED_BODY_1(TL_SZ) - } - } - - for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; ++k) { - - for (Index_type ty = 0; ty < TL_SZ; ++ty) { - for (Index_type tx = 0; tx < TL_SZ; ++tx) { - MAT_MAT_SHARED_BODY_2(TL_SZ) - } - } - - for (Index_type ty = 0; ty < TL_SZ; ++ty) { - for (Index_type tx = 0; tx < TL_SZ; ++tx) { - MAT_MAT_SHARED_BODY_3(TL_SZ) - } - } - - } - - for (Index_type ty = 0; ty < TL_SZ; ++ty) { - for (Index_type tx = 0; tx < TL_SZ; ++tx) { - MAT_MAT_SHARED_BODY_4(TL_SZ) - } - } - } - } - } - stopTimer(); - - break; - } - - case Lambda_StdPar: { - - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - auto outer_y = [&](Index_type by) { - auto outer_x = [&](Index_type bx) { - - MAT_MAT_SHARED_BODY_0(TL_SZ) - - auto inner_y_1 = [&](Index_type ty) { - auto inner_x_1 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_1(TL_SZ) }; - - for (Index_type tx = 0; tx < TL_SZ; ++tx) { - if (tx < TL_SZ) - inner_x_1(tx); - } - }; - - for (Index_type ty = 0; ty < TL_SZ; ++ty) { - if (ty < TL_SZ) - inner_y_1(ty); - } - - 
for (Index_type k = 0; k < (TL_SZ + N - 1) / TL_SZ; ++k) { - - auto inner_y_2 = [&](Index_type ty) { - auto inner_x_2 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_2(TL_SZ) }; - - for (Index_type tx = 0; tx < TL_SZ; ++tx) { - inner_x_2(tx); - } - }; - - for (Index_type ty = 0; ty < TL_SZ; ++ty) { - inner_y_2(ty); - } - - auto inner_y_3 = [&](Index_type ty) { - auto inner_x_3 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_3(TL_SZ) }; - - for (Index_type tx = 0; tx < TL_SZ; ++tx) { - inner_x_3(tx); - } - }; - - for (Index_type ty = 0; ty < TL_SZ; ++ty) { - inner_y_3(ty); - } - } - - auto inner_y_4 = [&](Index_type ty) { - auto inner_x_4 = [&](Index_type tx) { MAT_MAT_SHARED_BODY_4(TL_SZ) }; - - for (Index_type tx = 0; tx < TL_SZ; ++tx) { - inner_x_4(tx); - } - }; - - for (Index_type ty = 0; ty < TL_SZ; ++ty) { - inner_y_4(ty); - } - }; // outer_x - - for (Index_type bx = 0; bx < Nx; ++bx) { - outer_x(bx); - } - }; - -#warning need parallel for - for (Index_type by = 0; by < Ny; ++by) { - outer_y(by); - } - } - stopTimer(); - - break; - } - - default: { - getCout() << "\n MAT_MAT_SHARED : Unknown variant id = " << vid - << std::endl; - } - } -#endif -} - -} // end namespace basic -} // end namespace rajaperf From 9893a3a755584fcf166ae240ad920fe0f757d64a Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 12:49:00 +0300 Subject: [PATCH 146/174] remove since not implementing this --- src/basic/MAT_MAT_SHARED.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 2f069d3e0..98cd878ce 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -60,10 +60,6 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams ¶ms) setVariantDefined(Base_HIP); setVariantDefined(Lambda_HIP); setVariantDefined(RAJA_HIP); - - // MAT_MAT_SHARED is not implementable in StdPar, nor should it be. 
- //setVariantDefined( Base_StdPar ); - //setVariantDefined( Lambda_StdPar ); } MAT_MAT_SHARED::~MAT_MAT_SHARED() {} From c59a972bacc57fe7872f1fc7b1a7fd34bb821a3b Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 13:15:53 +0300 Subject: [PATCH 147/174] need empty impl after all --- src/basic/CMakeLists.txt | 1 + src/basic/MAT_MAT_SHARED-StdPar.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 src/basic/MAT_MAT_SHARED-StdPar.cpp diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index a7c63be89..24af1abae 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -65,6 +65,7 @@ blt_add_library( INIT_VIEW1D_OFFSET-OMPTarget.cpp MAT_MAT_SHARED.cpp MAT_MAT_SHARED-Seq.cpp + MAT_MAT_SHARED-StdPar.cpp MAT_MAT_SHARED-Hip.cpp MAT_MAT_SHARED-Cuda.cpp MAT_MAT_SHARED-OMP.cpp diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp new file mode 100644 index 000000000..26f750e1a --- /dev/null +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -0,0 +1,25 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MAT_MAT_SHARED.hpp" + +#include + +namespace rajaperf { +namespace basic { + +void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + default: { + getCout() << "\n MAT_MAT_SHARED : Unknown variant id = " << vid + << std::endl; + } +} + +} // end namespace basic +} // end namespace rajaperf From 83ccc76dbd119c14153b2932abaa5e71d02c71f0 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 13:24:01 +0300 Subject: [PATCH 148/174] need empty impl after all --- src/basic/MAT_MAT_SHARED-StdPar.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp index 26f750e1a..36671c8f0 100644 --- a/src/basic/MAT_MAT_SHARED-StdPar.cpp +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -15,10 +15,13 @@ namespace basic { void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + + switch (vid) { default: { getCout() << "\n MAT_MAT_SHARED : Unknown variant id = " << vid << std::endl; } + } } } // end namespace basic From 1f85e587507989e481854ae954aa9878d6253b2b Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 13:35:02 +0300 Subject: [PATCH 149/174] enable INDEXLIST_3LOOP StdPar --- src/basic/INDEXLIST_3LOOP.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/basic/INDEXLIST_3LOOP.cpp b/src/basic/INDEXLIST_3LOOP.cpp index e7d4215fa..0eb00e772 100644 --- a/src/basic/INDEXLIST_3LOOP.cpp +++ b/src/basic/INDEXLIST_3LOOP.cpp @@ -58,6 +58,9 @@ INDEXLIST_3LOOP::INDEXLIST_3LOOP(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } INDEXLIST_3LOOP::~INDEXLIST_3LOOP() From 1499974a3e5765f5daa181f5d10833ac59d3d2c6 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 
13 Oct 2022 13:35:17 +0300 Subject: [PATCH 150/174] fix for_each_n --- src/polybench/POLYBENCH_3MM-StdPar.cpp | 33 ++++++++++++++-------- src/polybench/POLYBENCH_GEMM-StdPar.cpp | 12 +++++--- src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 18 ++++++++---- src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 6 ++-- 4 files changed, 46 insertions(+), 23 deletions(-) diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index cb758fe5b..9fa125ffb 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -48,7 +48,8 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) [=](Index_type j) { #endif POLYBENCH_3MM_BODY1; - std::for_each_n( counting_iterator(0), nk, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, [=,&dot](Index_type k) { POLYBENCH_3MM_BODY2; }); @@ -68,11 +69,13 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), nj, [=](Index_type j) { - std::for_each_n( counting_iterator(0), nl, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, [=](Index_type l) { #endif POLYBENCH_3MM_BODY4; - std::for_each_n( counting_iterator(0), nm, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nm, [=,&dot](Index_type m) { POLYBENCH_3MM_BODY5; }); @@ -92,11 +95,13 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), ni, [=](Index_type i) { - std::for_each_n( counting_iterator(0), nl, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, [=](Index_type l) { #endif POLYBENCH_3MM_BODY7; - std::for_each_n( counting_iterator(0), nj, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, [=,&dot](Index_type j) { POLYBENCH_3MM_BODY8; }); @@ -152,11 +157,13 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( 
std::execution::par_unseq, counting_iterator(0), ni, [=](Index_type i) { - std::for_each_n( counting_iterator(0), nj, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, [=](Index_type j) { #endif POLYBENCH_3MM_BODY1; - std::for_each_n( counting_iterator(0), nk, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, [=,&dot](Index_type k) { poly_3mm_base_lam2(i, j, k, dot); }); @@ -176,11 +183,13 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), nj, [=](Index_type j) { - std::for_each_n( counting_iterator(0), nl, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, [=](Index_type l) { #endif POLYBENCH_3MM_BODY4; - std::for_each_n( counting_iterator(0), nm, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nm, [=,&dot](Index_type m) { poly_3mm_base_lam5(j, l, m, dot); }); @@ -200,11 +209,13 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), ni, [=](Index_type i) { - std::for_each_n( counting_iterator(0), nl, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, [=](Index_type l) { #endif POLYBENCH_3MM_BODY7; - std::for_each_n( counting_iterator(0), nj, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, [=,&dot](Index_type j) { poly_3mm_base_lam8(i, l, j, dot); }); diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index 7c689c497..4c4b9ab8a 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -44,12 +44,14 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), ni, [=](Index_type i) { - std::for_each_n( counting_iterator(0), nj, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, [=](Index_type j) { 
#endif POLYBENCH_GEMM_BODY1; POLYBENCH_GEMM_BODY2; - std::for_each_n( counting_iterator(0), nk, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, [=,&dot](Index_type k) { POLYBENCH_GEMM_BODY3; }); @@ -90,12 +92,14 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), ni, [=](Index_type i) { - std::for_each_n( counting_iterator(0), nj, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, [=](Index_type j) { #endif POLYBENCH_GEMM_BODY1; poly_gemm_base_lam2(i, j); - std::for_each_n( counting_iterator(0), nk, + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, [=,&dot](Index_type k) { poly_gemm_base_lam3(i, j, k, dot); }); diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 4fa031e46..218016d5e 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -37,7 +37,8 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), n, [=](Index_type i) { - std::for_each_n( counting_iterator(0), n, + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, [=](Index_type j) { POLYBENCH_GEMVER_BODY1; }); @@ -47,7 +48,8 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) counting_iterator(0), n, [=](Index_type i) { POLYBENCH_GEMVER_BODY2; - std::for_each_n( counting_iterator(0), n, + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, [=,&dot](Index_type j) { POLYBENCH_GEMVER_BODY3; }); @@ -64,7 +66,8 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) counting_iterator(0), n, [=](Index_type i) { POLYBENCH_GEMVER_BODY6; - std::for_each_n( counting_iterator(0), n, + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, [=,&dot](Index_type j) { POLYBENCH_GEMVER_BODY7; }); @@ -104,7 
+107,8 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), n, [=](Index_type i) { - std::for_each_n( counting_iterator(0), n, + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, [=](Index_type j) { poly_gemver_base_lam1(i, j); }); @@ -114,7 +118,8 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) counting_iterator(0), n, [=](Index_type i) { POLYBENCH_GEMVER_BODY2; - std::for_each_n( counting_iterator(0), n, + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, [=,&dot](Index_type j) { poly_gemver_base_lam3(i, j, dot); }); @@ -131,7 +136,8 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) counting_iterator(0), n, [=](Index_type i) { POLYBENCH_GEMVER_BODY6; - std::for_each_n( counting_iterator(0), n, + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, [=,&dot](Index_type j) { poly_gemver_base_lam7(i, j, dot); }); diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index 574916e3b..b46420597 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -38,7 +38,8 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) counting_iterator(0), N, [=](Index_type i) { POLYBENCH_GESUMMV_BODY1; - std::for_each_n( counting_iterator(0), N, + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, [=,&tmpdot,&ydot](Index_type j) { POLYBENCH_GESUMMV_BODY2; }); @@ -67,7 +68,8 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) counting_iterator(0), N, [=](Index_type i) { POLYBENCH_GESUMMV_BODY1; - std::for_each_n( counting_iterator(0), N, + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, [=,&tmpdot,&ydot](Index_type j) { poly_gesummv_base_lam2(i, j, tmpdot, ydot); }); From 6a91b3d35b273b6a666578ebbbf831df4eb0082e Mon Sep 17 
00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 13:45:10 +0300 Subject: [PATCH 151/174] INDEXLIST_3LOOP validated manually; Intel busted --- src/basic/INDEXLIST_3LOOP-StdPar.cpp | 45 +++++++++++----------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index 68da23c56..c443f0468 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -52,22 +52,17 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; }); -#if 0 - Index_type count = 0; - - for (Index_type i = ibegin; i < iend+1; ++i ) { - Index_type inc = counts[i]; - counts[i] = count; - count += inc; - } -#else // The validation does not notice if the exscan - // is removed, or otherwise forced to be wrong... -#warning This may be incorrect... - std::exclusive_scan( std::execution::par_unseq, + // is removed, or otherwise forced to be wrong. + // Using brute-force validation (see below): + // Intel outputs 0s when any execution policy is used. + // NVHPC (GPU) is fine. 
+ std::exclusive_scan( +#ifdef __NVCOMPILER + std::execution::par_unseq, +#endif counts+ibegin, counts+iend+1, counts+ibegin, 0); -#endif std::for_each_n( std::execution::par_unseq, counting_iterator(ibegin), iend-ibegin, @@ -77,6 +72,11 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG m_len = counts[iend]; +#if BRUTE_FORCE_VALIDATION + for (Index_type i = ibegin; i < iend+1; ++i ) { + std::cout << "C: " << i << "," << counts[i] << "\n"; + } +#endif } stopTimer(); @@ -106,22 +106,13 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG indexlist_conditional_lam(i); }); -#if 0 - Index_type count = 0; - - for (Index_type i = ibegin; i < iend+1; ++i ) { - Index_type inc = counts[i]; - counts[i] = count; - count += inc; - } -#else - // The validation does not notice if the exscan - // is removed, or otherwise forced to be wrong... -#warning This may be incorrect... - std::exclusive_scan( std::execution::par_unseq, + // See comments above... + std::exclusive_scan( +#ifdef __NVCOMPILER + std::execution::par_unseq, +#endif counts+ibegin, counts+iend+1, counts+ibegin, 0); -#endif std::for_each_n( std::execution::par_unseq, counting_iterator(ibegin), iend-ibegin, From b406723064a0be3d7612d8dbc431156f3aebf5dc Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 13 Oct 2022 15:23:21 +0300 Subject: [PATCH 152/174] GCC broke too --- src/basic/INDEXLIST_3LOOP-StdPar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index c443f0468..86237a894 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -55,7 +55,7 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG // The validation does not notice if the exscan // is removed, or otherwise forced to be wrong. 
// Using brute-force validation (see below): - // Intel outputs 0s when any execution policy is used. + // Intel and GCC output 0s when any execution policy is used. // NVHPC (GPU) is fine. std::exclusive_scan( #ifdef __NVCOMPILER From e51cd8c980eea3e9d7ba99167ad7aeae184bc55c Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 26 Oct 2022 13:51:23 +0300 Subject: [PATCH 153/174] suppress misspelled --- README.stdpar | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.stdpar b/README.stdpar index 392bfa9ad..61a388a32 100644 --- a/README.stdpar +++ b/README.stdpar @@ -22,7 +22,7 @@ $ diff /opt/nvidia/hpc_sdk/Linux_x86_64/22.[35]/compilers/include/nvhpc/numeric_ cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=multicore -acc=multicore -mp=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 -cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_supress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8 +cmake .. 
-DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8 ## CPU From bc5976b4d06db03395464fd4efd8f9c9d464f0c1 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Wed, 26 Oct 2022 04:20:48 -0700 Subject: [PATCH 154/174] cleanup workaround for __throw_bad_array_new_length --- src/algorithm/SORTPAIRS-StdPar.cpp | 3 --- src/apps/HALOEXCHANGE-StdPar.cpp | 3 --- src/common/StdParUtils.hpp | 9 ++++++++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index 9cfbf74a8..bc9318486 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) -static inline void std::__throw_bad_array_new_length() { std::abort(); } -#endif #include #include diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index 3318e9612..322bf376d 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -11,9 +11,6 @@ #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" -#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) -static inline void std::__throw_bad_array_new_length() { std::abort(); } -#endif #include diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp index 514dc56e2..82ae2fa89 100644 --- a/src/common/StdParUtils.hpp +++ b/src/common/StdParUtils.hpp @@ -33,7 +33,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define NVCXX_GPU_ENABLED #endif -#if defined(_NVHPC_STDPAR_MULTICORE) +#if ( defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA)) +#define NVHPC_CALC_VERSION(MAJOR, MINOR, PATCH) (((MAJOR) * 10000) + ((MINOR) * 100) + (PATCH)) +#if NVHPC_CALC_VERSION(__NVCOMPILER_MAJOR__,__NVCOMPILER_MINOR__,__NVCOMPILER_PATCHLEVEL__) < 220900 +static inline void std::__throw_bad_array_new_length() { std::abort(); } +#endif +#endif + +#if 0 //defined(_NVHPC_STDPAR_MULTICORE) #warning COLLAPSE (TESTING ONLY - DISABLE IN PRODUCTION) #define USE_STDPAR_COLLAPSE #endif From 85f65ff5c4350eb99678d2406266efec790a707a Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 31 Oct 2022 12:42:38 +0200 Subject: [PATCH 155/174] rewrite to use transform not for_each_n --- src/stream/TRIAD-StdPar.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index cb0ab58e5..8f66bc20d 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -30,10 +30,6 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) TRIAD_DATA_SETUP; - auto triad_lam = [=](Index_type i) { - TRIAD_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -41,11 +37,17 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#if 0 std::for_each_n( std::execution::par_unseq, counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { TRIAD_BODY; }); +#else + std::transform( std::execution::par_unseq, + &b[ibegin], &b[iend], &c[ibegin], &a[ibegin], + [=](Real_type b, Real_type c) { return b + alpha * c; }); +#endif } stopTimer(); @@ -55,14 +57,30 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { +#if 0 + auto triad_lam = [=](Index_type i) { + TRIAD_BODY; + }; +#else + auto triad_lam = [=](Real_type b, Real_type c) { + return b + alpha * c; + }; +#endif + startTimer(); for (RepIndex_type 
irep = 0; irep < run_reps; ++irep) { +#if 0 std::for_each_n( std::execution::par_unseq, counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { triad_lam(i); }); +#else + std::transform( std::execution::par_unseq, + &b[ibegin], &b[iend], &c[ibegin], &a[ibegin], + triad_lam ); +#endif } stopTimer(); From 00e86904e459eeb1cf4622e15bb9cb612685c623 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 31 Oct 2022 12:45:14 +0200 Subject: [PATCH 156/174] rewrite to use transform not for_each_n --- src/stream/ADD-StdPar.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index d3ab6de8d..bd128418e 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -30,10 +30,6 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) ADD_DATA_SETUP; - auto add_lam = [=](Index_type i) { - ADD_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -41,11 +37,17 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#if 0 std::for_each_n( std::execution::par_unseq, counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { ADD_BODY; }); +#else + std::transform( std::execution::par_unseq, + &b[ibegin], &b[iend], &c[ibegin], &a[ibegin], + [=](Real_type b, Real_type c) { return b + c; }); +#endif } stopTimer(); @@ -55,14 +57,30 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { +#if 0 + auto add_lam = [=](Index_type i) { + ADD_BODY; + }; +#else + auto add_lam = [=](Real_type b, Real_type c) { + return b + c; + }; +#endif + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#if 0 std::for_each_n( std::execution::par_unseq, counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { add_lam(i); }); +#else + std::transform( std::execution::par_unseq, + &b[ibegin], &b[iend], &c[ibegin], &a[ibegin], + triad_lam ); +#endif } stopTimer(); From 
9d8d3e232ba6b93a8f8dda813d4c489ba6b93325 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 3 Nov 2022 04:21:01 -0700 Subject: [PATCH 157/174] explicit seq because cannot find the implicit one for who knows why --- src/polybench/POLYBENCH_3MM-StdPar.cpp | 3 ++- src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index 9fa125ffb..5a6eee985 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -44,7 +44,8 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), ni, [=](Index_type i) { - std::for_each_n( counting_iterator(0), nj, + std::for_each_n( std::execution::seq, + counting_iterator(0), nj, [=](Index_type j) { #endif POLYBENCH_3MM_BODY1; diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index a4c76d7fb..bceb1c9f6 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -47,7 +47,8 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), N, [=](Index_type i) { - std::for_each_n( counting_iterator(0), N, + std::for_each_n( std::execution::seq, + counting_iterator(0), N, [=](Index_type j) { #endif POLYBENCH_FLOYD_WARSHALL_BODY; @@ -83,7 +84,8 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) std::for_each_n( std::execution::par_unseq, counting_iterator(0), N, [=](Index_type i) { - std::for_each_n( counting_iterator(0), N, + std::for_each_n( std::execution::seq, + counting_iterator(0), N, [=](Index_type j) { #endif poly_floydwarshall_base_lam(k, i, j); From 00a54982a5d41462ca655b46d517e6ed96438595 Mon Sep 17 00:00:00 2001 From: Jeff 
Hammond Date: Thu, 3 Nov 2022 13:32:38 +0200 Subject: [PATCH 158/174] use transform instead --- src/stream/ADD-StdPar.cpp | 12 ++++++------ src/stream/MUL-StdPar.cpp | 26 ++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index bd128418e..0f944f6e7 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -45,8 +45,8 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) }); #else std::transform( std::execution::par_unseq, - &b[ibegin], &b[iend], &c[ibegin], &a[ibegin], - [=](Real_type b, Real_type c) { return b + c; }); + &a[ibegin], &a[iend], &b[ibegin], &c[ibegin], + [=](Real_type a, Real_type b) { return a + b; }); #endif } @@ -62,8 +62,8 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) ADD_BODY; }; #else - auto add_lam = [=](Real_type b, Real_type c) { - return b + c; + auto add_lam = [=](Real_type a, Real_type b) { + return a + b; }; #endif @@ -78,8 +78,8 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) }); #else std::transform( std::execution::par_unseq, - &b[ibegin], &b[iend], &c[ibegin], &a[ibegin], - triad_lam ); + &a[ibegin], &a[iend], &b[ibegin], &c[ibegin], + add_lam ); #endif } stopTimer(); diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index 399c62dd7..372e78201 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -30,10 +30,6 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) MUL_DATA_SETUP; - auto mul_lam = [=](Index_type i) { - MUL_BODY; - }; - switch ( vid ) { case Base_StdPar : { @@ -41,11 +37,17 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#if 0 std::for_each_n( std::execution::par_unseq, counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { MUL_BODY; }); +#else + std::transform( std::execution::par_unseq, + &c[ibegin], &c[iend], &b[ibegin], + [=](Real_type c) 
{ return alpha * c; }); +#endif } stopTimer(); @@ -55,14 +57,30 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { +#if 0 + auto mul_lam = [=](Index_type i) { + MUL_BODY; + }; +#else + auto mul_lam = [=](Real_type c) { + return alpha * c; + }; +#endif + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#if 0 std::for_each_n( std::execution::par_unseq, counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { mul_lam(i); }); +#else + std::transform( std::execution::par_unseq, + &c[ibegin], &c[iend], &b[ibegin], + mul_lam ); +#endif } stopTimer(); From 25ca55d1d39c01ead6c9ed40da202ecb28ba9ea8 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Thu, 3 Nov 2022 13:35:57 +0200 Subject: [PATCH 159/174] use transform instead --- src/stream/COPY-StdPar.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp index bc25a6a64..b2ea5422d 100644 --- a/src/stream/COPY-StdPar.cpp +++ b/src/stream/COPY-StdPar.cpp @@ -37,8 +37,15 @@ void COPY::runStdParVariant(VariantID vid, size_t tune_idx) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#if 0 std::copy( std::execution::par_unseq, &a[ibegin], &a[iend], &c[ibegin]); +#else + std::transform( std::execution::par_unseq, + &a[ibegin], &a[iend], &c[ibegin], + [=](Real_type a) { return a; }); +#endif + } stopTimer(); @@ -47,11 +54,24 @@ void COPY::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { +#if 1 + auto copy_lam = [=](Real_type a) { + return a; + }; +#endif + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#if 0 std::copy( std::execution::par_unseq, &a[ibegin], &a[iend], &c[ibegin]); +#else + std::transform( std::execution::par_unseq, + &a[ibegin], &a[iend], &c[ibegin], + copy_lam ); +#endif + } stopTimer(); From c9f029501e7f00339e15c22d98bf0d4d5e7864e8 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 28 Mar 2023 11:56:17 +0300 
Subject: [PATCH 160/174] dunno --- src/basic/INDEXLIST-StdPar.cpp | 32 +++++++++++++++++++++++++++++++- src/lcals/FIRST_MIN-StdPar.cpp | 11 ++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index b48bb3c7d..84d558527 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -19,7 +19,6 @@ namespace rajaperf namespace basic { - void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { #if defined(RUN_STDPAR) @@ -33,19 +32,50 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ case Base_StdPar : { + auto counts = std::vector(iend+1,0); + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +#if 0 Index_type count = 0; #warning needs parallel something for (Index_type i = ibegin; i < iend; ++i ) { if ( x[i] < 0.0 ) { list[count++] = i; + y[i] = 1; } } m_len = count; +#else + std::transform_exclusive_scan( //std::execution:seq, + &x[ibegin], &x[iend], + &counts[0], 0, + std::plus{}, + [=](Real_type x){ return (x < 0.0); }); + + std::for_each_n( //std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + if (counts[i] != counts[i+1]) { \ + list[counts[i]] = i; + } + }); + + m_len = counts[iend+1]; +#endif + + if (irep == 0) { + //printf("\n\n%d\n",counts[iend]); + //for (Index_type i = ibegin, j=0; i < iend && j Date: Tue, 28 Mar 2023 12:15:29 +0300 Subject: [PATCH 161/174] fix bad merge --- src/common/KernelBase.hpp | 3 --- src/lcals/FIRST_MIN-StdPar.cpp | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 75b297399..eb00f8672 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -336,11 +336,8 @@ class KernelBase #if defined(RAJA_ENABLE_TARGET_OPENMP) virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0; #endif -<<<<<<< HEAD 
virtual void runStdParVariant(VariantID vid, size_t tune_idx) = 0; -======= ->>>>>>> upstream/develop #if defined(RUN_KOKKOS) virtual void runKokkosVariant(VariantID vid, size_t tune_idx) { diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index 295b2ce6c..2378e6e23 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -42,16 +42,18 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) std::min_element( std::execution::par_unseq, &x[ibegin], &x[iend]); auto loc = std::distance(&x[ibegin], result); + m_minloc = std::max(m_minloc, loc); #else + FIRST_MIN_MINLOC_INIT; for (Index_type i = ibegin; i < iend; ++i ) { if ( x[i] < mymin.val ) { mymin.val = x[i]; mymin.loc = i; } } + m_minloc = mymin.loc; #endif - m_minloc = std::max(m_minloc, loc); } stopTimer(); From 834b519a85a6e36111cb69b35a9fd1614e555499 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 28 Mar 2023 13:32:32 +0300 Subject: [PATCH 162/174] add markdown --- README.stdpar | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/README.stdpar b/README.stdpar index 61a388a32..9b1f6ef99 100644 --- a/README.stdpar +++ b/README.stdpar @@ -1,29 +1,38 @@ # GCC -cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j8 +``` +cmake .. 
-DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j`nproc` +``` # NVC++ ## Patches -$ diff /opt/nvidia/hpc_sdk/Linux_x86_64/22.[35]/compilers/include/nvhpc/algorithm_execution.hpp +``` +$ diff /opt/nvidia/hpc_sdk/Linux_$(uname -m)/${V}/compilers/include/nvhpc/algorithm_execution.hpp 1066c1066 < _ASSERT_RANDOM_ACCESS(_FIt); --- > //_ASSERT_RANDOM_ACCESS(_FIt); +``` -$ diff /opt/nvidia/hpc_sdk/Linux_x86_64/22.[35]/compilers/include/nvhpc/numeric_execution.hpp +``` +$ diff /opt/nvidia/hpc_sdk/Linux_$(uname -m)/${V}/compilers/include/nvhpc/numeric_execution.hpp 386c386 < _ASSERT_RANDOM_ACCESS(_FIt); --- > //_ASSERT_RANDOM_ACCESS(_FIt); +``` ## OpenMP/OpenACC for atomics +``` cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=multicore -acc=multicore -mp=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 +``` +``` cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8 - +``` ## CPU @@ -92,5 +101,6 @@ Lambda_StdPar-default -6.0464819976872759102e+32 6.0464819976872759102e+32 # Intel +``` cmake .. 
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 - +``` From 74c9fda5882b71cda40c0e830bd975a632958492 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 28 Mar 2023 13:53:18 +0300 Subject: [PATCH 163/174] this was WIP - restore serial fallback --- src/basic/INDEXLIST-StdPar.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index 84d558527..39f1a6dff 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -37,14 +37,14 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { -#if 0 +#if 1 Index_type count = 0; #warning needs parallel something for (Index_type i = ibegin; i < iend; ++i ) { if ( x[i] < 0.0 ) { list[count++] = i; - y[i] = 1; + //y[i] = 1; } } From 16b9f09baf4b7cb164833db33ed9daa00c68609e Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 28 Mar 2023 15:57:36 +0300 Subject: [PATCH 164/174] give up on std::atomic for now --- src/basic/PI_ATOMIC-StdPar.cpp | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 06eace9a2..17e9b8fbf 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -10,10 +10,15 @@ #include "RAJA/RAJA.hpp" +#ifndef _OPENMP +#error Currently, OpenMP atomics are required here. +#endif + #if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) #include typedef cuda::std::atomic myAtomic; #else +// .fetch_add() for double is not available yet... 
#include typedef std::atomic myAtomic; #endif @@ -46,15 +51,17 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) for (RepIndex_type irep = 0; irep < run_reps; ++irep) { //myAtomic a_pi{m_pi_init}; - myAtomic * a_pi = new myAtomic; // i hate this - *a_pi = m_pi_init; + *pi = m_pi_init; std::for_each_n( std::execution::par, counting_iterator(ibegin), iend-ibegin, - [=](Index_type i) { + [=](Index_type i) { double x = (double(i) + 0.5) * dx; - *a_pi = *a_pi + dx / (1.0 + x * x); + _Pragma("omp atomic") + *pi += dx / (1.0 + x * x); + //a_pi.fetch_add(dx / (1.0 + x * x)); }); - *pi = *a_pi * 4.0; + //*pi = a_pi * 4.0; + *pi *= 4.0; } stopTimer(); @@ -64,23 +71,22 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) case Lambda_StdPar : { - auto piatomic_base_lam = [=](Index_type i, myAtomic * a_pi) { + auto piatomic_base_lam = [=](Index_type i) { double x = (double(i) + 0.5) * dx; - *a_pi = *a_pi + dx / (1.0 + x * x); + _Pragma("omp atomic") + *pi += dx / (1.0 + x * x); }; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - //myAtomic a_pi{m_pi_init}; - myAtomic * a_pi = new myAtomic; // i hate this - *a_pi = m_pi_init; + *pi = m_pi_init; std::for_each_n( std::execution::par, counting_iterator(ibegin), iend-ibegin, [=](Index_type i) { - piatomic_base_lam(i,a_pi); + piatomic_base_lam(i); }); - *pi = *a_pi * 4.0; + *pi *= 4.0; } stopTimer(); From 043bb5a6e9f90b941b1648bc3db996a1719625e0 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 31 Mar 2023 10:26:19 +0300 Subject: [PATCH 165/174] add RAJA_ENABLE_STDPAR guard --- src/stream/ADD-StdPar.cpp | 5 +++++ src/stream/COPY-StdPar.cpp | 5 +++++ src/stream/DOT-StdPar.cpp | 5 +++++ src/stream/MUL-StdPar.cpp | 5 +++++ src/stream/TRIAD-StdPar.cpp | 5 +++++ 5 files changed, 25 insertions(+) diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index 0f944f6e7..5e2302869 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -10,6 +10,8 @@ 
#include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -98,3 +100,6 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp index b2ea5422d..2998e34e8 100644 --- a/src/stream/COPY-StdPar.cpp +++ b/src/stream/COPY-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -89,3 +91,6 @@ void COPY::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp index 23d031d91..0d291b1f2 100644 --- a/src/stream/DOT-StdPar.cpp +++ b/src/stream/DOT-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -89,3 +91,6 @@ void DOT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index 372e78201..f2a5bca36 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -98,3 +100,6 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index 8f66bc20d..9de2f7bd3 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -98,3 +100,6 @@ void TRIAD::runStdParVariant(VariantID vid, size_t 
tune_idx) } // end namespace stream } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + From 4c40ab0a41cbc167665b9cea11499c04c75d58b9 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 31 Mar 2023 10:31:02 +0300 Subject: [PATCH 166/174] add RAJA_ENABLE_STDPAR guard --- src/basic/DAXPY-StdPar.cpp | 5 +++++ src/basic/DAXPY_ATOMIC-StdPar.cpp | 5 +++++ src/basic/IF_QUAD-StdPar.cpp | 5 +++++ src/basic/INDEXLIST-StdPar.cpp | 5 +++++ src/basic/INDEXLIST_3LOOP-StdPar.cpp | 5 +++++ src/basic/INIT3-StdPar.cpp | 5 +++++ src/basic/INIT_VIEW1D-StdPar.cpp | 5 +++++ src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 5 +++++ src/basic/MAT_MAT_SHARED-StdPar.cpp | 8 ++++++++ src/basic/MULADDSUB-StdPar.cpp | 5 +++++ src/basic/NESTED_INIT-StdPar.cpp | 5 +++++ src/basic/PI_ATOMIC-StdPar.cpp | 5 +++++ src/basic/PI_REDUCE-StdPar.cpp | 5 +++++ src/basic/REDUCE3_INT-StdPar.cpp | 5 +++++ src/basic/REDUCE_STRUCT-StdPar.cpp | 5 +++++ src/basic/TRAP_INT-StdPar.cpp | 5 +++++ 16 files changed, 83 insertions(+) diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp index 5f255b6e5..9ec6e6671 100644 --- a/src/basic/DAXPY-StdPar.cpp +++ b/src/basic/DAXPY-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -80,3 +82,6 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index c952ec895..dac93568e 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -113,3 +115,6 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git 
a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp index 9cb80cd47..9646d06c1 100644 --- a/src/basic/IF_QUAD-StdPar.cpp +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -80,3 +82,6 @@ void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index 39f1a6dff..9ca905903 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -118,3 +120,6 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index 86237a894..bfece60bd 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -141,3 +143,6 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp index 9db2bfa9c..1aa20bb6a 100644 --- a/src/basic/INIT3-StdPar.cpp +++ b/src/basic/INIT3-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -80,3 +82,6 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp 
b/src/basic/INIT_VIEW1D-StdPar.cpp index 4f6655e50..d4928f98c 100644 --- a/src/basic/INIT_VIEW1D-StdPar.cpp +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -80,3 +82,6 @@ void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp index d5f93350a..e5f2a1f06 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -80,3 +82,6 @@ void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp index 36671c8f0..7ac34d7c6 100644 --- a/src/basic/MAT_MAT_SHARED-StdPar.cpp +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -8,6 +8,11 @@ #include "MAT_MAT_SHARED.hpp" +#include "RAJA/RAJA.hpp" + +#if defined(RAJA_ENABLE_STDPAR) + + #include namespace rajaperf { @@ -26,3 +31,6 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp index 42f558c1a..7b9d104c0 100644 --- a/src/basic/MULADDSUB-StdPar.cpp +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -80,3 +82,6 @@ void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git 
a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp index 7ae128554..ff19d5a5f 100644 --- a/src/basic/NESTED_INIT-StdPar.cpp +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -108,3 +110,6 @@ void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index 17e9b8fbf..b862fa3b3 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #ifndef _OPENMP #error Currently, OpenMP atomics are required here. #endif @@ -105,3 +107,6 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp index a3fc51531..0cf1e0f56 100644 --- a/src/basic/PI_REDUCE-StdPar.cpp +++ b/src/basic/PI_REDUCE-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -91,3 +93,6 @@ void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp index c1de02c72..0ae81396c 100644 --- a/src/basic/REDUCE3_INT-StdPar.cpp +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include #include "common/StdParUtils.hpp" @@ -110,3 +112,6 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp 
b/src/basic/REDUCE_STRUCT-StdPar.cpp index 6dea32346..3cb32c8d9 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include #include "common/StdParUtils.hpp" @@ -100,3 +102,6 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp index e1e9b4cd0..c8d3725f4 100644 --- a/src/basic/TRAP_INT-StdPar.cpp +++ b/src/basic/TRAP_INT-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -105,3 +107,6 @@ void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + From 75a6eb63fe4c190834331ff3dc65d65101120ac1 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 31 Mar 2023 10:33:08 +0300 Subject: [PATCH 167/174] add RAJA_ENABLE_STDPAR guard --- src/apps/CONVECTION3DPA-StdPar.cpp | 5 +++++ src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 5 +++++ src/apps/DIFFUSION3DPA-StdPar.cpp | 5 +++++ src/apps/ENERGY-StdPar.cpp | 5 +++++ src/apps/FIR-StdPar.cpp | 5 +++++ src/apps/HALOEXCHANGE-StdPar.cpp | 5 +++++ src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 5 +++++ src/apps/LTIMES-StdPar.cpp | 5 +++++ src/apps/LTIMES_NOVIEW-StdPar.cpp | 5 +++++ src/apps/MASS3DPA-StdPar.cpp | 5 +++++ src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp | 5 +++++ src/apps/PRESSURE-StdPar.cpp | 5 +++++ src/apps/VOL3D-StdPar.cpp | 5 +++++ 13 files changed, 65 insertions(+) diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp index 9405b81f8..068c37b89 100644 --- a/src/apps/CONVECTION3DPA-StdPar.cpp +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include 
"common/StdParUtils.hpp" #include @@ -120,3 +122,6 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp index e10ea9eff..2467501e7 100644 --- a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include "AppsData.hpp" @@ -93,3 +95,6 @@ void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index 2b4cf5797..caa5e537d 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -11,6 +11,8 @@ #include "DIFFUSION3DPA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "RAJA/RAJA.hpp" #include "common/StdParUtils.hpp" @@ -130,3 +132,6 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp index 4c2c15456..293aa5763 100644 --- a/src/apps/ENERGY-StdPar.cpp +++ b/src/apps/ENERGY-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -156,3 +158,6 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) } // iend-ibegin namespace apps } // iend-ibegin namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp index a7d688df7..5131548f1 100644 --- a/src/apps/FIR-StdPar.cpp +++ b/src/apps/FIR-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ 
-88,3 +90,6 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index 322bf376d..f431069a2 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -131,3 +133,6 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } // iend-ibegin namespace apps } // iend-ibegin namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp index db96fb3b2..ad2ad7af9 100644 --- a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -172,3 +174,6 @@ void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index ce37f3bf1..a8a6aa428 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -106,3 +108,6 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp index 7d6102d60..d8e6e8d7c 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp +++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -106,3 +108,6 @@ void 
LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index 1ca1e6d6e..779cd0700 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -108,3 +110,6 @@ void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp index 1be53f986..3f7ffe740 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include "AppsData.hpp" @@ -84,3 +86,6 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp index 637db4904..b0fe0f147 100644 --- a/src/apps/PRESSURE-StdPar.cpp +++ b/src/apps/PRESSURE-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -96,3 +98,6 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) } // iend-ibegin namespace apps } // iend-ibegin namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp index 1aa9dfdd0..1b30edb79 100644 --- a/src/apps/VOL3D-StdPar.cpp +++ b/src/apps/VOL3D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "AppsData.hpp" #include "common/StdParUtils.hpp" @@ -89,3 +91,6 @@ void 
VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + From 6770f1173a5045bbea9ffb5fcbc84140b236202b Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 31 Mar 2023 10:33:46 +0300 Subject: [PATCH 168/174] add RAJA_ENABLE_STDPAR guard --- src/algorithm/MEMCPY-StdPar.cpp | 5 +++++ src/algorithm/MEMSET-StdPar.cpp | 5 +++++ src/algorithm/REDUCE_SUM-StdPar.cpp | 5 +++++ src/algorithm/SCAN-StdPar.cpp | 5 +++++ src/algorithm/SORT-StdPar.cpp | 5 +++++ src/algorithm/SORTPAIRS-StdPar.cpp | 5 +++++ 6 files changed, 30 insertions(+) diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp index 4ee637961..061c15ff1 100644 --- a/src/algorithm/MEMCPY-StdPar.cpp +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -147,3 +149,6 @@ void MEMCPY::setStdParTuningDefinitions(VariantID vid) } // end namespace algorithm } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp index 3b4c4edab..75116cf46 100644 --- a/src/algorithm/MEMSET-StdPar.cpp +++ b/src/algorithm/MEMSET-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -147,3 +149,6 @@ void MEMSET::setStdParTuningDefinitions(VariantID vid) } // end namespace algorithm } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp index c2605250a..73f1e94a6 100644 --- a/src/algorithm/REDUCE_SUM-StdPar.cpp +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -87,3 +89,6 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune } // 
end namespace algorithm } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index 0c99ae9d9..113fc09fb 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -62,3 +64,6 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) } // end namespace algorithm } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp index 5a6fd384c..c3a20a355 100644 --- a/src/algorithm/SORT-StdPar.cpp +++ b/src/algorithm/SORT-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -56,3 +58,6 @@ void SORT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace algorithm } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index bc9318486..3572544cf 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -94,3 +96,6 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace algorithm } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + From 8402b34d93295c5b79afa24b8a9e6182bd54cd50 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 31 Mar 2023 10:34:32 +0300 Subject: [PATCH 169/174] add RAJA_ENABLE_STDPAR guard --- src/lcals/DIFF_PREDICT-StdPar.cpp | 5 +++++ src/lcals/EOS-StdPar.cpp | 5 +++++ src/lcals/FIRST_DIFF-StdPar.cpp | 5 +++++ src/lcals/FIRST_MIN-StdPar.cpp | 5 +++++ src/lcals/FIRST_SUM-StdPar.cpp | 5 +++++ src/lcals/GEN_LIN_RECUR-StdPar.cpp | 5 +++++ 
src/lcals/HYDRO_1D-StdPar.cpp | 5 +++++ src/lcals/HYDRO_2D-StdPar.cpp | 5 +++++ src/lcals/INT_PREDICT-StdPar.cpp | 5 +++++ src/lcals/PLANCKIAN-StdPar.cpp | 5 +++++ src/lcals/TRIDIAG_ELIM-StdPar.cpp | 5 +++++ 11 files changed, 55 insertions(+) diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp index fd2d786ce..df3eafdd0 100644 --- a/src/lcals/DIFF_PREDICT-StdPar.cpp +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -81,3 +83,6 @@ void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp index 21484dc6e..ac7f43ed8 100644 --- a/src/lcals/EOS-StdPar.cpp +++ b/src/lcals/EOS-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -81,3 +83,6 @@ void EOS::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp index ca3c851dd..cd4242d36 100644 --- a/src/lcals/FIRST_DIFF-StdPar.cpp +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -81,3 +83,6 @@ void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index 2378e6e23..460ba7b3c 100644 --- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -98,3 +100,6 @@ void 
FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp index 7f99509de..9593c359a 100644 --- a/src/lcals/FIRST_SUM-StdPar.cpp +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -81,3 +83,6 @@ void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp index 991b01b26..ec0514145 100644 --- a/src/lcals/GEN_LIN_RECUR-StdPar.cpp +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -94,3 +96,6 @@ void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp index cdd2a43bc..08e925599 100644 --- a/src/lcals/HYDRO_1D-StdPar.cpp +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -81,3 +83,6 @@ void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp index 0fc0a48e7..e1477c7a3 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ b/src/lcals/HYDRO_2D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -204,3 +206,6 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace 
lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp index 095662ac6..e44306b95 100644 --- a/src/lcals/INT_PREDICT-StdPar.cpp +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -81,3 +83,6 @@ void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp index 1a278765d..1250419e4 100644 --- a/src/lcals/PLANCKIAN-StdPar.cpp +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -82,3 +84,6 @@ void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index 08c3e13e9..a171c50a4 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -83,3 +85,6 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + From d4db911aec15ab9561f2a0dc8a4cf1294c96746a Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 31 Mar 2023 10:36:46 +0300 Subject: [PATCH 170/174] N namespace oops --- src/polybench/POLYBENCH_ATAX-StdPar.cpp | 9 +++++++-- src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 9 +++++++-- src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 9 +++++++-- src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 9 +++++++-- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git 
a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp index 8aa00c5a0..63295a7cc 100644 --- a/src/polybench/POLYBENCH_ATAX-StdPar.cpp +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -121,5 +123,8 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // N namespace polybench -} // N namespace rajaperf +} // end namespace polybench +} // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index bceb1c9f6..d4f41d487 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -110,5 +112,8 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // N namespace polybench -} // N namespace rajaperf +} // end namespace polybench +} // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 218016d5e..6ac291ab8 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -159,5 +161,8 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // n namespace basic -} // n namespace rajaperf +} // end namespace basic +} // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index b46420597..02e3f0930 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ 
b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -91,5 +93,8 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // N namespace polybench -} // N namespace rajaperf +} // end namespace polybench +} // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + From 75b16b9a34f215e913784c9273fc14b6f5b5a4ff Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 31 Mar 2023 10:36:54 +0300 Subject: [PATCH 171/174] add RAJA_ENABLE_STDPAR guard --- src/polybench/POLYBENCH_2MM-StdPar.cpp | 5 +++++ src/polybench/POLYBENCH_3MM-StdPar.cpp | 5 +++++ src/polybench/POLYBENCH_ADI-StdPar.cpp | 5 +++++ src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 5 +++++ src/polybench/POLYBENCH_GEMM-StdPar.cpp | 5 +++++ src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 5 +++++ src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 5 +++++ src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 5 +++++ src/polybench/POLYBENCH_MVT-StdPar.cpp | 5 +++++ 9 files changed, 45 insertions(+) diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index 8c4a85741..a9175089e 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -197,3 +199,6 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index 5a6eee985..a972f4d28 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -243,3 +245,6 @@ void 
POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index b8a75b305..f50f2b19d 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -147,3 +149,6 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) } // n-2 namespace polybench } // n-2 namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 64b2ae0d2..618f4b8bc 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -156,3 +158,6 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index 4c4b9ab8a..c826b5e6c 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -126,3 +128,6 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index aae0c085b..d83daaa00 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -10,6 +10,8 @@ #include 
"RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -191,3 +193,6 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp index fa63d259f..629d16bb4 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -103,3 +105,6 @@ void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index 36ccba5af..51bcc1a99 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" #include @@ -162,3 +164,6 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp index d94b105dd..2e94dbbb7 100644 --- a/src/polybench/POLYBENCH_MVT-StdPar.cpp +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -10,6 +10,8 @@ #include "RAJA/RAJA.hpp" +#if defined(RAJA_ENABLE_STDPAR) + #include "common/StdParUtils.hpp" namespace rajaperf @@ -125,3 +127,6 @@ void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf + +#endif // RAJA_ENABLE_STDPAR + From 23d04fe16adeec2f5db0bef8d41f832c09802fad Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: 
Fri, 31 Mar 2023 10:37:39 +0300 Subject: [PATCH 172/174] end namespace oops --- src/polybench/POLYBENCH_ADI-StdPar.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index f50f2b19d..a73b0b5f0 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -147,8 +147,8 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) #endif } -} // n-2 namespace polybench -} // n-2 namespace rajaperf +} // end namespace polybench +} // end namespace rajaperf #endif // RAJA_ENABLE_STDPAR From bbe72dd1936a00a8c88b87f973e42f53406be829 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 31 Mar 2023 10:48:47 +0300 Subject: [PATCH 173/174] change guard name to BUILD_STDPAR --- CMakeLists.txt | 1 + src/algorithm/MEMCPY-StdPar.cpp | 4 ++-- src/algorithm/MEMSET-StdPar.cpp | 4 ++-- src/algorithm/REDUCE_SUM-StdPar.cpp | 4 ++-- src/algorithm/SCAN-StdPar.cpp | 4 ++-- src/algorithm/SORT-StdPar.cpp | 4 ++-- src/algorithm/SORTPAIRS-StdPar.cpp | 4 ++-- src/apps/CONVECTION3DPA-StdPar.cpp | 4 ++-- src/apps/DEL_DOT_VEC_2D-StdPar.cpp | 4 ++-- src/apps/DIFFUSION3DPA-StdPar.cpp | 4 ++-- src/apps/ENERGY-StdPar.cpp | 4 ++-- src/apps/FIR-StdPar.cpp | 4 ++-- src/apps/HALOEXCHANGE-StdPar.cpp | 4 ++-- src/apps/HALOEXCHANGE_FUSED-StdPar.cpp | 4 ++-- src/apps/LTIMES-StdPar.cpp | 4 ++-- src/apps/LTIMES_NOVIEW-StdPar.cpp | 4 ++-- src/apps/MASS3DPA-StdPar.cpp | 4 ++-- src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp | 4 ++-- src/apps/PRESSURE-StdPar.cpp | 4 ++-- src/apps/VOL3D-StdPar.cpp | 4 ++-- src/basic/DAXPY-StdPar.cpp | 4 ++-- src/basic/DAXPY_ATOMIC-StdPar.cpp | 4 ++-- src/basic/IF_QUAD-StdPar.cpp | 4 ++-- src/basic/INDEXLIST-StdPar.cpp | 4 ++-- src/basic/INDEXLIST_3LOOP-StdPar.cpp | 4 ++-- src/basic/INIT3-StdPar.cpp | 4 ++-- src/basic/INIT_VIEW1D-StdPar.cpp | 4 ++-- src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp | 4 ++-- src/basic/MAT_MAT_SHARED-StdPar.cpp | 4 
++-- src/basic/MULADDSUB-StdPar.cpp | 4 ++-- src/basic/NESTED_INIT-StdPar.cpp | 4 ++-- src/basic/PI_ATOMIC-StdPar.cpp | 4 ++-- src/basic/PI_REDUCE-StdPar.cpp | 4 ++-- src/basic/REDUCE3_INT-StdPar.cpp | 4 ++-- src/basic/REDUCE_STRUCT-StdPar.cpp | 4 ++-- src/basic/TRAP_INT-StdPar.cpp | 4 ++-- src/lcals/DIFF_PREDICT-StdPar.cpp | 4 ++-- src/lcals/EOS-StdPar.cpp | 4 ++-- src/lcals/FIRST_DIFF-StdPar.cpp | 4 ++-- src/lcals/FIRST_MIN-StdPar.cpp | 4 ++-- src/lcals/FIRST_SUM-StdPar.cpp | 4 ++-- src/lcals/GEN_LIN_RECUR-StdPar.cpp | 4 ++-- src/lcals/HYDRO_1D-StdPar.cpp | 4 ++-- src/lcals/HYDRO_2D-StdPar.cpp | 4 ++-- src/lcals/INT_PREDICT-StdPar.cpp | 4 ++-- src/lcals/PLANCKIAN-StdPar.cpp | 4 ++-- src/lcals/TRIDIAG_ELIM-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_2MM-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_3MM-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_ADI-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_ATAX-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_GEMM-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_GEMVER-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_GESUMMV-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp | 4 ++-- src/polybench/POLYBENCH_MVT-StdPar.cpp | 4 ++-- src/stream/ADD-StdPar.cpp | 6 ++++-- src/stream/COPY-StdPar.cpp | 4 ++-- src/stream/DOT-StdPar.cpp | 4 ++-- src/stream/MUL-StdPar.cpp | 4 ++-- src/stream/TRIAD-StdPar.cpp | 4 ++-- 65 files changed, 131 insertions(+), 128 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bcde1969..cda39d3ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ if (ENABLE_KOKKOS) elseif (ENABLE_STDPAR) set(CMAKE_CXX_STANDARD 20) set(BLT_CXX_STD c++14) + add_definitions(-DBUILD_STDPAR) else() set(CMAKE_CXX_STANDARD 14) set(BLT_CXX_STD c++14) diff --git 
a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp index 061c15ff1..4d36f161b 100644 --- a/src/algorithm/MEMCPY-StdPar.cpp +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -150,5 +150,5 @@ void MEMCPY::setStdParTuningDefinitions(VariantID vid) } // end namespace algorithm } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp index 75116cf46..e6903ec3b 100644 --- a/src/algorithm/MEMSET-StdPar.cpp +++ b/src/algorithm/MEMSET-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -150,5 +150,5 @@ void MEMSET::setStdParTuningDefinitions(VariantID vid) } // end namespace algorithm } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp index 73f1e94a6..c35a6657a 100644 --- a/src/algorithm/REDUCE_SUM-StdPar.cpp +++ b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -90,5 +90,5 @@ void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune } // end namespace algorithm } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp index 113fc09fb..510f6e181 100644 --- a/src/algorithm/SCAN-StdPar.cpp +++ b/src/algorithm/SCAN-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -65,5 +65,5 @@ void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) } // end 
namespace algorithm } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp index c3a20a355..2f45b62ab 100644 --- a/src/algorithm/SORT-StdPar.cpp +++ b/src/algorithm/SORT-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -59,5 +59,5 @@ void SORT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace algorithm } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp index 3572544cf..0a75f028a 100644 --- a/src/algorithm/SORTPAIRS-StdPar.cpp +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -97,5 +97,5 @@ void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace algorithm } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp index 068c37b89..2b36d2dc3 100644 --- a/src/apps/CONVECTION3DPA-StdPar.cpp +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -123,5 +123,5 @@ void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp index 2467501e7..bbe987735 100644 --- a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp +++ b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include 
"common/StdParUtils.hpp" @@ -96,5 +96,5 @@ void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp index caa5e537d..a05a4370a 100644 --- a/src/apps/DIFFUSION3DPA-StdPar.cpp +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -11,7 +11,7 @@ #include "DIFFUSION3DPA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "RAJA/RAJA.hpp" @@ -133,5 +133,5 @@ void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp index 293aa5763..6d797f8ed 100644 --- a/src/apps/ENERGY-StdPar.cpp +++ b/src/apps/ENERGY-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -159,5 +159,5 @@ void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) } // iend-ibegin namespace apps } // iend-ibegin namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp index 5131548f1..2e70b8a38 100644 --- a/src/apps/FIR-StdPar.cpp +++ b/src/apps/FIR-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -91,5 +91,5 @@ void FIR::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp index f431069a2..6f549bd03 100644 --- a/src/apps/HALOEXCHANGE-StdPar.cpp +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) 
+#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -134,5 +134,5 @@ void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) } // iend-ibegin namespace apps } // iend-ibegin namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp index ad2ad7af9..47c531e50 100644 --- a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp +++ b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -175,5 +175,5 @@ void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp index a8a6aa428..3ccd3c987 100644 --- a/src/apps/LTIMES-StdPar.cpp +++ b/src/apps/LTIMES-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -109,5 +109,5 @@ void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp index d8e6e8d7c..37087f20d 100644 --- a/src/apps/LTIMES_NOVIEW-StdPar.cpp +++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -109,5 +109,5 @@ void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp index 779cd0700..8e18cd50b 100644 --- a/src/apps/MASS3DPA-StdPar.cpp +++ 
b/src/apps/MASS3DPA-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -111,5 +111,5 @@ void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp index 3f7ffe740..81f5ef3d1 100644 --- a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -87,5 +87,5 @@ void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUS } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp index b0fe0f147..551c8c730 100644 --- a/src/apps/PRESSURE-StdPar.cpp +++ b/src/apps/PRESSURE-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -99,5 +99,5 @@ void PRESSURE::runStdParVariant(VariantID vid, size_t tune_idx) } // iend-ibegin namespace apps } // iend-ibegin namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp index 1b30edb79..087ebd577 100644 --- a/src/apps/VOL3D-StdPar.cpp +++ b/src/apps/VOL3D-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "AppsData.hpp" @@ -92,5 +92,5 @@ void VOL3D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace apps } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp 
index 9ec6e6671..6ee417e53 100644 --- a/src/basic/DAXPY-StdPar.cpp +++ b/src/basic/DAXPY-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -83,5 +83,5 @@ void DAXPY::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp index dac93568e..6ad5f90a7 100644 --- a/src/basic/DAXPY_ATOMIC-StdPar.cpp +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -116,5 +116,5 @@ void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp index 9646d06c1..c36a7fcaa 100644 --- a/src/basic/IF_QUAD-StdPar.cpp +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -83,5 +83,5 @@ void IF_QUAD::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp index 9ca905903..2da1c38b9 100644 --- a/src/basic/INDEXLIST-StdPar.cpp +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -121,5 +121,5 @@ void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR 
diff --git a/src/basic/INDEXLIST_3LOOP-StdPar.cpp b/src/basic/INDEXLIST_3LOOP-StdPar.cpp index bfece60bd..770632cd2 100644 --- a/src/basic/INDEXLIST_3LOOP-StdPar.cpp +++ b/src/basic/INDEXLIST_3LOOP-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -144,5 +144,5 @@ void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/INIT3-StdPar.cpp b/src/basic/INIT3-StdPar.cpp index 1aa20bb6a..1817a1ee1 100644 --- a/src/basic/INIT3-StdPar.cpp +++ b/src/basic/INIT3-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -83,5 +83,5 @@ void INIT3::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp index d4928f98c..1bbfce7f0 100644 --- a/src/basic/INIT_VIEW1D-StdPar.cpp +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -83,5 +83,5 @@ void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp index e5f2a1f06..e841874a3 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -83,5 +83,5 @@ void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID 
vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp index 7ac34d7c6..b7814c1ae 100644 --- a/src/basic/MAT_MAT_SHARED-StdPar.cpp +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include @@ -32,5 +32,5 @@ void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp index 7b9d104c0..7b71fb648 100644 --- a/src/basic/MULADDSUB-StdPar.cpp +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -83,5 +83,5 @@ void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp index ff19d5a5f..575c6e9e3 100644 --- a/src/basic/NESTED_INIT-StdPar.cpp +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -111,5 +111,5 @@ void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp index b862fa3b3..f33bc369b 100644 --- a/src/basic/PI_ATOMIC-StdPar.cpp +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #ifndef _OPENMP #error 
Currently, OpenMP atomics are required here. @@ -108,5 +108,5 @@ void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp index 0cf1e0f56..cd466a225 100644 --- a/src/basic/PI_REDUCE-StdPar.cpp +++ b/src/basic/PI_REDUCE-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -94,5 +94,5 @@ void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp index 0ae81396c..ac4abcf0d 100644 --- a/src/basic/REDUCE3_INT-StdPar.cpp +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include #include "common/StdParUtils.hpp" @@ -113,5 +113,5 @@ void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp index 3cb32c8d9..e82cc98ee 100644 --- a/src/basic/REDUCE_STRUCT-StdPar.cpp +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include #include "common/StdParUtils.hpp" @@ -103,5 +103,5 @@ void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp index c8d3725f4..c2f22206b 100644 --- a/src/basic/TRAP_INT-StdPar.cpp +++ 
b/src/basic/TRAP_INT-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -108,5 +108,5 @@ void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp index df3eafdd0..873703bcf 100644 --- a/src/lcals/DIFF_PREDICT-StdPar.cpp +++ b/src/lcals/DIFF_PREDICT-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -84,5 +84,5 @@ void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp index ac7f43ed8..eb74b434f 100644 --- a/src/lcals/EOS-StdPar.cpp +++ b/src/lcals/EOS-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -84,5 +84,5 @@ void EOS::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp index cd4242d36..9bf083a19 100644 --- a/src/lcals/FIRST_DIFF-StdPar.cpp +++ b/src/lcals/FIRST_DIFF-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -84,5 +84,5 @@ void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp index 460ba7b3c..0f9067ea7 100644 
--- a/src/lcals/FIRST_MIN-StdPar.cpp +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -101,5 +101,5 @@ void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp index 9593c359a..8c2881b02 100644 --- a/src/lcals/FIRST_SUM-StdPar.cpp +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -84,5 +84,5 @@ void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp index ec0514145..bcf188c70 100644 --- a/src/lcals/GEN_LIN_RECUR-StdPar.cpp +++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -97,5 +97,5 @@ void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp index 08e925599..c458fdf64 100644 --- a/src/lcals/HYDRO_1D-StdPar.cpp +++ b/src/lcals/HYDRO_1D-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -84,5 +84,5 @@ void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/HYDRO_2D-StdPar.cpp 
b/src/lcals/HYDRO_2D-StdPar.cpp index e1477c7a3..da131a2b6 100644 --- a/src/lcals/HYDRO_2D-StdPar.cpp +++ b/src/lcals/HYDRO_2D-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -207,5 +207,5 @@ void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp index e44306b95..aeecccddc 100644 --- a/src/lcals/INT_PREDICT-StdPar.cpp +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -84,5 +84,5 @@ void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp index 1250419e4..cb55f5869 100644 --- a/src/lcals/PLANCKIAN-StdPar.cpp +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -85,5 +85,5 @@ void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp index a171c50a4..9ad42dd9c 100644 --- a/src/lcals/TRIDIAG_ELIM-StdPar.cpp +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -86,5 +86,5 @@ void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace lcals } // end namespace rajaperf -#endif // 
RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp index a9175089e..ed89ff4fe 100644 --- a/src/polybench/POLYBENCH_2MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -200,5 +200,5 @@ void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp index a972f4d28..36ead6be7 100644 --- a/src/polybench/POLYBENCH_3MM-StdPar.cpp +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -246,5 +246,5 @@ void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp index a73b0b5f0..7ea88960a 100644 --- a/src/polybench/POLYBENCH_ADI-StdPar.cpp +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -150,5 +150,5 @@ void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp index 63295a7cc..88866ca63 100644 --- a/src/polybench/POLYBENCH_ATAX-StdPar.cpp +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if 
defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -126,5 +126,5 @@ void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp index 618f4b8bc..64c50c34a 100644 --- a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -159,5 +159,5 @@ void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp index d4f41d487..b17f9f9f4 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -115,5 +115,5 @@ void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp index c826b5e6c..1c1687471 100644 --- a/src/polybench/POLYBENCH_GEMM-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -129,5 +129,5 @@ void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // 
RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp index 6ac291ab8..32d715002 100644 --- a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -164,5 +164,5 @@ void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace basic } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp index 02e3f0930..23afa5f2b 100644 --- a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -96,5 +96,5 @@ void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp index d83daaa00..1b70e2441 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -194,5 +194,5 @@ void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp index 629d16bb4..ba3a65f9d 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ 
-10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -106,5 +106,5 @@ void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp index 51bcc1a99..948113937 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -165,5 +165,5 @@ void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp index 2e94dbbb7..27867a184 100644 --- a/src/polybench/POLYBENCH_MVT-StdPar.cpp +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -128,5 +128,5 @@ void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace polybench } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index 5e2302869..318e47b22 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -101,5 +101,7 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#else +#error JEFF +#endif // BUILD_STDPAR diff --git 
a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp index 2998e34e8..488350a56 100644 --- a/src/stream/COPY-StdPar.cpp +++ b/src/stream/COPY-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -92,5 +92,5 @@ void COPY::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp index 0d291b1f2..b00a9c5a7 100644 --- a/src/stream/DOT-StdPar.cpp +++ b/src/stream/DOT-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -92,5 +92,5 @@ void DOT::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp index f2a5bca36..731cee15d 100644 --- a/src/stream/MUL-StdPar.cpp +++ b/src/stream/MUL-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -101,5 +101,5 @@ void MUL::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp index 9de2f7bd3..93d08a2dd 100644 --- a/src/stream/TRIAD-StdPar.cpp +++ b/src/stream/TRIAD-StdPar.cpp @@ -10,7 +10,7 @@ #include "RAJA/RAJA.hpp" -#if defined(RAJA_ENABLE_STDPAR) +#if defined(BUILD_STDPAR) #include "common/StdParUtils.hpp" @@ -101,5 +101,5 @@ void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf -#endif // RAJA_ENABLE_STDPAR +#endif // BUILD_STDPAR From 
38bf9bd2a4cf28d33238c061610d89242bb0e5a7 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 31 Mar 2023 19:47:58 +0300 Subject: [PATCH 174/174] Update ADD-StdPar.cpp Remove debug print preprocessing --- src/stream/ADD-StdPar.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp index 318e47b22..968a24f58 100644 --- a/src/stream/ADD-StdPar.cpp +++ b/src/stream/ADD-StdPar.cpp @@ -101,7 +101,5 @@ void ADD::runStdParVariant(VariantID vid, size_t tune_idx) } // end namespace stream } // end namespace rajaperf -#else -#error JEFF #endif // BUILD_STDPAR