diff --git a/CMakeLists.txt b/CMakeLists.txt index f34947517..cda39d3ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,10 @@ endif() if (ENABLE_KOKKOS) set(CMAKE_CXX_STANDARD 17) set(BLT_CXX_STD c++17) +elseif (ENABLE_STDPAR) + set(CMAKE_CXX_STANDARD 20) + set(BLT_CXX_STD c++14) + add_definitions(-DBUILD_STDPAR) else() set(CMAKE_CXX_STANDARD 14) set(BLT_CXX_STD c++14) @@ -94,6 +98,9 @@ endif () if (ENABLE_OPENMP) add_definitions(-DRUN_OPENMP) endif () +if (ENABLE_STDPAR) + add_definitions(-DRUN_STDPAR) +endif () set(RAJA_PERFSUITE_VERSION_MAJOR 2022) set(RAJA_PERFSUITE_VERSION_MINOR 10) diff --git a/README.stdpar b/README.stdpar new file mode 100644 index 000000000..9b1f6ef99 --- /dev/null +++ b/README.stdpar @@ -0,0 +1,106 @@ +# GCC + +``` +cmake .. -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-volatile -Wno-unused-parameter" -DENABLE_STDPAR=1 && make -j`nproc` +``` + +# NVC++ + +## Patches + +``` +$ diff /opt/nvidia/hpc_sdk/Linux_$(uname -m)/${V}/compilers/include/nvhpc/algorithm_execution.hpp +1066c1066 +< _ASSERT_RANDOM_ACCESS(_FIt); +--- +> //_ASSERT_RANDOM_ACCESS(_FIt); +``` + +``` +$ diff /opt/nvidia/hpc_sdk/Linux_$(uname -m)/${V}/compilers/include/nvhpc/numeric_execution.hpp +386c386 +< _ASSERT_RANDOM_ACCESS(_FIt); +--- +> //_ASSERT_RANDOM_ACCESS(_FIt); +``` + +## OpenMP/OpenACC for atomics + +``` +cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=multicore -acc=multicore -mp=multicore -tp=haswell" -DENABLE_STDPAR=1 && make -j8 +``` + +``` +cmake .. -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++ -DCMAKE_CXX_FLAGS="-std=c++20 --diag_suppress=volatile_inc_dec_deprecated -stdpar=gpu -tp=haswell -acc" -DENABLE_STDPAR=1 && make -j8 +``` + +## CPU + +Lambda_StdPar returns NaN for Basic_MAT_MAT_SHARED (see below), so just disable the Lambda variant of that kernel. 
+ +------------------------------------------------------- +Basic_MAT_MAT_SHARED +........................................................ +Base_StdPar-default 1136.6199452543779141 0.0000000000000000000 +Lambda_StdPar-default -nan -nan + +Base_StdPar gives the wrong value of pi below, probably because its update is not atomic. + +------------------------------------------------------- +Basic_PI_ATOMIC +........................................................ +Base_StdPar-default 0.55899274342205662602 2.5825999101679185666 +Lambda_StdPar-default 3.1415926535899751926 0.0000000000000000000 + +Check the following kernels to make sure no unintended float<->double conversion is happening. + +------------------------------------------------------- +Polybench_GEMVER +........................................................ +Base_Seq-default 16695345.016927006001 0.0000000000000000000 +Lambda_Seq-default 16695345.016927005882 1.1914380593225359917e-10 +RAJA_Seq-default 16695345.016927006608 -6.0663296608254313469e-10 +Base_StdPar-default 16695345.016927005745 2.5647750589996576309e-10 +Lambda_StdPar-default 16695345.016927006608 -6.0663296608254313469e-10 + +------------------------------------------------------- +Polybench_MVT +........................................................ +Base_Seq-default 6821556.1519041797419 0.0000000000000000000 +Lambda_Seq-default 6821556.1519041797419 0.0000000000000000000 +RAJA_Seq-default 6821556.1519041792999 4.4201442506164312363e-10 +Base_StdPar-default 6821556.1519041792999 4.4201442506164312363e-10 +Lambda_StdPar-default 6821556.1519041792999 4.4201442506164312363e-10 + +------------------------------------------------------- +Stream_DOT +........................................................ 
+Base_Seq-default 39999973.379841431975 0.0000000000000000000 +Lambda_Seq-default 39999973.379841439426 -7.4505805969238281250e-09 +RAJA_Seq-default 39999973.379841662943 -2.3096799850463867188e-07 +Base_StdPar-default 39999973.379841439426 -7.4505805969238281250e-09 +Lambda_StdPar-default 39999973.379841439426 -7.4505805969238281250e-09 + +------------------------------------------------------- +Algorithm_REDUCE_SUM +........................................................ +RAJA_Seq-default 268294.10758353886195 1.5483237802982330322e-08 + +## GPU + +Lambda_Seq gives the same wrong result as Lambda_StdPar here, so just disable the Lambda versions of this kernel. + +------------------------------------------------------- +Basic_MAT_MAT_SHARED +........................................................ +Base_Seq-default 1136.6199452543779141 0.0000000000000000000 +Lambda_Seq-default -6.0464819976872759102e+32 6.0464819976872759102e+32 +RAJA_Seq-default 1136.6199452543779141 0.0000000000000000000 +Base_StdPar-default 1136.6199452543779141 0.0000000000000000000 +Lambda_StdPar-default -6.0464819976872759102e+32 6.0464819976872759102e+32 + +# Intel + +``` +cmake .. 
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_CXX_FLAGS="-std=c++20 -Wno-unused-parameter -Wno-deprecated-volatile -tbb" -DENABLE_STDPAR=1 && make -j8 +``` diff --git a/src/algorithm/CMakeLists.txt b/src/algorithm/CMakeLists.txt index 54334242e..232a1635d 100644 --- a/src/algorithm/CMakeLists.txt +++ b/src/algorithm/CMakeLists.txt @@ -10,34 +10,40 @@ blt_add_library( NAME algorithm SOURCES SCAN.cpp SCAN-Seq.cpp + SCAN-StdPar.cpp SCAN-Hip.cpp SCAN-Cuda.cpp SCAN-OMP.cpp SCAN-OMPTarget.cpp SORT.cpp SORT-Seq.cpp + SORT-StdPar.cpp SORT-Hip.cpp SORT-Cuda.cpp SORT-OMP.cpp SORTPAIRS.cpp SORTPAIRS-Seq.cpp + SORTPAIRS-StdPar.cpp SORTPAIRS-Hip.cpp SORTPAIRS-Cuda.cpp SORTPAIRS-OMP.cpp REDUCE_SUM.cpp REDUCE_SUM-Seq.cpp + REDUCE_SUM-StdPar.cpp REDUCE_SUM-Hip.cpp REDUCE_SUM-Cuda.cpp REDUCE_SUM-OMP.cpp REDUCE_SUM-OMPTarget.cpp MEMSET.cpp MEMSET-Seq.cpp + MEMSET-StdPar.cpp MEMSET-Hip.cpp MEMSET-Cuda.cpp MEMSET-OMP.cpp MEMSET-OMPTarget.cpp MEMCPY.cpp MEMCPY-Seq.cpp + MEMCPY-StdPar.cpp MEMCPY-Hip.cpp MEMCPY-Cuda.cpp MEMCPY-OMP.cpp diff --git a/src/algorithm/MEMCPY-StdPar.cpp b/src/algorithm/MEMCPY-StdPar.cpp new file mode 100644 index 000000000..4d36f161b --- /dev/null +++ b/src/algorithm/MEMCPY-StdPar.cpp @@ -0,0 +1,154 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMCPY.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void MEMCPY::runStdParVariantLibrary(VariantID vid) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::copy_n(std::execution::par_unseq, + x+ibegin, iend-ibegin, y+ibegin); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +void MEMCPY::runStdParVariantDefault(VariantID vid) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMCPY_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + MEMCPY_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto memcpy_lambda = [=](Index_type i) { + MEMCPY_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + memcpy_lambda(i); + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n MEMCPY : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +void MEMCPY::runStdParVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_StdPar) { + + if (tune_idx == t) 
{ + + runStdParVariantLibrary(vid); + + } + + t += 1; + + } + + if (tune_idx == t) { + + runStdParVariantDefault(vid); + + } + + t += 1; +} + +void MEMCPY::setStdParTuningDefinitions(VariantID vid) +{ + if (vid == Base_StdPar) { + addVariantTuningName(vid, "library"); + } + + addVariantTuningName(vid, "default"); +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/algorithm/MEMCPY.cpp b/src/algorithm/MEMCPY.cpp index 583a19dea..9e4017054 100644 --- a/src/algorithm/MEMCPY.cpp +++ b/src/algorithm/MEMCPY.cpp @@ -51,6 +51,9 @@ MEMCPY::MEMCPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } MEMCPY::~MEMCPY() diff --git a/src/algorithm/MEMCPY.hpp b/src/algorithm/MEMCPY.hpp index 9fa46ae9e..b35f4faaa 100644 --- a/src/algorithm/MEMCPY.hpp +++ b/src/algorithm/MEMCPY.hpp @@ -54,12 +54,16 @@ class MEMCPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); + void setStdParTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); void runSeqVariantDefault(VariantID vid); void runSeqVariantLibrary(VariantID vid); + void runStdParVariantDefault(VariantID vid); + void runStdParVariantLibrary(VariantID vid); template < size_t block_size > void runCudaVariantBlock(VariantID vid); diff --git a/src/algorithm/MEMSET-StdPar.cpp b/src/algorithm/MEMSET-StdPar.cpp new file mode 100644 index 000000000..e6903ec3b --- /dev/null +++ b/src/algorithm/MEMSET-StdPar.cpp @@ -0,0 +1,154 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, 
Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MEMSET.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void MEMSET::runStdParVariantLibrary(VariantID vid) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::fill_n(std::execution::par_unseq, + x+ibegin, iend-ibegin, val); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +void MEMSET::runStdParVariantDefault(VariantID vid) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MEMSET_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + MEMSET_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto memset_lambda = [=](Index_type i) { + MEMSET_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + memset_lambda(i); + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n MEMSET : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} 
+ +void MEMSET::runStdParVariant(VariantID vid, size_t tune_idx) +{ + size_t t = 0; + + if (vid == Base_StdPar) { + + if (tune_idx == t) { + + runStdParVariantLibrary(vid); + + } + + t += 1; + + } + + if (tune_idx == t) { + + runStdParVariantDefault(vid); + + } + + t += 1; +} + +void MEMSET::setStdParTuningDefinitions(VariantID vid) +{ + if (vid == Base_StdPar) { + addVariantTuningName(vid, "library"); + } + + addVariantTuningName(vid, "default"); +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/algorithm/MEMSET.cpp b/src/algorithm/MEMSET.cpp index fdc98b3fe..332fa0100 100644 --- a/src/algorithm/MEMSET.cpp +++ b/src/algorithm/MEMSET.cpp @@ -52,6 +52,9 @@ MEMSET::MEMSET(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } MEMSET::~MEMSET() diff --git a/src/algorithm/MEMSET.hpp b/src/algorithm/MEMSET.hpp index ebf2f867b..01687b35c 100644 --- a/src/algorithm/MEMSET.hpp +++ b/src/algorithm/MEMSET.hpp @@ -54,12 +54,16 @@ class MEMSET : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setSeqTuningDefinitions(VariantID vid); + void setStdParTuningDefinitions(VariantID vid); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); void runSeqVariantDefault(VariantID vid); void runSeqVariantLibrary(VariantID vid); + void runStdParVariantDefault(VariantID vid); + void runStdParVariantLibrary(VariantID vid); template < size_t block_size > void runCudaVariantBlock(VariantID vid); diff --git a/src/algorithm/REDUCE_SUM-StdPar.cpp b/src/algorithm/REDUCE_SUM-StdPar.cpp new file mode 100644 index 000000000..c35a6657a --- /dev/null +++ 
b/src/algorithm/REDUCE_SUM-StdPar.cpp @@ -0,0 +1,94 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE_SUM.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void REDUCE_SUM::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + REDUCE_SUM_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sum = m_sum_init; + + sum += std::reduce( std::execution::par_unseq, + x+ibegin, x+iend, + Real_type(0), std::plus() ); + + m_sum = sum; + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto reduce_sum_base_lam = [=](Index_type i) { + return x[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sum = m_sum_init; + + sum += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), reduce_sum_base_lam); + + m_sum = sum; + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n REDUCE_SUM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/algorithm/REDUCE_SUM.cpp b/src/algorithm/REDUCE_SUM.cpp index f2f2b25d2..f72fd5005 100644 --- 
a/src/algorithm/REDUCE_SUM.cpp +++ b/src/algorithm/REDUCE_SUM.cpp @@ -51,6 +51,9 @@ REDUCE_SUM::REDUCE_SUM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); // exists but is not interesting } REDUCE_SUM::~REDUCE_SUM() diff --git a/src/algorithm/REDUCE_SUM.hpp b/src/algorithm/REDUCE_SUM.hpp index ba9e9308b..247c3efa6 100644 --- a/src/algorithm/REDUCE_SUM.hpp +++ b/src/algorithm/REDUCE_SUM.hpp @@ -58,6 +58,7 @@ class REDUCE_SUM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/algorithm/SCAN-StdPar.cpp b/src/algorithm/SCAN-StdPar.cpp new file mode 100644 index 000000000..510f6e181 --- /dev/null +++ b/src/algorithm/SCAN-StdPar.cpp @@ -0,0 +1,69 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SCAN.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SCAN::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + SCAN_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::exclusive_scan( +#ifdef NVCXX_GPU_ENABLED +// GPU implementation is wrong + std::execution::seq, +#else + std::execution::par_unseq, +#endif + x+ibegin, x+iend, y, (Real_type)0 ); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n SCAN : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/algorithm/SCAN.cpp b/src/algorithm/SCAN.cpp index 7b2933084..e5fcc9a62 100644 --- a/src/algorithm/SCAN.cpp +++ b/src/algorithm/SCAN.cpp @@ -55,6 +55,8 @@ SCAN::SCAN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); } SCAN::~SCAN() diff --git a/src/algorithm/SCAN.hpp b/src/algorithm/SCAN.hpp index 519789a55..51cc13325 100644 --- a/src/algorithm/SCAN.hpp +++ b/src/algorithm/SCAN.hpp @@ -61,6 +61,7 @@ class SCAN : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); private: static const size_t default_gpu_block_size = 0; diff --git a/src/algorithm/SORT-StdPar.cpp b/src/algorithm/SORT-StdPar.cpp new 
file mode 100644 index 000000000..2f45b62ab --- /dev/null +++ b/src/algorithm/SORT-StdPar.cpp @@ -0,0 +1,63 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SORT.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SORT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + SORT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::sort( std::execution::par_unseq, + STD_SORT_ARGS); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n SORT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/algorithm/SORT.cpp b/src/algorithm/SORT.cpp index 049c03304..44828f3ad 100644 --- a/src/algorithm/SORT.cpp +++ b/src/algorithm/SORT.cpp @@ -41,6 +41,8 @@ SORT::SORT(const RunParams& params) setVariantDefined( RAJA_CUDA ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); } SORT::~SORT() diff --git a/src/algorithm/SORT.hpp b/src/algorithm/SORT.hpp index b51bf12f9..6ca3d877a 100644 --- a/src/algorithm/SORT.hpp +++ b/src/algorithm/SORT.hpp @@ -54,6 +54,7 @@ class SORT : public KernelBase { getCout() << "\n SORT : Unknown OMP Target variant id = " << vid << std::endl; } + void runStdParVariant(VariantID vid, size_t tune_idx); private: 
static const size_t default_gpu_block_size = 0; diff --git a/src/algorithm/SORTPAIRS-StdPar.cpp b/src/algorithm/SORTPAIRS-StdPar.cpp new file mode 100644 index 000000000..0a75f028a --- /dev/null +++ b/src/algorithm/SORTPAIRS-StdPar.cpp @@ -0,0 +1,101 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SORTPAIRS.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include +#include +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SORTPAIRS::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + SORTPAIRS_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + using pair_type = std::pair; + + std::vector vector_of_pairs; + +#if 0 + vector_of_pairs.reserve(iend-ibegin); + + std::for_each_n( //std::execution::par, // parallelism leads to incorrectness + counting_iterator(ibegin), iend-ibegin, + [=,&vector_of_pairs](Index_type iemp) noexcept { + vector_of_pairs.emplace_back(x[iend*irep + iemp], i[iend*irep + iemp]); + }); +#else + vector_of_pairs.resize(iend-ibegin); + + auto p = vector_of_pairs.data(); + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type iemp) noexcept { + p[iemp] = std::make_pair(x[iend*irep + iemp], i[iend*irep + iemp]); + }); +#endif + + std::sort( std::execution::par_unseq, + vector_of_pairs.begin(), vector_of_pairs.end(), + [](pair_type const& lhs, pair_type 
const& rhs) noexcept { + return lhs.first < rhs.first; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type iemp) noexcept { + //const pair_type &pair = vector_of_pairs[iemp - ibegin]; + const pair_type &pair = p[iemp - ibegin]; + x[iend*irep + iemp] = pair.first; + i[iend*irep + iemp] = pair.second; + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace algorithm +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/algorithm/SORTPAIRS.cpp b/src/algorithm/SORTPAIRS.cpp index 96d79a7df..26f8e83be 100644 --- a/src/algorithm/SORTPAIRS.cpp +++ b/src/algorithm/SORTPAIRS.cpp @@ -41,6 +41,8 @@ SORTPAIRS::SORTPAIRS(const RunParams& params) setVariantDefined( RAJA_CUDA ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); } SORTPAIRS::~SORTPAIRS() diff --git a/src/algorithm/SORTPAIRS.hpp b/src/algorithm/SORTPAIRS.hpp index 4cfc3eb36..f79b3f39b 100644 --- a/src/algorithm/SORTPAIRS.hpp +++ b/src/algorithm/SORTPAIRS.hpp @@ -53,6 +53,7 @@ class SORTPAIRS : public KernelBase { getCout() << "\n SORTPAIRS : Unknown OMP Target variant id = " << vid << std::endl; } + void runStdParVariant(VariantID vid, size_t tune_idx); private: static const size_t default_gpu_block_size = 0; diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index 6d521d1df..372e4dce5 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -13,6 +13,7 @@ blt_add_library( CONVECTION3DPA-Cuda.cpp CONVECTION3DPA-Hip.cpp CONVECTION3DPA-Seq.cpp + CONVECTION3DPA-StdPar.cpp CONVECTION3DPA-OMP.cpp CONVECTION3DPA-OMPTarget.cpp DEL_DOT_VEC_2D.cpp @@ -71,6 +72,7 @@ blt_add_library( MASS3DPA-OMPTarget.cpp NODAL_ACCUMULATION_3D.cpp NODAL_ACCUMULATION_3D-Seq.cpp + NODAL_ACCUMULATION_3D-StdPar.cpp NODAL_ACCUMULATION_3D-Hip.cpp NODAL_ACCUMULATION_3D-Cuda.cpp 
NODAL_ACCUMULATION_3D-OMP.cpp @@ -88,5 +90,16 @@ blt_add_library( VOL3D-OMP.cpp VOL3D-OMPTarget.cpp WIP-COUPLE.cpp + DEL_DOT_VEC_2D-StdPar.cpp + ENERGY-StdPar.cpp + FIR-StdPar.cpp + HALOEXCHANGE-StdPar.cpp + HALOEXCHANGE_FUSED-StdPar.cpp + LTIMES-StdPar.cpp + LTIMES_NOVIEW-StdPar.cpp + MASS3DPA-StdPar.cpp + PRESSURE-StdPar.cpp + VOL3D-StdPar.cpp + DIFFUSION3DPA-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/apps/CONVECTION3DPA-StdPar.cpp b/src/apps/CONVECTION3DPA-StdPar.cpp new file mode 100644 index 000000000..2b36d2dc3 --- /dev/null +++ b/src/apps/CONVECTION3DPA-StdPar.cpp @@ -0,0 +1,127 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "CONVECTION3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf { +namespace apps { + +void CONVECTION3DPA::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + + CONVECTION3DPA_DATA_SETUP; + + switch (vid) { + + case Base_StdPar: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), NE, + [=](int e) { + + CONVECTION3DPA_0_CPU; + + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(dy,y,CPA_D1D) { + CPU_FOREACH(dx,x,CPA_D1D) { + CONVECTION3DPA_1; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(dy,y,CPA_D1D) { + CPU_FOREACH(qx,x,CPA_Q1D) { + CONVECTION3DPA_2; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(qx,x,CPA_Q1D) { + CPU_FOREACH(qy,y,CPA_Q1D) { + CONVECTION3DPA_3; + } + } + } + + 
CPU_FOREACH(qx,x,CPA_Q1D) { + CPU_FOREACH(qy,y,CPA_Q1D) { + CPU_FOREACH(qz,z,CPA_Q1D) { + CONVECTION3DPA_4; + } + } + } + + CPU_FOREACH(qz,z,CPA_Q1D) { + CPU_FOREACH(qy,y,CPA_Q1D) { + CPU_FOREACH(qx,x,CPA_Q1D) { + CONVECTION3DPA_5; + } + } + } + + CPU_FOREACH(qx,x,CPA_Q1D) { + CPU_FOREACH(qy,y,CPA_Q1D) { + CPU_FOREACH(dz,z,CPA_D1D) { + CONVECTION3DPA_6; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(qx,x,CPA_Q1D) { + CPU_FOREACH(dy,y,CPA_D1D) { + CONVECTION3DPA_7; + } + } + } + + CPU_FOREACH(dz,z,CPA_D1D) { + CPU_FOREACH(dy,y,CPA_D1D) { + CPU_FOREACH(dx,x,CPA_D1D) { + CONVECTION3DPA_8; + } + } + } + + }); // element loop + + } + stopTimer(); + + break; + } + + default: + getCout() << "\n CONVECTION3DPA : Unknown StdPar variant id = " << vid << std::endl; + } + +#else + RAJA_UNUSED_VAR(vid); +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/CONVECTION3DPA.cpp b/src/apps/CONVECTION3DPA.cpp index dc4823482..bf52a6dcc 100644 --- a/src/apps/CONVECTION3DPA.cpp +++ b/src/apps/CONVECTION3DPA.cpp @@ -64,6 +64,7 @@ CONVECTION3DPA::CONVECTION3DPA(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); } CONVECTION3DPA::~CONVECTION3DPA() diff --git a/src/apps/CONVECTION3DPA.hpp b/src/apps/CONVECTION3DPA.hpp index 784b2d4cd..bb9e716a1 100644 --- a/src/apps/CONVECTION3DPA.hpp +++ b/src/apps/CONVECTION3DPA.hpp @@ -378,6 +378,7 @@ class CONVECTION3DPA : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/DEL_DOT_VEC_2D-StdPar.cpp b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp new file mode 100644 index 000000000..bbe987735 --- /dev/null +++ 
b/src/apps/DEL_DOT_VEC_2D-StdPar.cpp @@ -0,0 +1,100 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DEL_DOT_VEC_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include "AppsData.hpp" + +#include "camp/resource.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void DEL_DOT_VEC_2D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = m_domain->n_real_zones; + + + DEL_DOT_VEC_2D_DATA_SETUP; + + NDSET2D(m_domain->jp, x,x1,x2,x3,x4) ; + NDSET2D(m_domain->jp, y,y1,y2,y3,y4) ; + NDSET2D(m_domain->jp, xdot,fx1,fx2,fx3,fx4) ; + NDSET2D(m_domain->jp, ydot,fy1,fy2,fy3,fy4) ; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type ii) { + DEL_DOT_VEC_2D_BODY_INDEX; + DEL_DOT_VEC_2D_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto deldotvec2d_base_lam = [=](Index_type ii) { + DEL_DOT_VEC_2D_BODY_INDEX; + DEL_DOT_VEC_2D_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type ii) { + deldotvec2d_base_lam(ii); + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace apps +} // end namespace 
rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/DEL_DOT_VEC_2D.cpp b/src/apps/DEL_DOT_VEC_2D.cpp index 9fe3c3e85..07858084f 100644 --- a/src/apps/DEL_DOT_VEC_2D.cpp +++ b/src/apps/DEL_DOT_VEC_2D.cpp @@ -62,6 +62,9 @@ DEL_DOT_VEC_2D::DEL_DOT_VEC_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } DEL_DOT_VEC_2D::~DEL_DOT_VEC_2D() diff --git a/src/apps/DEL_DOT_VEC_2D.hpp b/src/apps/DEL_DOT_VEC_2D.hpp index 0e22bb399..2a3ab63be 100644 --- a/src/apps/DEL_DOT_VEC_2D.hpp +++ b/src/apps/DEL_DOT_VEC_2D.hpp @@ -113,6 +113,7 @@ class DEL_DOT_VEC_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/DIFFUSION3DPA-StdPar.cpp b/src/apps/DIFFUSION3DPA-StdPar.cpp new file mode 100644 index 000000000..a05a4370a --- /dev/null +++ b/src/apps/DIFFUSION3DPA-StdPar.cpp @@ -0,0 +1,137 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +// Uncomment to add compiler directives for loop unrolling +//#define USE_RAJAPERF_UNROLL + +#include "DIFFUSION3DPA.hpp" + +#if defined(BUILD_STDPAR) + +#include "RAJA/RAJA.hpp" + +#include "common/StdParUtils.hpp" + +#include + +// This is used below, which is bad for GPU +//#define CPU_FOREACH(i, k, N) for (int i = 0; i < N; i++) + +namespace rajaperf { +namespace apps { + +void DIFFUSION3DPA::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + + DIFFUSION3DPA_DATA_SETUP; + + switch (vid) { + + case Base_StdPar: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), NE, + [=](int e) { + + DIFFUSION3DPA_0_CPU; + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_1; + } + } + } + + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_2; + } + } + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_3; + } + } + } + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(qy, y, DPA_Q1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_4; + } + } + } + + CPU_FOREACH(qz, z, DPA_Q1D) { + CPU_FOREACH(qy, y, DPA_Q1D) { + CPU_FOREACH(qx, x, DPA_Q1D) { + DIFFUSION3DPA_5; + } + } + } + + CPU_FOREACH(d, y, DPA_D1D) { + CPU_FOREACH(q, x, DPA_Q1D) { + DIFFUSION3DPA_6; + } + } + + CPU_FOREACH(qz, z, DPA_Q1D) { + CPU_FOREACH(qy, y, DPA_Q1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_7; + } + } + } + + CPU_FOREACH(qz, z, DPA_Q1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_8; + } + } + } + + CPU_FOREACH(dz, z, DPA_D1D) { + CPU_FOREACH(dy, y, DPA_D1D) { + CPU_FOREACH(dx, x, DPA_D1D) { + DIFFUSION3DPA_9; + } + 
} + } + + }); // element loop + + } + stopTimer(); + + break; + } + + default: + getCout() << "\n DIFFUSION3DPA : Unknown StdPar variant id = " << vid << std::endl; + } + +#else + RAJA_UNUSED_VAR(vid); +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/DIFFUSION3DPA.cpp b/src/apps/DIFFUSION3DPA.cpp index 1f78cafe3..d243bf330 100644 --- a/src/apps/DIFFUSION3DPA.cpp +++ b/src/apps/DIFFUSION3DPA.cpp @@ -65,6 +65,7 @@ DIFFUSION3DPA::DIFFUSION3DPA(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); } DIFFUSION3DPA::~DIFFUSION3DPA() diff --git a/src/apps/DIFFUSION3DPA.hpp b/src/apps/DIFFUSION3DPA.hpp index 62967d5c0..5149f8d78 100644 --- a/src/apps/DIFFUSION3DPA.hpp +++ b/src/apps/DIFFUSION3DPA.hpp @@ -481,6 +481,7 @@ class DIFFUSION3DPA : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/ENERGY-StdPar.cpp b/src/apps/ENERGY-StdPar.cpp new file mode 100644 index 000000000..6d797f8ed --- /dev/null +++ b/src/apps/ENERGY-StdPar.cpp @@ -0,0 +1,163 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ENERGY.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void ENERGY::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + ENERGY_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + ENERGY_BODY1; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + ENERGY_BODY2; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + ENERGY_BODY3; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + ENERGY_BODY4; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + ENERGY_BODY5; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + ENERGY_BODY6; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto energy_lam1 = [=](Index_type i) { + ENERGY_BODY1; + }; + auto energy_lam2 = [=](Index_type i) { + ENERGY_BODY2; + }; + auto energy_lam3 = [=](Index_type i) { + ENERGY_BODY3; + }; + auto energy_lam4 = [=](Index_type i) { + ENERGY_BODY4; + }; + auto energy_lam5 = [=](Index_type i) { + ENERGY_BODY5; + }; + auto energy_lam6 = [=](Index_type i) { + ENERGY_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + 
std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + energy_lam1(i); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + energy_lam2(i); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + energy_lam3(i); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + energy_lam4(i); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + energy_lam5(i); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + energy_lam6(i); + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n ENERGY : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/ENERGY.cpp b/src/apps/ENERGY.cpp index 8e77961b6..7c363bb4d 100644 --- a/src/apps/ENERGY.cpp +++ b/src/apps/ENERGY.cpp @@ -62,6 +62,9 @@ ENERGY::ENERGY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } ENERGY::~ENERGY() diff --git a/src/apps/ENERGY.hpp b/src/apps/ENERGY.hpp index 22af34867..dbb2141e4 100644 --- a/src/apps/ENERGY.hpp +++ b/src/apps/ENERGY.hpp @@ -203,6 +203,7 @@ class ENERGY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/FIR-StdPar.cpp b/src/apps/FIR-StdPar.cpp new file
mode 100644 index 000000000..2e70b8a38 --- /dev/null +++ b/src/apps/FIR-StdPar.cpp @@ -0,0 +1,95 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "FIR.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void FIR::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize() - m_coefflen; + + FIR_COEFF; + + FIR_DATA_SETUP; + + Real_type coeff[FIR_COEFFLEN]; + std::copy(std::begin(coeff_array), std::end(coeff_array), std::begin(coeff)); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { + FIR_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto fir_lam = [=](Index_type i) { + FIR_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend, + [=](Index_type i) { + fir_lam(i); + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n FIR : Unknown variant id = " << vid << std::endl; + } + + } + +#else + RAJA_UNUSED_VAR(vid); +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/FIR.cpp b/src/apps/FIR.cpp index 792f015d0..f8ba2239b 100644 --- a/src/apps/FIR.cpp +++ b/src/apps/FIR.cpp @@ -56,6 
+56,9 @@ FIR::FIR(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } FIR::~FIR() diff --git a/src/apps/FIR.hpp b/src/apps/FIR.hpp index 3ca8a1cef..9a43b3d1f 100644 --- a/src/apps/FIR.hpp +++ b/src/apps/FIR.hpp @@ -78,6 +78,7 @@ class FIR : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/HALOEXCHANGE-StdPar.cpp b/src/apps/HALOEXCHANGE-StdPar.cpp new file mode 100644 index 000000000..6f549bd03 --- /dev/null +++ b/src/apps/HALOEXCHANGE-StdPar.cpp @@ -0,0 +1,138 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "HALOEXCHANGE.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void HALOEXCHANGE::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + HALOEXCHANGE_DATA_SETUP; + + auto ibegin = 0; + auto iend = num_neighbors; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type l) noexcept { + Real_ptr buffer = buffers[l]; + Int_ptr list = pack_index_lists[l]; + Index_type len = pack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + for (Index_type i = 0; i < len; i++) { + HALOEXCHANGE_PACK_BODY; + } + buffer += len; + } + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type l) noexcept { + Real_ptr buffer = buffers[l]; + Int_ptr list = unpack_index_lists[l]; + Index_type len = unpack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + for (Index_type i = 0; i < len; i++) { + HALOEXCHANGE_UNPACK_BODY; + } + buffer += len; + } + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type l) noexcept { + Real_ptr buffer = buffers[l]; + Int_ptr list = pack_index_lists[l]; + Index_type len = pack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + auto haloexchange_pack_base_lam = [=](Index_type i) { + 
HALOEXCHANGE_PACK_BODY; + }; + for (Index_type i = 0; i < len; i++) { + haloexchange_pack_base_lam(i); + } + buffer += len; + } + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type l) noexcept { + Real_ptr buffer = buffers[l]; + Int_ptr list = unpack_index_lists[l]; + Index_type len = unpack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + auto haloexchange_unpack_base_lam = [=](Index_type i) { + HALOEXCHANGE_UNPACK_BODY; + }; + for (Index_type i = 0; i < len; i++) { + haloexchange_unpack_base_lam(i); + } + buffer += len; + } + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/HALOEXCHANGE.cpp b/src/apps/HALOEXCHANGE.cpp index 53ec0ecc1..1b7ef9aa8 100644 --- a/src/apps/HALOEXCHANGE.cpp +++ b/src/apps/HALOEXCHANGE.cpp @@ -76,6 +76,9 @@ HALOEXCHANGE::HALOEXCHANGE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } HALOEXCHANGE::~HALOEXCHANGE() diff --git a/src/apps/HALOEXCHANGE.hpp b/src/apps/HALOEXCHANGE.hpp index 1f21d9616..b9fec003c 100644 --- a/src/apps/HALOEXCHANGE.hpp +++ b/src/apps/HALOEXCHANGE.hpp @@ -93,6 +93,7 @@ class HALOEXCHANGE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp new file mode 100644 index 000000000..47c531e50 --- /dev/null +++
b/src/apps/HALOEXCHANGE_FUSED-StdPar.cpp @@ -0,0 +1,179 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "HALOEXCHANGE_FUSED.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void HALOEXCHANGE_FUSED::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + HALOEXCHANGE_FUSED_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + HALOEXCHANGE_FUSED_MANUAL_FUSER_SETUP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Index_type pack_index = 0; + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr list = pack_index_lists[l]; + Index_type len = pack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + pack_ptr_holders[pack_index] = ptr_holder{buffer, list, var}; + pack_lens[pack_index] = len; + pack_index += 1; + buffer += len; + } + } + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), pack_index, + [=](Index_type j) { + Real_ptr buffer = pack_ptr_holders[j].buffer; + Int_ptr list = pack_ptr_holders[j].list; + Real_ptr var = pack_ptr_holders[j].var; + Index_type len = pack_lens[j]; + for (Index_type i = 0; i < len; i++) { + HALOEXCHANGE_FUSED_PACK_BODY; + } + }); + + Index_type unpack_index = 0; + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr list = unpack_index_lists[l]; + Index_type len = unpack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { 
+ Real_ptr var = vars[v]; + unpack_ptr_holders[unpack_index] = ptr_holder{buffer, list, var}; + unpack_lens[unpack_index] = len; + unpack_index += 1; + buffer += len; + } + } + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), unpack_index, + [=](Index_type j) { + Real_ptr buffer = unpack_ptr_holders[j].buffer; + Int_ptr list = unpack_ptr_holders[j].list; + Real_ptr var = unpack_ptr_holders[j].var; + Index_type len = unpack_lens[j]; + for (Index_type i = 0; i < len; i++) { + HALOEXCHANGE_FUSED_UNPACK_BODY; + } + }); + + } + stopTimer(); + + HALOEXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN; + + break; + } + + case Lambda_StdPar : { + + HALOEXCHANGE_FUSED_MANUAL_LAMBDA_FUSER_SETUP; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Index_type pack_index = 0; + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr list = pack_index_lists[l]; + Index_type len = pack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + new(&pack_lambdas[pack_index]) pack_lambda_type(make_pack_lambda(buffer, list, var)); + pack_lens[pack_index] = len; + pack_index += 1; + buffer += len; + } + } + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), pack_index, + [=](Index_type j) { + auto pack_lambda = pack_lambdas[j]; + Index_type len = pack_lens[j]; + for (Index_type i = 0; i < len; i++) { + pack_lambda(i); + } + }); + + Index_type unpack_index = 0; + + for (Index_type l = 0; l < num_neighbors; ++l) { + Real_ptr buffer = buffers[l]; + Int_ptr list = unpack_index_lists[l]; + Index_type len = unpack_index_list_lengths[l]; + for (Index_type v = 0; v < num_vars; ++v) { + Real_ptr var = vars[v]; + new(&unpack_lambdas[unpack_index]) unpack_lambda_type(make_unpack_lambda(buffer, list, var)); + unpack_lens[unpack_index] = len; + unpack_index += 1; + buffer += len; + } + } + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), unpack_index, + 
[=](Index_type j) { + auto unpack_lambda = unpack_lambdas[j]; + Index_type len = unpack_lens[j]; + for (Index_type i = 0; i < len; i++) { + unpack_lambda(i); + } + }); + + } + stopTimer(); + + HALOEXCHANGE_FUSED_MANUAL_LAMBDA_FUSER_TEARDOWN; + + break; + } + + default : { + getCout() << "\n HALOEXCHANGE_FUSED : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/HALOEXCHANGE_FUSED.cpp b/src/apps/HALOEXCHANGE_FUSED.cpp index 5486c3645..54b00790d 100644 --- a/src/apps/HALOEXCHANGE_FUSED.cpp +++ b/src/apps/HALOEXCHANGE_FUSED.cpp @@ -76,6 +76,9 @@ HALOEXCHANGE_FUSED::HALOEXCHANGE_FUSED(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } HALOEXCHANGE_FUSED::~HALOEXCHANGE_FUSED() diff --git a/src/apps/HALOEXCHANGE_FUSED.hpp b/src/apps/HALOEXCHANGE_FUSED.hpp index b0af7e60e..116596d6c 100644 --- a/src/apps/HALOEXCHANGE_FUSED.hpp +++ b/src/apps/HALOEXCHANGE_FUSED.hpp @@ -137,6 +137,7 @@ class HALOEXCHANGE_FUSED : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/LTIMES-StdPar.cpp b/src/apps/LTIMES-StdPar.cpp new file mode 100644 index 000000000..3ccd3c987 --- /dev/null +++ b/src/apps/LTIMES-StdPar.cpp @@ -0,0 +1,113 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "LTIMES.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void LTIMES::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + LTIMES_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z*num_g*num_m, + [=](Index_type zgm) { + const auto z = zgm / (num_g*num_m); + const auto gm = zgm % (num_g*num_m); + const auto g = gm / num_m; + const auto m = gm % num_m; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z, + [=](Index_type z) { + for (Index_type g = 0; g < num_g; ++g ) + for (Index_type m = 0; m < num_m; ++m ) +#endif + for (Index_type d = 0; d < num_d; ++d ) { + LTIMES_BODY; + } + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto ltimes_base_lam = [=](Index_type d, Index_type z, Index_type g, Index_type m) { + LTIMES_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z*num_g*num_m, + [=](Index_type zgm) { + const auto z = zgm / (num_g*num_m); + const auto gm = zgm % (num_g*num_m); + const auto g = gm / num_m; + const auto m = gm % num_m; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z, + [=](Index_type z) { + for (Index_type g = 0; g < num_g; ++g ) + for (Index_type m = 0; m < num_m; ++m ) +#endif + for (Index_type d = 0; d < num_d; ++d ) { + ltimes_base_lam(d, z, g, m); + } + }); + + } + stopTimer(); + + break; + } + + default : { + 
getCout() << "\n LTIMES : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp index b920631dd..5f13966b7 100644 --- a/src/apps/LTIMES.cpp +++ b/src/apps/LTIMES.cpp @@ -77,6 +77,9 @@ LTIMES::LTIMES(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } LTIMES::~LTIMES() diff --git a/src/apps/LTIMES.hpp b/src/apps/LTIMES.hpp index 2f3f0ca6d..5fd360936 100644 --- a/src/apps/LTIMES.hpp +++ b/src/apps/LTIMES.hpp @@ -116,6 +116,7 @@ class LTIMES : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/LTIMES_NOVIEW-StdPar.cpp b/src/apps/LTIMES_NOVIEW-StdPar.cpp new file mode 100644 index 000000000..37087f20d --- /dev/null +++ b/src/apps/LTIMES_NOVIEW-StdPar.cpp @@ -0,0 +1,113 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "LTIMES_NOVIEW.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void LTIMES_NOVIEW::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + LTIMES_NOVIEW_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z*num_g*num_m, + [=](Index_type zgm) { + const auto z = zgm / (num_g*num_m); + const auto gm = zgm % (num_g*num_m); + const auto g = gm / num_m; + const auto m = gm % num_m; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z, + [=](Index_type z) { + for (Index_type g = 0; g < num_g; ++g ) + for (Index_type m = 0; m < num_m; ++m ) +#endif + for (Index_type d = 0; d < num_d; ++d ) { + LTIMES_NOVIEW_BODY; + } + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto ltimesnoview_lam = [=](Index_type d, Index_type z, Index_type g, Index_type m) { + LTIMES_NOVIEW_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z*num_g*num_m, + [=](Index_type zgm) { + const auto z = zgm / (num_g*num_m); + const auto gm = zgm % (num_g*num_m); + const auto g = gm / num_m; + const auto m = gm % num_m; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), num_z, + [=](Index_type z) { + for (Index_type g = 0; g < num_g; ++g ) + for (Index_type m = 0; m < num_m; ++m ) +#endif + for (Index_type d = 0; d < num_d; ++d ) { + ltimesnoview_lam(d, z, g, m); + } + }); + + } + 
stopTimer(); + + break; + } + + default : { + getCout() << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl; + } + + } +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/LTIMES_NOVIEW.cpp b/src/apps/LTIMES_NOVIEW.cpp index 2f8dd4b40..c37128622 100644 --- a/src/apps/LTIMES_NOVIEW.cpp +++ b/src/apps/LTIMES_NOVIEW.cpp @@ -76,6 +76,9 @@ LTIMES_NOVIEW::LTIMES_NOVIEW(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } LTIMES_NOVIEW::~LTIMES_NOVIEW() diff --git a/src/apps/LTIMES_NOVIEW.hpp b/src/apps/LTIMES_NOVIEW.hpp index 96a296366..d00bd0da2 100644 --- a/src/apps/LTIMES_NOVIEW.hpp +++ b/src/apps/LTIMES_NOVIEW.hpp @@ -66,6 +66,7 @@ class LTIMES_NOVIEW : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/MASS3DPA-StdPar.cpp b/src/apps/MASS3DPA-StdPar.cpp new file mode 100644 index 000000000..8e18cd50b --- /dev/null +++ b/src/apps/MASS3DPA-StdPar.cpp @@ -0,0 +1,115 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MASS3DPA.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf { +namespace apps { + +#define CPU_FOREACH(i, k, N) for (int i = 0; i < N; i++) + +void MASS3DPA::runStdParVariant(VariantID vid, size_t tune_idx) { +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + + MASS3DPA_DATA_SETUP; + + switch (vid) { + + case Base_StdPar: { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), NE, + [=](int e) { + + MASS3DPA_0_CPU + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(dx, x, MPA_D1D){ + MASS3DPA_1 + } + CPU_FOREACH(dx, x, MPA_Q1D) { + MASS3DPA_2 + } + } + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(qx, x, MPA_Q1D) { + MASS3DPA_3 + } + } + + CPU_FOREACH(qy, y, MPA_Q1D) { + CPU_FOREACH(qx, x, MPA_Q1D) { + MASS3DPA_4 + } + } + + CPU_FOREACH(qy, y, MPA_Q1D) { + CPU_FOREACH(qx, x, MPA_Q1D) { + MASS3DPA_5 + } + } + + CPU_FOREACH(d, y, MPA_D1D) { + CPU_FOREACH(q, x, MPA_Q1D) { + MASS3DPA_6 + } + } + + CPU_FOREACH(qy, y, MPA_Q1D) { + CPU_FOREACH(dx, x, MPA_D1D) { + MASS3DPA_7 + } + } + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(dx, x, MPA_D1D) { + MASS3DPA_8 + } + } + + CPU_FOREACH(dy, y, MPA_D1D) { + CPU_FOREACH(dx, x, MPA_D1D) { + MASS3DPA_9 + } + } + + }); // element loop + + } + stopTimer(); + + break; + } + + default: + getCout() << "\n MASS3DPA : Unknown StdPar variant id = " << vid << std::endl; + } + +#else + RAJA_UNUSED_VAR(vid); +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/MASS3DPA.cpp b/src/apps/MASS3DPA.cpp index a70e98847..f8c9bc47f 100644 --- a/src/apps/MASS3DPA.cpp +++ b/src/apps/MASS3DPA.cpp @@ -61,6 +61,7 @@ MASS3DPA::MASS3DPA(const RunParams& params) 
setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); } MASS3DPA::~MASS3DPA() diff --git a/src/apps/MASS3DPA.hpp b/src/apps/MASS3DPA.hpp index 7365fa011..be53a625f 100644 --- a/src/apps/MASS3DPA.hpp +++ b/src/apps/MASS3DPA.hpp @@ -363,6 +363,7 @@ class MASS3DPA : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp new file mode 100644 index 000000000..81f5ef3d1 --- /dev/null +++ b/src/apps/NODAL_ACCUMULATION_3D-StdPar.cpp @@ -0,0 +1,91 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "NODAL_ACCUMULATION_3D.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include "AppsData.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void NODAL_ACCUMULATION_3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = m_domain->n_real_zones; + + NODAL_ACCUMULATION_3D_DATA_SETUP; + + NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#warning needs parallel for+atomic or reduce + for (Index_type ii = ibegin ; ii < iend ; ++ii ) { + NODAL_ACCUMULATION_3D_BODY_INDEX; + NODAL_ACCUMULATION_3D_BODY; + } + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto nodal_accumulation_3d_lam = [=](Index_type ii) { + NODAL_ACCUMULATION_3D_BODY_INDEX; + NODAL_ACCUMULATION_3D_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#warning needs parallel for+atomic or reduce + for (Index_type ii = ibegin ; ii < iend ; ++ii ) { + nodal_accumulation_3d_lam(ii); + } + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n NODAL_ACCUMULATION_3D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace apps +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/apps/NODAL_ACCUMULATION_3D.cpp b/src/apps/NODAL_ACCUMULATION_3D.cpp index 7ed2f0399..e9e11a81a 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D.cpp @@ -67,6 +67,9 @@ NODAL_ACCUMULATION_3D::NODAL_ACCUMULATION_3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( 
RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } NODAL_ACCUMULATION_3D::~NODAL_ACCUMULATION_3D() diff --git a/src/apps/NODAL_ACCUMULATION_3D.hpp b/src/apps/NODAL_ACCUMULATION_3D.hpp index 5b0ce0d77..05f83268f 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.hpp +++ b/src/apps/NODAL_ACCUMULATION_3D.hpp @@ -95,6 +95,7 @@ class NODAL_ACCUMULATION_3D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/PRESSURE-StdPar.cpp b/src/apps/PRESSURE-StdPar.cpp new file mode 100644 index 000000000..551c8c730 --- /dev/null +++ b/src/apps/PRESSURE-StdPar.cpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PRESSURE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include
+
+namespace rajaperf
+{
+namespace apps
+{
+// StdPar variants of PRESSURE: every repetition runs PRESSURE_BODY1 and then
+// PRESSURE_BODY2 as two separate par_unseq passes over [ibegin, iend). tune_idx is not consulted.
+void PRESSURE::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  PRESSURE_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          PRESSURE_BODY1;
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          PRESSURE_BODY2;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same two passes; the bodies are named lambdas built outside the timed loop.
+      auto pressure_lam1 = [=](Index_type i) {
+                             PRESSURE_BODY1;
+                           };
+      auto pressure_lam2 = [=](Index_type i) {
+                             PRESSURE_BODY2;
+                           };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          pressure_lam1(i);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          pressure_lam2(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n PRESSURE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/apps/PRESSURE.cpp b/src/apps/PRESSURE.cpp index 18979f3bd..64c15ccc4 100644 --- a/src/apps/PRESSURE.cpp +++ b/src/apps/PRESSURE.cpp @@ -52,6 +52,9 @@ PRESSURE::PRESSURE(const RunParams& params) setVariantDefined( Base_HIP );
setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } PRESSURE::~PRESSURE() diff --git a/src/apps/PRESSURE.hpp b/src/apps/PRESSURE.hpp index c0568a8e0..d1ad4e874 100644 --- a/src/apps/PRESSURE.hpp +++ b/src/apps/PRESSURE.hpp @@ -72,6 +72,7 @@ class PRESSURE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/VOL3D-StdPar.cpp b/src/apps/VOL3D-StdPar.cpp new file mode 100644 index 000000000..087ebd577 --- /dev/null +++ b/src/apps/VOL3D-StdPar.cpp @@ -0,0 +1,96 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "VOL3D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "AppsData.hpp"
+
+#include "common/StdParUtils.hpp"
+
+#include
+
+namespace rajaperf
+{
+namespace apps
+{
+// StdPar variants of VOL3D: each repetition runs VOL3D_BODY with for_each_n(par_unseq)
+// over zones m_domain->fpz .. m_domain->lpz (inclusive). tune_idx is not consulted.
+void VOL3D::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = m_domain->fpz;
+  const Index_type iend = m_domain->lpz+1;
+
+  VOL3D_DATA_SETUP;
+
+  NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, y,y0,y1,y2,y3,y4,y5,y6,y7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, z,z0,z1,z2,z3,z4,z5,z6,z7) ;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          VOL3D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same kernel; the body is a named lambda built outside the timed loop.
+      auto vol3d_lam = [=](Index_type i) {
+                         VOL3D_BODY;
+                       };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          vol3d_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n VOL3D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#else
+  RAJA_UNUSED_VAR(vid);
+#endif
+}
+
+} // end namespace apps
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/apps/VOL3D.cpp b/src/apps/VOL3D.cpp index a1097163a..7f5edf7d1 100644 --- a/src/apps/VOL3D.cpp +++ b/src/apps/VOL3D.cpp @@ -64,6 +64,9 @@ VOL3D::VOL3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } VOL3D::~VOL3D() diff --git
a/src/apps/VOL3D.hpp b/src/apps/VOL3D.hpp index aa6701855..82fe191ae 100644 --- a/src/apps/VOL3D.hpp +++ b/src/apps/VOL3D.hpp @@ -169,6 +169,7 @@ class VOL3D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/apps/WIP-COUPLE.hpp b/src/apps/WIP-COUPLE.hpp index 33faa85cc..0b5b67ed0 100644 --- a/src/apps/WIP-COUPLE.hpp +++ b/src/apps/WIP-COUPLE.hpp @@ -171,6 +171,7 @@ class COUPLE : public KernelBase void runCudaVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} void runHipVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} void runOpenMPTargetVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} + void runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {(void) vid;} private: Complex_ptr m_t0; diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp index b8ab91cd1..76ac5331a 100644 --- a/src/basic-kokkos/DAXPY-Kokkos.cpp +++ b/src/basic-kokkos/DAXPY-Kokkos.cpp @@ -53,7 +53,7 @@ void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) break; } default: { - std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp index 19e916dac..c38a15581 100644 --- a/src/basic-kokkos/IF_QUAD-Kokkos.cpp +++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp @@ -61,7 +61,7 @@ void IF_QUAD::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_id } default: { - std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; + getCout() << "\n IF_QUAD : Unknown variant id = " << vid 
<< std::endl; } } diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp index 661180c7b..b8ffd4551 100644 --- a/src/basic-kokkos/INIT3-Kokkos.cpp +++ b/src/basic-kokkos/INIT3-Kokkos.cpp @@ -57,7 +57,7 @@ void INIT3::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) } default: { - std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT3 : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp index 8c775a3b0..fddc36ed2 100644 --- a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp @@ -46,7 +46,7 @@ void INIT_VIEW1D::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } default: { - std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + getCout() << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp index 9df018264..7a39f711d 100644 --- a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp @@ -46,7 +46,7 @@ void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ } default: { - std::cout << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid + getCout() << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp index 49e890315..f913410b5 100644 --- a/src/basic-kokkos/MULADDSUB-Kokkos.cpp +++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp @@ -56,7 +56,7 @@ void MULADDSUB::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } default: { - std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + getCout() << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; } } moveDataToHostFromKokkosView(out1, 
out1_view, iend); diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp index 36929cead..aa212a724 100644 --- a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp +++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp @@ -68,7 +68,7 @@ void NESTED_INIT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } default: { - std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; + getCout() << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; } } } diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp index 233ca71af..86d5c9e38 100644 --- a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp +++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp @@ -59,7 +59,7 @@ void PI_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } default: { - std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; + getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; } } } diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp index 23c0ab6f4..661badb8e 100644 --- a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp +++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp @@ -65,7 +65,7 @@ void REDUCE3_INT::runKokkosVariant(VariantID vid, } default: { - std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; } } diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp index 5cdb9060f..09f79e5fe 100644 --- a/src/basic-kokkos/TRAP_INT-Kokkos.cpp +++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp @@ -59,7 +59,7 @@ void TRAP_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i } default: { - std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + getCout() << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; } } } diff --git a/src/basic/CMakeLists.txt b/src/basic/CMakeLists.txt index 
3be6e0c3c..90bd7262d 100644 --- a/src/basic/CMakeLists.txt +++ b/src/basic/CMakeLists.txt @@ -10,30 +10,35 @@ blt_add_library( NAME basic SOURCES DAXPY.cpp DAXPY-Seq.cpp + DAXPY-StdPar.cpp DAXPY-Hip.cpp DAXPY-Cuda.cpp DAXPY-OMP.cpp DAXPY-OMPTarget.cpp DAXPY_ATOMIC.cpp DAXPY_ATOMIC-Seq.cpp + DAXPY_ATOMIC-StdPar.cpp DAXPY_ATOMIC-Hip.cpp DAXPY_ATOMIC-Cuda.cpp DAXPY_ATOMIC-OMP.cpp DAXPY_ATOMIC-OMPTarget.cpp IF_QUAD.cpp IF_QUAD-Seq.cpp + IF_QUAD-StdPar.cpp IF_QUAD-Hip.cpp IF_QUAD-Cuda.cpp IF_QUAD-OMP.cpp IF_QUAD-OMPTarget.cpp INDEXLIST.cpp INDEXLIST-Seq.cpp + INDEXLIST-StdPar.cpp INDEXLIST-Hip.cpp INDEXLIST-Cuda.cpp INDEXLIST-OMP.cpp INDEXLIST-OMPTarget.cpp INDEXLIST_3LOOP.cpp INDEXLIST_3LOOP-Seq.cpp + INDEXLIST_3LOOP-StdPar.cpp INDEXLIST_3LOOP-Hip.cpp INDEXLIST_3LOOP-Cuda.cpp INDEXLIST_3LOOP-OMP.cpp @@ -46,63 +51,74 @@ blt_add_library( INIT3-OMPTarget.cpp INIT_VIEW1D.cpp INIT_VIEW1D-Seq.cpp + INIT_VIEW1D-StdPar.cpp INIT_VIEW1D-Hip.cpp INIT_VIEW1D-Cuda.cpp INIT_VIEW1D-OMP.cpp INIT_VIEW1D-OMPTarget.cpp INIT_VIEW1D_OFFSET.cpp INIT_VIEW1D_OFFSET-Seq.cpp + INIT_VIEW1D_OFFSET-StdPar.cpp INIT_VIEW1D_OFFSET-Hip.cpp INIT_VIEW1D_OFFSET-Cuda.cpp INIT_VIEW1D_OFFSET-OMP.cpp INIT_VIEW1D_OFFSET-OMPTarget.cpp MAT_MAT_SHARED.cpp MAT_MAT_SHARED-Seq.cpp + MAT_MAT_SHARED-StdPar.cpp MAT_MAT_SHARED-Hip.cpp MAT_MAT_SHARED-Cuda.cpp MAT_MAT_SHARED-OMP.cpp MAT_MAT_SHARED-OMPTarget.cpp MULADDSUB.cpp MULADDSUB-Seq.cpp + MULADDSUB-StdPar.cpp MULADDSUB-Hip.cpp MULADDSUB-Cuda.cpp MULADDSUB-OMP.cpp MULADDSUB-OMPTarget.cpp NESTED_INIT.cpp NESTED_INIT-Seq.cpp + NESTED_INIT-StdPar.cpp NESTED_INIT-Hip.cpp NESTED_INIT-Cuda.cpp NESTED_INIT-OMP.cpp NESTED_INIT-OMPTarget.cpp PI_ATOMIC.cpp PI_ATOMIC-Seq.cpp + PI_ATOMIC-StdPar.cpp PI_ATOMIC-Hip.cpp PI_ATOMIC-Cuda.cpp PI_ATOMIC-OMP.cpp PI_ATOMIC-OMPTarget.cpp PI_REDUCE.cpp PI_REDUCE-Seq.cpp + PI_REDUCE-StdPar.cpp PI_REDUCE-Hip.cpp PI_REDUCE-Cuda.cpp PI_REDUCE-OMP.cpp PI_REDUCE-OMPTarget.cpp REDUCE3_INT.cpp REDUCE3_INT-Seq.cpp + REDUCE3_INT-StdPar.cpp 
REDUCE3_INT-Hip.cpp REDUCE3_INT-Cuda.cpp REDUCE3_INT-OMP.cpp REDUCE3_INT-OMPTarget.cpp REDUCE_STRUCT.cpp REDUCE_STRUCT-Seq.cpp + REDUCE_STRUCT-StdPar.cpp REDUCE_STRUCT-Hip.cpp REDUCE_STRUCT-Cuda.cpp REDUCE_STRUCT-OMP.cpp REDUCE_STRUCT-OMPTarget.cpp TRAP_INT.cpp TRAP_INT-Seq.cpp + TRAP_INT-StdPar.cpp TRAP_INT-Hip.cpp TRAP_INT-Cuda.cpp TRAP_INT-OMPTarget.cpp TRAP_INT-OMP.cpp + INIT3-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/basic/DAXPY-StdPar.cpp b/src/basic/DAXPY-StdPar.cpp new file mode 100644 index 000000000..6ee417e53 --- /dev/null +++ b/src/basic/DAXPY-StdPar.cpp @@ -0,0 +1,87 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DAXPY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include
+
+namespace rajaperf
+{
+namespace basic
+{
+// StdPar variants of DAXPY: each repetition runs DAXPY_BODY with
+// std::for_each_n(par_unseq) over [ibegin, iend). tune_idx is not consulted.
+void DAXPY::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DAXPY_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          DAXPY_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same kernel; the body is a named lambda built outside the timed loop.
+      auto daxpy_lam = [=](Index_type i) {
+                         DAXPY_BODY;
+                       };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          daxpy_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index 67f4b0eb0..8b47600d8 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -52,6 +52,9 @@ DAXPY::DAXPY(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/DAXPY.hpp b/src/basic/DAXPY.hpp index bcaca8054..bb19f0022 100644 --- a/src/basic/DAXPY.hpp +++ b/src/basic/DAXPY.hpp @@ -52,6 +52,7 @@ class DAXPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/DAXPY_ATOMIC-StdPar.cpp b/src/basic/DAXPY_ATOMIC-StdPar.cpp new file mode 100644 index 000000000..6ad5f90a7 --- /dev/null +++ b/src/basic/DAXPY_ATOMIC-StdPar.cpp @@ -0,0 +1,120 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DAXPY_ATOMIC.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include
+#include
+
+namespace rajaperf
+{
+namespace basic
+{
+// StdPar variants of DAXPY_ATOMIC: y[i] is updated with a * x[i] through the first
+// mechanism selected by the #if chain in the loop body; the final #else arm is NOT atomic.
+void DAXPY_ATOMIC::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DAXPY_ATOMIC_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // NOTE(review): atomics inside a par_unseq callable may count as vectorization-unsafe -- confirm.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+#if defined(NVCXX_GPU_ENABLED)
+          //atomicAdd(&y[i],a * x[i]);
+          atomicaddd(&y[i],a * x[i]);
+#elif defined(_OPENMP)
+          #pragma omp atomic
+          y[i] += a * x[i];
+#elif defined(_OPENACC)
+          #pragma acc atomic
+          y[i] += a * x[i];
+#elif __cpp_lib_atomic_ref
+          auto px = std::atomic_ref(x[i]);
+          auto py = std::atomic_ref(y[i]);
+          py += a * px;
+#else
+#warning No atomic
+          y[i] += a * x[i];
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Duplicates the Base_StdPar body; DAXPY_ATOMIC.cpp leaves Lambda_StdPar registration commented out.
+      auto daxpy_atomic_lam = [=](Index_type i) {
+#if defined(NVCXX_GPU_ENABLED)
+          //atomicAdd(&y[i],a * x[i]);
+          atomicaddd(&y[i],a * x[i]);
+#elif defined(_OPENMP)
+          #pragma omp atomic
+          y[i] += a * x[i];
+#elif defined(_OPENACC)
+          #pragma acc atomic
+          y[i] += a * x[i];
+#elif __cpp_lib_atomic_ref
+          auto px = std::atomic_ref(x[i]);
+          auto py = std::atomic_ref(y[i]);
+          py += a * px;
+#else
+#warning No atomic
+          y[i] += a * x[i];
+#endif
+      };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          daxpy_atomic_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp index e58516d2f..b27dfedce 100644 --- a/src/basic/DAXPY_ATOMIC.cpp +++ b/src/basic/DAXPY_ATOMIC.cpp @@ -52,6 +52,9 @@ DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/DAXPY_ATOMIC.hpp b/src/basic/DAXPY_ATOMIC.hpp index 9c2890e48..dc35161fd 100644 --- a/src/basic/DAXPY_ATOMIC.hpp +++ b/src/basic/DAXPY_ATOMIC.hpp @@ -55,6 +55,7 @@ class DAXPY_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/IF_QUAD-StdPar.cpp b/src/basic/IF_QUAD-StdPar.cpp new file mode 100644 index 000000000..c36a7fcaa --- /dev/null +++ b/src/basic/IF_QUAD-StdPar.cpp @@ -0,0 +1,87 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "IF_QUAD.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include
+
+namespace rajaperf
+{
+namespace basic
+{
+// StdPar variants of IF_QUAD: each repetition runs IF_QUAD_BODY with
+// std::for_each_n(par_unseq) over [ibegin, iend). tune_idx is not consulted.
+void IF_QUAD::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  IF_QUAD_DATA_SETUP;
+  // Built before the switch, so it exists for both variants; only Lambda_StdPar calls it.
+  auto ifquad_lam = [=](Index_type i) {
+                      IF_QUAD_BODY;
+                    };
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          IF_QUAD_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          ifquad_lam(i);
+        });
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n IF_QUAD : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index b0fe3469b..c51d2ca47 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -56,6 +56,9 @@ IF_QUAD::IF_QUAD(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/IF_QUAD.hpp b/src/basic/IF_QUAD.hpp index f1f3e12a8..3bd4888b0 100644 --- a/src/basic/IF_QUAD.hpp +++ b/src/basic/IF_QUAD.hpp @@ -69,6 +69,7 @@ class IF_QUAD : public KernelBase void runCudaVariant(VariantID vid,
size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST-StdPar.cpp b/src/basic/INDEXLIST-StdPar.cpp new file mode 100644 index 000000000..2da1c38b9 --- /dev/null +++ b/src/basic/INDEXLIST-StdPar.cpp @@ -0,0 +1,125 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INDEXLIST.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +void INDEXLIST::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RUN_STDPAR) + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INDEXLIST_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + auto counts = std::vector(iend+1,0); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#if 1 + Index_type count = 0; + +#warning needs parallel something + for (Index_type i = ibegin; i < iend; ++i ) { + if ( x[i] < 0.0 ) { + list[count++] = i; + //y[i] = 1; + } + } + + m_len = count; +#else + std::transform_exclusive_scan( //std::execution:seq, + &x[ibegin], &x[iend], + &counts[0], 0, + std::plus{}, + [=](Real_type x){ return (x < 0.0); }); + + std::for_each_n( //std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + if (counts[i] != counts[i+1]) { \ + 
list[counts[i]] = i; + } + }); + + m_len = counts[iend+1]; +#endif + + if (irep == 0) { + //printf("\n\n%d\n",counts[iend]); + //for (Index_type i = ibegin, j=0; i < iend && j + +namespace rajaperf +{ +namespace basic +{ + +#define INDEXLIST_3LOOP_DATA_SETUP_StdPar \ + Index_type* counts = new Index_type[iend+1]; + +#define INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar \ + delete[] counts; counts = nullptr; + + + +void INDEXLIST_3LOOP::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INDEXLIST_3LOOP_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + INDEXLIST_3LOOP_DATA_SETUP_StdPar; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 1 : 0; + }); + + // The validation does not notice if the exscan + // is removed, or otherwise forced to be wrong. + // Using brute-force validation (see below): + // Intel and GCC output 0s when any execution policy is used. + // NVHPC (GPU) is fine. + std::exclusive_scan( +#ifdef __NVCOMPILER + std::execution::par_unseq, +#endif + counts+ibegin, counts+iend+1, + counts+ibegin, 0); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + INDEXLIST_3LOOP_MAKE_LIST; + }); + + m_len = counts[iend]; + +#if BRUTE_FORCE_VALIDATION + for (Index_type i = ibegin; i < iend+1; ++i ) { + std::cout << "C: " << i << "," << counts[i] << "\n"; + } +#endif + } + stopTimer(); + + INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; + + break; + } + + case Lambda_StdPar : { + + INDEXLIST_3LOOP_DATA_SETUP_StdPar; + + auto indexlist_conditional_lam = [=](Index_type i) { + counts[i] = (INDEXLIST_3LOOP_CONDITIONAL) ? 
1 : 0; + }; + + auto indexlist_make_list_lam = [=](Index_type i) { + INDEXLIST_3LOOP_MAKE_LIST; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + indexlist_conditional_lam(i); + }); + + // See comments above... + std::exclusive_scan( +#ifdef __NVCOMPILER + std::execution::par_unseq, +#endif + counts+ibegin, counts+iend+1, + counts+ibegin, 0); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + indexlist_make_list_lam(i); + }); + + m_len = counts[iend]; + + } + stopTimer(); + + INDEXLIST_3LOOP_DATA_TEARDOWN_StdPar; + + break; + } + + default : { + getCout() << "\n INDEXLIST_3LOOP : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/INDEXLIST_3LOOP.cpp b/src/basic/INDEXLIST_3LOOP.cpp index 3ddb3fc0c..311477a53 100644 --- a/src/basic/INDEXLIST_3LOOP.cpp +++ b/src/basic/INDEXLIST_3LOOP.cpp @@ -58,6 +58,9 @@ INDEXLIST_3LOOP::INDEXLIST_3LOOP(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } INDEXLIST_3LOOP::~INDEXLIST_3LOOP() diff --git a/src/basic/INDEXLIST_3LOOP.hpp b/src/basic/INDEXLIST_3LOOP.hpp index e19ee5508..408c6483c 100644 --- a/src/basic/INDEXLIST_3LOOP.hpp +++ b/src/basic/INDEXLIST_3LOOP.hpp @@ -71,6 +71,7 @@ class INDEXLIST_3LOOP : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT3-StdPar.cpp 
b/src/basic/INIT3-StdPar.cpp new file mode 100644 index 000000000..1817a1ee1 --- /dev/null +++ b/src/basic/INIT3-StdPar.cpp @@ -0,0 +1,87 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT3.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include
+
+namespace rajaperf
+{
+namespace basic
+{
+// StdPar variants of INIT3: each repetition runs INIT3_BODY with
+// std::for_each_n(par_unseq) over [ibegin, iend). tune_idx is not consulted.
+void INIT3::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+#if defined(RUN_STDPAR)
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INIT3_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          INIT3_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same kernel; the body is a named lambda built outside the timed loop.
+      auto init3_lam = [=](Index_type i) {
+                         INIT3_BODY;
+                       };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend-ibegin,
+                         [=](Index_type i) {
+          init3_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n INIT3 : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index b2be64f84..a0fdb4763 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -52,6 +52,9 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Lambda_HIP );
setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp index aed67bfeb..d46abd1b9 100644 --- a/src/basic/INIT3.hpp +++ b/src/basic/INIT3.hpp @@ -55,6 +55,7 @@ class INIT3 : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D-StdPar.cpp b/src/basic/INIT_VIEW1D-StdPar.cpp new file mode 100644 index 000000000..1bbfce7f0 --- /dev/null +++ b/src/basic/INIT_VIEW1D-StdPar.cpp @@ -0,0 +1,87 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT_VIEW1D.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT_VIEW1D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INIT_VIEW1D_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + INIT_VIEW1D_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto initview1d_base_lam = [=](Index_type i) { + INIT_VIEW1D_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + initview1d_base_lam(i); + }); + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index de34c5a28..9a470d387 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -53,6 +53,9 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D.hpp b/src/basic/INIT_VIEW1D.hpp index f3770f69a..84d2f89a8 100644 --- a/src/basic/INIT_VIEW1D.hpp +++ b/src/basic/INIT_VIEW1D.hpp @@ 
-66,6 +66,7 @@ class INIT_VIEW1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp new file mode 100644 index 000000000..e841874a3 --- /dev/null +++ b/src/basic/INIT_VIEW1D_OFFSET-StdPar.cpp @@ -0,0 +1,87 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT_VIEW1D_OFFSET.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT_VIEW1D_OFFSET::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = getActualProblemSize()+1; + + INIT_VIEW1D_OFFSET_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + INIT_VIEW1D_OFFSET_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto initview1doffset_base_lam = [=](Index_type i) { + INIT_VIEW1D_OFFSET_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + 
counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + initview1doffset_base_lam(i); + }); + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index fe1867698..299a4ea27 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -53,6 +53,9 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/INIT_VIEW1D_OFFSET.hpp b/src/basic/INIT_VIEW1D_OFFSET.hpp index d32f59c7b..74c5e82a3 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.hpp +++ b/src/basic/INIT_VIEW1D_OFFSET.hpp @@ -65,6 +65,7 @@ class INIT_VIEW1D_OFFSET : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/MAT_MAT_SHARED-StdPar.cpp b/src/basic/MAT_MAT_SHARED-StdPar.cpp new file mode 100644 index 000000000..b7814c1ae --- /dev/null +++ b/src/basic/MAT_MAT_SHARED-StdPar.cpp @@ -0,0 +1,36 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MAT_MAT_SHARED.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + + +#include + +namespace rajaperf { +namespace basic { + +void MAT_MAT_SHARED::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ + + switch (vid) { + default: { + getCout() << "\n MAT_MAT_SHARED : Unknown variant id = " << vid + << std::endl; + } + } +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/MAT_MAT_SHARED.hpp b/src/basic/MAT_MAT_SHARED.hpp index 095721c27..c18682960 100644 --- a/src/basic/MAT_MAT_SHARED.hpp +++ b/src/basic/MAT_MAT_SHARED.hpp @@ -139,6 +139,7 @@ class MAT_MAT_SHARED : public KernelBase { void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/MULADDSUB-StdPar.cpp b/src/basic/MULADDSUB-StdPar.cpp new file mode 100644 index 000000000..7b71fb648 --- /dev/null +++ b/src/basic/MULADDSUB-StdPar.cpp @@ -0,0 +1,87 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MULADDSUB.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void MULADDSUB::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MULADDSUB_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + MULADDSUB_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto mas_lam = [=](Index_type i) { + MULADDSUB_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + mas_lam(i); + }); + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index 4ae8d6868..714cad544 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -52,6 +52,9 @@ MULADDSUB::MULADDSUB(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/MULADDSUB.hpp b/src/basic/MULADDSUB.hpp index e604a34c8..97b800569 100644 --- a/src/basic/MULADDSUB.hpp +++ b/src/basic/MULADDSUB.hpp @@ -58,6 +58,7 @@ class MULADDSUB : public KernelBase void 
runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/NESTED_INIT-Seq.cpp b/src/basic/NESTED_INIT-Seq.cpp index 0a9c81ff6..917c40315 100644 --- a/src/basic/NESTED_INIT-Seq.cpp +++ b/src/basic/NESTED_INIT-Seq.cpp @@ -39,6 +39,7 @@ void NESTED_INIT::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i for (Index_type j = 0; j < nj; ++j ) { for (Index_type i = 0; i < ni; ++i ) { NESTED_INIT_BODY; + //std::cout << i << "," << j << "," << k << ";" << k*nj*ni+j*ni+i << " SEQ\n"; } } } diff --git a/src/basic/NESTED_INIT-StdPar.cpp b/src/basic/NESTED_INIT-StdPar.cpp new file mode 100644 index 000000000..575c6e9e3 --- /dev/null +++ b/src/basic/NESTED_INIT-StdPar.cpp @@ -0,0 +1,115 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "NESTED_INIT.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void NESTED_INIT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + NESTED_INIT_DATA_SETUP; + + auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) { + NESTED_INIT_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj*nk, + [=](Index_type ijk) { + const auto k = ijk / (nj*ni); + const auto ij = ijk % (nj*ni); + const auto j = ij / ni; + const auto i = ij % ni; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nk, + [=](Index_type k) { + for (Index_type j = 0; j < nj; ++j ) + for (Index_type i = 0; i < ni; ++i ) +#endif + { + NESTED_INIT_BODY; + //getCout() << i << "," << j << "," << k << ";" << ijk << " PAR\n"; + } + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj*nk, + [=](Index_type ijk) { + const auto k = ijk / (nj*ni); + const auto ij = ijk % (nj*ni); + const auto j = ij / ni; + const auto i = ij % ni; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nk, + [=](Index_type k) { + for (Index_type j = 0; j < nj; ++j ) + for (Index_type i = 0; i < ni; ++i ) +#endif + { + nestedinit_lam(i, j, k); + } + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; + 
} + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index 4b9183245..1bec8a9fd 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -63,6 +63,9 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/NESTED_INIT.hpp b/src/basic/NESTED_INIT.hpp index ccaf7079e..a4f2c00c0 100644 --- a/src/basic/NESTED_INIT.hpp +++ b/src/basic/NESTED_INIT.hpp @@ -58,6 +58,7 @@ class NESTED_INIT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_ATOMIC-StdPar.cpp b/src/basic/PI_ATOMIC-StdPar.cpp new file mode 100644 index 000000000..f33bc369b --- /dev/null +++ b/src/basic/PI_ATOMIC-StdPar.cpp @@ -0,0 +1,112 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PI_ATOMIC.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#ifndef _OPENMP +#error Currently, OpenMP atomics are required here. +#endif + +#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA) +#include +typedef cuda::std::atomic myAtomic; +#else +// .fetch_add() for double is not available yet... 
+#include +typedef std::atomic myAtomic; +#endif + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void PI_ATOMIC::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + PI_ATOMIC_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + //myAtomic a_pi{m_pi_init}; + *pi = m_pi_init; + std::for_each_n( std::execution::par, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + double x = (double(i) + 0.5) * dx; + _Pragma("omp atomic") + *pi += dx / (1.0 + x * x); + //a_pi.fetch_add(dx / (1.0 + x * x)); + }); + //*pi = a_pi * 4.0; + *pi *= 4.0; + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto piatomic_base_lam = [=](Index_type i) { + double x = (double(i) + 0.5) * dx; + _Pragma("omp atomic") + *pi += dx / (1.0 + x * x); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + *pi = m_pi_init; + std::for_each_n( std::execution::par, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + piatomic_base_lam(i); + }); + *pi *= 4.0; + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index e1f93dd2f..bcdf59440 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -54,6 +54,9 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/PI_ATOMIC.hpp b/src/basic/PI_ATOMIC.hpp index 
803d6202f..20cf38a68 100644 --- a/src/basic/PI_ATOMIC.hpp +++ b/src/basic/PI_ATOMIC.hpp @@ -54,6 +54,7 @@ class PI_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_REDUCE-StdPar.cpp b/src/basic/PI_REDUCE-StdPar.cpp new file mode 100644 index 000000000..cd466a225 --- /dev/null +++ b/src/basic/PI_REDUCE-StdPar.cpp @@ -0,0 +1,98 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PI_REDUCE.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void PI_REDUCE::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + PI_REDUCE_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type pi = m_pi_init; + pi += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), + [=](Index_type i) { + Real_type x = (Real_type(i) + 0.5) * dx; + return dx / (1.0 + x * x); + }); + m_pi = 4.0 * pi; + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto 
pireduce_base_lam = [=](Index_type i) -> Real_type { + Real_type x = (Real_type(i) + 0.5) * dx; + return dx / (1.0 + x * x); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type pi = m_pi_init; + + pi += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), pireduce_base_lam); + + m_pi = 4.0 * pi; + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n PI_REDUCE : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index 84c38ce67..482fc5811 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -51,6 +51,9 @@ PI_REDUCE::PI_REDUCE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } PI_REDUCE::~PI_REDUCE() diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index 49fca096d..2c2548909 100644 --- a/src/basic/PI_REDUCE.hpp +++ b/src/basic/PI_REDUCE.hpp @@ -56,6 +56,7 @@ class PI_REDUCE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE3_INT-StdPar.cpp b/src/basic/REDUCE3_INT-StdPar.cpp new file mode 100644 index 000000000..ac4abcf0d --- /dev/null +++ b/src/basic/REDUCE3_INT-StdPar.cpp @@ -0,0 +1,117 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE3_INT.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void REDUCE3_INT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + REDUCE3_INT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + typedef std::array Reduce_type; + Reduce_type result = + std::transform_reduce( std::execution::par_unseq, + begin, end, + Reduce_type{m_vsum_init,m_vmin_init,m_vmax_init}, + [=](Reduce_type a, Reduce_type b) -> Reduce_type { + auto plus = a[0] + b[0]; + auto min = std::min(a[1],b[1]); + auto max = std::max(a[2],b[2]); + Reduce_type red{ plus, min, max }; + return red; + }, + [=](Index_type i) -> std::array{ + Reduce_type val{ vec[i], vec[i], vec[i] }; + return val; + + } + ); + + m_vsum += result[0]; + m_vmin = std::min(m_vmin, result[1]); + m_vmax = std::max(m_vmax, result[2]); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto init3_base_lam = [=](Index_type i) -> Int_type { + return vec[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Int_type vsum = m_vsum_init; + Int_type vmin = m_vmin_init; + Int_type vmax = m_vmax_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + vsum += init3_base_lam(i); + vmin = std::min(vmin, init3_base_lam(i)); + vmax = std::max(vmax, init3_base_lam(i)); + } + + m_vsum += vsum; + m_vmin = std::min(m_vmin, vmin); + m_vmax = std::max(m_vmax, vmax); + + } + stopTimer(); + + break; + } + + default : { + 
getCout() << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index 975bf8f24..0fc262ea9 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -57,6 +57,9 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/basic/REDUCE3_INT.hpp b/src/basic/REDUCE3_INT.hpp index e82c2cf05..a89435750 100644 --- a/src/basic/REDUCE3_INT.hpp +++ b/src/basic/REDUCE3_INT.hpp @@ -70,6 +70,7 @@ class REDUCE3_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE_STRUCT-StdPar.cpp b/src/basic/REDUCE_STRUCT-StdPar.cpp new file mode 100644 index 000000000..e82cc98ee --- /dev/null +++ b/src/basic/REDUCE_STRUCT-StdPar.cpp @@ -0,0 +1,107 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE_STRUCT.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void REDUCE_STRUCT::runStdParVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + REDUCE_STRUCT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type xsum = m_init_sum; Real_type ysum = m_init_sum; + Real_type xmin = m_init_min; Real_type ymin = m_init_min; + Real_type xmax = m_init_max; Real_type ymax = m_init_max; + + using Reduce_type = std::array; + Reduce_type result = + std::transform_reduce( std::execution::par_unseq, + begin, end, + Reduce_type{ m_init_sum, m_init_min, m_init_max, // x + m_init_sum, m_init_min, m_init_max }, // y + [=](Reduce_type a, Reduce_type b) -> Reduce_type { + auto xsum = a[0] + b[0]; + auto xmin = std::min(a[1],b[1]); + auto xmax = std::max(a[2],b[2]); + auto ysum = a[3] + b[3]; + auto ymin = std::min(a[4],b[4]); + auto ymax = std::max(a[5],b[5]); + Reduce_type red{ xsum, xmin, xmax, ysum, ymin, ymax }; + return red; + }, + [=](Index_type i) -> Reduce_type { + Reduce_type val{ points.x[i], points.x[i], points.x[i], + points.y[i], points.y[i], points.y[i] }; + return val; + + } + ); + + xsum = result[0]; + xmin = result[1]; + xmax = result[2]; + ysum = result[3]; + ymin = result[4]; + ymax = result[5]; + + points.SetCenter(xsum/(points.N), ysum/(points.N)); + points.SetXMin(xmin); + points.SetXMax(xmax); + points.SetYMin(ymin); + points.SetYMax(ymax); + m_points=points; + + } 
+ stopTimer(); + + break; + } + + default : { + getCout() << "\n REDUCE_STRUCT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/REDUCE_STRUCT.cpp b/src/basic/REDUCE_STRUCT.cpp index 5edaadede..eb78bb21b 100644 --- a/src/basic/REDUCE_STRUCT.cpp +++ b/src/basic/REDUCE_STRUCT.cpp @@ -56,6 +56,9 @@ REDUCE_STRUCT::REDUCE_STRUCT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); } REDUCE_STRUCT::~REDUCE_STRUCT() diff --git a/src/basic/REDUCE_STRUCT.hpp b/src/basic/REDUCE_STRUCT.hpp index 425e7796e..440964704 100644 --- a/src/basic/REDUCE_STRUCT.hpp +++ b/src/basic/REDUCE_STRUCT.hpp @@ -86,6 +86,7 @@ class REDUCE_STRUCT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/TRAP_INT-StdPar.cpp b/src/basic/TRAP_INT-StdPar.cpp new file mode 100644 index 000000000..c2f22206b --- /dev/null +++ b/src/basic/TRAP_INT-StdPar.cpp @@ -0,0 +1,112 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRAP_INT.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +// +// Function used in TRAP_INT loop. 
+// +RAJA_INLINE +Real_type trap_int_func(Real_type x, + Real_type y, + Real_type xp, + Real_type yp) +{ + Real_type denom = (x - xp)*(x - xp) + (y - yp)*(y - yp); + denom = 1.0/sqrt(denom); + return denom; +} + +void TRAP_INT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + TRAP_INT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sumx = m_sumx_init; + + sumx += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), + [=](Index_type i) { + Real_type x = x0 + i*h; + return trap_int_func(x, y, xp, yp); + }); + m_sumx += sumx * h; + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto trapint_base_lam = [=](Index_type i) -> Real_type { + Real_type x = x0 + i*h; + return trap_int_func(x, y, xp, yp); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sumx = m_sumx_init; + + sumx += std::transform_reduce( std::execution::par_unseq, + begin, end, + Real_type(0), std::plus(), trapint_base_lam); + + m_sumx += sumx * h; + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace basic +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index eaac3ffda..6b4dc8a3c 100644 --- a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -52,6 +52,9 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff 
--git a/src/basic/TRAP_INT.hpp b/src/basic/TRAP_INT.hpp index e64932dbe..53650e761 100644 --- a/src/basic/TRAP_INT.hpp +++ b/src/basic/TRAP_INT.hpp @@ -67,6 +67,7 @@ class TRAP_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index a41e3fd00..40d6ecd9d 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -1071,10 +1071,10 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode, if ( (mode == CSVRepMode::Speedup) && (!kern->hasVariantTuningDefined(reference_vid, reference_tune_idx) || !kern->hasVariantTuningDefined(vid, tuning_name)) ) { - file << "Not run"; + file << "NotRun"; } else if ( (mode == CSVRepMode::Timing) && !kern->hasVariantTuningDefined(vid, tuning_name) ) { - file << "Not run"; + file << "NotRun"; } else { file << setprecision(prec) << std::fixed << getReportDataEntry(mode, combiner, kern, vid, diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index c620c4880..7a86651b1 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -129,6 +129,14 @@ void KernelBase::setVariantDefined(VariantID vid) #endif break; } + + case Base_StdPar : + case Lambda_StdPar : + { + setStdParTuningDefinitions(vid); + break; + } + // Required for running Kokkos case Kokkos_Lambda : { @@ -246,6 +254,14 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx) #endif break; } + + case Base_StdPar : + case Lambda_StdPar : + { + runStdParVariant(vid, tune_idx); + break; + } + case Kokkos_Lambda : { #if defined(RUN_KOKKOS) diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index cec79e2eb..eb00f8672 100644 --- a/src/common/KernelBase.hpp +++ 
b/src/common/KernelBase.hpp @@ -101,6 +101,9 @@ class KernelBase { addVariantTuningName(vid, getDefaultTuningName()); } #endif + virtual void setStdParTuningDefinitions(VariantID vid) + { addVariantTuningName(vid, getDefaultTuningName()); } + // // Getter methods used to generate kernel execution summary // and kernel details report ouput. @@ -333,6 +336,7 @@ class KernelBase #if defined(RAJA_ENABLE_TARGET_OPENMP) virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0; #endif + virtual void runStdParVariant(VariantID vid, size_t tune_idx) = 0; #if defined(RUN_KOKKOS) virtual void runKokkosVariant(VariantID vid, size_t tune_idx) diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index da6683907..5516359ed 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -278,6 +278,9 @@ static const std::string VariantNames [] = std::string("Lambda_HIP"), std::string("RAJA_HIP"), + std::string("Base_StdPar"), + std::string("Lambda_StdPar"), + std::string("Kokkos_Lambda"), std::string("Unknown Variant") // Keep this at the end and DO NOT remove.... @@ -424,6 +427,11 @@ bool isVariantAvailable(VariantID vid) } #endif + if ( vid == Base_StdPar || + vid == Lambda_StdPar) { + ret_val = true; + } + #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { ret_val = true; @@ -485,6 +493,11 @@ bool isVariantGPU(VariantID vid) } #endif + if ( vid == Base_StdPar || + vid == Lambda_StdPar) { + ret_val = true; + } + #if defined(RUN_KOKKOS) if ( vid == Kokkos_Lambda ) { ret_val = true; diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 1f4da8c9b..cb7c914dd 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -194,6 +194,9 @@ enum VariantID { Lambda_HIP, RAJA_HIP, + Base_StdPar, + Lambda_StdPar, + Kokkos_Lambda, NumVariants // Keep this one last and NEVER comment out (!!) 
diff --git a/src/common/StdParUtils.hpp b/src/common/StdParUtils.hpp new file mode 100644 index 000000000..82ae2fa89 --- /dev/null +++ b/src/common/StdParUtils.hpp @@ -0,0 +1,133 @@ +/* +Copyright (c) 2021, NVIDIA +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+#pragma once  // this header defines counting_iterator and make_counter; including it twice would otherwise redefine them
+#if defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA)
+#define USE_STDPAR_COLLAPSE
+#define NVCXX_GPU_ENABLED
+#endif
+
+#if ( defined(__NVCOMPILER_CUDA__) || defined(_NVHPC_STDPAR_CUDA))
+#define NVHPC_CALC_VERSION(MAJOR, MINOR, PATCH) (((MAJOR) * 10000) + ((MINOR) * 100) + (PATCH))
+#if NVHPC_CALC_VERSION(__NVCOMPILER_MAJOR__,__NVCOMPILER_MINOR__,__NVCOMPILER_PATCHLEVEL__) < 220900
+static inline void std::__throw_bad_array_new_length() { std::abort(); }  // only defined for compiler versions below 22.9
+#endif
+#endif
+
+#if 0 //defined(_NVHPC_STDPAR_MULTICORE)
+#warning COLLAPSE (TESTING ONLY - DISABLE IN PRODUCTION)
+#define USE_STDPAR_COLLAPSE
+#endif
+
+#if defined(NVCXX_GPU_ENABLED)
+// this is required to get NVC++ to compile CUDA atomics in StdPar
+#include  // NOTE(review): header name missing in this text (angle-bracket content looks stripped) - restore from the original file
+#endif
+
+// This implementation was authored by David Olsen
+// NOTE(review): the four header names below are also missing in this text - restore them from the original file.
+#include
+#include
+#include
+#include
+// Random-access iterator whose "element" is the stored value itself: *it yields value, it[n] yields value + n.
+template <typename T>
+struct counting_iterator {
+
+private:
+  typedef counting_iterator self;
+
+public:
+  typedef T value_type;
+  typedef typename std::make_signed<T>::type difference_type;
+  typedef T const* pointer;
+  typedef T const& reference;
+  typedef std::random_access_iterator_tag iterator_category;
+
+  explicit counting_iterator(value_type v) : value(v) { }
+
+  value_type operator*() const { return value; }
+  value_type operator[](difference_type n) const { return value + n; }
+
+  self& operator++() { ++value; return *this; }
+  self operator++(int) {
+    self result{value};
+    ++value;
+    return result;
+  }
+  self& operator--() { --value; return *this; }
+  self operator--(int) {
+    self result{value};
+    --value;
+    return result;
+  }
+  self& operator+=(difference_type n) { value += n; return *this; }
+  self& operator-=(difference_type n) { value -= n; return *this; }
+
+  friend self operator+(self const& i, difference_type n) {
+    return self(i.value + n);
+  }
+  friend self operator+(difference_type n, self const& i) {
+    return self(i.value + n);
+  }
+  friend difference_type operator-(self const& x, self const& y) {
+    return x.value - y.value;
+  }
+  friend self operator-(self const& i, difference_type n) {
+    return self(i.value - n);
+  }
+
+  friend bool operator==(self const& x, self const& y) {
+    return x.value == y.value;
+  }
+  friend bool operator!=(self const& x, self const& y) {
+    return x.value != y.value;
+  }
+  friend bool operator<(self const& x, self const& y) {
+    return x.value < y.value;
+  }
+  friend bool operator<=(self const& x, self const& y) {
+    return x.value <= y.value;
+  }
+  friend bool operator>(self const& x, self const& y) {
+    return x.value > y.value;
+  }
+  friend bool operator>=(self const& x, self const& y) {
+    return x.value >= y.value;
+  }
+private:
+  value_type value;
+};
+// Factory that wraps value in a counting_iterator<T>. NOTE(review): the trait inside enable_if was stripped from this text; std::is_integral is assumed - confirm.
+template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
+inline counting_iterator<T> make_counter(T value) {
+  return counting_iterator<T>{value};
+}
+
diff --git a/src/lcals/CMakeLists.txt b/src/lcals/CMakeLists.txt
index f767bbd0b..06bf8d7cb 100644
--- a/src/lcals/CMakeLists.txt
+++ b/src/lcals/CMakeLists.txt
@@ -74,5 +74,16 @@ blt_add_library(
   TRIDIAG_ELIM-Cuda.cpp
   TRIDIAG_ELIM-OMP.cpp
   TRIDIAG_ELIM-OMPTarget.cpp
+  DIFF_PREDICT-StdPar.cpp
+  EOS-StdPar.cpp
+  FIRST_DIFF-StdPar.cpp
+  FIRST_MIN-StdPar.cpp
+  FIRST_SUM-StdPar.cpp
+  GEN_LIN_RECUR-StdPar.cpp
+  HYDRO_1D-StdPar.cpp
+  HYDRO_2D-StdPar.cpp
+  INT_PREDICT-StdPar.cpp
+  PLANCKIAN-StdPar.cpp
+  TRIDIAG_ELIM-StdPar.cpp
   DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
   )
diff --git a/src/lcals/DIFF_PREDICT-StdPar.cpp b/src/lcals/DIFF_PREDICT-StdPar.cpp
new file mode 100644
index 000000000..873703bcf
--- /dev/null
+++ b/src/lcals/DIFF_PREDICT-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DIFF_PREDICT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include  // NOTE(review): header name missing in this text (angle-bracket content looks stripped) - restore from the original file
+
+namespace rajaperf
+{
+namespace lcals
+{
+// StdPar variants of DIFF_PREDICT: each timed repetition applies DIFF_PREDICT_BODY to
+// every i in [0, iend) through std::for_each_n(par_unseq). tune_idx is not used here.
+void DIFF_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();  // doubles as the for_each_n element count because ibegin is 0
+
+  DIFF_PREDICT_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend,
+                         [=](Index_type i) {
+          DIFF_PREDICT_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // The kernel body is wrapped in a named lambda once, outside the timed loop.
+      auto diffpredict_lam = [=](Index_type i) {
+                               DIFF_PREDICT_BODY;
+                             };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend,
+                         [=](Index_type i) {
+          diffpredict_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/lcals/DIFF_PREDICT.cpp b/src/lcals/DIFF_PREDICT.cpp
index 57bb9fb39..676d5f447 100644
--- a/src/lcals/DIFF_PREDICT.cpp
+++ b/src/lcals/DIFF_PREDICT.cpp
@@ -51,6 +51,9 @@ DIFF_PREDICT::DIFF_PREDICT(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 DIFF_PREDICT::~DIFF_PREDICT()
diff --git a/src/lcals/DIFF_PREDICT.hpp b/src/lcals/DIFF_PREDICT.hpp
index 3a583381b..7d4868204 100644
--- a/src/lcals/DIFF_PREDICT.hpp
+++ b/src/lcals/DIFF_PREDICT.hpp
@@ -94,6
+94,7 @@ class DIFF_PREDICT : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/EOS-StdPar.cpp b/src/lcals/EOS-StdPar.cpp
new file mode 100644
index 000000000..eb74b434f
--- /dev/null
+++ b/src/lcals/EOS-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "EOS.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include  // NOTE(review): header name missing in this text (angle-bracket content looks stripped) - restore from the original file
+
+namespace rajaperf
+{
+namespace lcals
+{
+// StdPar variants of EOS: each timed repetition applies EOS_BODY to every i in
+// [0, iend) through std::for_each_n(par_unseq). tune_idx is not used here.
+void EOS::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();  // doubles as the for_each_n element count because ibegin is 0
+
+  EOS_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend,
+                         [=](Index_type i) {
+          EOS_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // The kernel body is wrapped in a named lambda once, outside the timed loop.
+      auto eos_lam = [=](Index_type i) {
+                       EOS_BODY;
+                     };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend,
+                         [=](Index_type i) {
+          eos_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n EOS : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp
index 69ffa4bc3..4d9cf5720 100644
--- a/src/lcals/EOS.cpp
+++ b/src/lcals/EOS.cpp
@@ -59,6 +59,9 @@ EOS::EOS(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 EOS::~EOS()
diff --git a/src/lcals/EOS.hpp b/src/lcals/EOS.hpp
index 9cc202a02..3efe5cee0 100644
--- a/src/lcals/EOS.hpp
+++ b/src/lcals/EOS.hpp
@@ -63,6 +63,7 @@ class EOS : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/FIRST_DIFF-StdPar.cpp b/src/lcals/FIRST_DIFF-StdPar.cpp
new file mode 100644
index 000000000..9bf083a19
--- /dev/null
+++ b/src/lcals/FIRST_DIFF-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_DIFF.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include  // NOTE(review): header name missing in this text (angle-bracket content looks stripped) - restore from the original file
+
+namespace rajaperf
+{
+namespace lcals
+{
+// StdPar variants of FIRST_DIFF: each timed repetition applies FIRST_DIFF_BODY to
+// every i in [0, iend) through std::for_each_n(par_unseq). tune_idx is not used here.
+void FIRST_DIFF::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();  // doubles as the for_each_n element count because ibegin is 0
+
+  FIRST_DIFF_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend,
+                         [=](Index_type i) {
+          FIRST_DIFF_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // The kernel body is wrapped in a named lambda once, outside the timed loop.
+      auto firstdiff_lam = [=](Index_type i) {
+                             FIRST_DIFF_BODY;
+                           };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend,
+                         [=](Index_type i) {
+          firstdiff_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/lcals/FIRST_DIFF.cpp b/src/lcals/FIRST_DIFF.cpp
index df13c2f39..b878d2c6c 100644
--- a/src/lcals/FIRST_DIFF.cpp
+++ b/src/lcals/FIRST_DIFF.cpp
@@ -29,7 +29,7 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params)
   m_N = getActualProblemSize()+1;
 
   setItsPerRep( getActualProblemSize() );
-  setItsPerRep( getActualProblemSize() );
+  setItsPerRep( getActualProblemSize() ); // why twice?
setKernelsPerRep(1); setBytesPerRep( (1*sizeof(Real_type) + 0*sizeof(Real_type)) * getActualProblemSize() + (0*sizeof(Real_type) + 1*sizeof(Real_type)) * m_N ); @@ -55,6 +55,9 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } FIRST_DIFF::~FIRST_DIFF() diff --git a/src/lcals/FIRST_DIFF.hpp b/src/lcals/FIRST_DIFF.hpp index f3f6424f0..1fd314b75 100644 --- a/src/lcals/FIRST_DIFF.hpp +++ b/src/lcals/FIRST_DIFF.hpp @@ -53,6 +53,7 @@ class FIRST_DIFF : public KernelBase void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_MIN-StdPar.cpp b/src/lcals/FIRST_MIN-StdPar.cpp new file mode 100644 index 000000000..0f9067ea7 --- /dev/null +++ b/src/lcals/FIRST_MIN-StdPar.cpp @@ -0,0 +1,105 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_MIN.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include  // NOTE(review): header name missing in this text (angle-bracket content looks stripped) - restore from the original file
+
+namespace rajaperf
+{
+namespace lcals
+{
+// StdPar variants of FIRST_MIN: each timed repetition scans x over [ibegin, iend) for the
+// first smallest value and folds its index into m_minloc. tune_idx is not used here.
+void FIRST_MIN::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_MIN_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+      // The std::min_element(par_unseq) form is disabled (#if 0); the serial scan in #else is what runs.
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#if 0
+        auto result =
+          std::min_element( std::execution::par_unseq,
+                            &x[ibegin], &x[iend]);
+        auto loc = std::distance(&x[ibegin], result);
+        m_minloc = std::max(m_minloc, loc);
+#else
+        FIRST_MIN_MINLOC_INIT;
+        for (Index_type i = ibegin; i < iend; ++i ) {
+          if ( x[i] < mymin.val ) {
+            mymin.val = x[i];
+            mymin.loc = i;
+          }
+        }
+        m_minloc = std::max(m_minloc, mymin.loc);  // accumulate across reps, as the #if 0 branch and Lambda_StdPar do
+#endif
+
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same serial scan; the comparison reads x[i] through a lambda built before timing starts.
+      auto firstmin_base_lam = [=](Index_type i) -> Real_type {
+                                 return x[i];
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        FIRST_MIN_MINLOC_INIT;
+
+        for (Index_type i = ibegin; i < iend; ++i ) {
+          if ( firstmin_base_lam(i) < mymin.val ) {
+            mymin.val = x[i];
+            mymin.loc = i;
+          }
+        }
+
+        m_minloc = std::max(m_minloc, mymin.loc);
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp
index 427cbe0a6..e4ebe5f1d 100644
--- a/src/lcals/FIRST_MIN.cpp
+++ b/src/lcals/FIRST_MIN.cpp
@@ -59,6 +59,9 @@ FIRST_MIN::FIRST_MIN(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined(
Kokkos_Lambda ); + + setVariantDefined( Base_StdPar ); + //setVariantDefined( Lambda_StdPar ); } FIRST_MIN::~FIRST_MIN() diff --git a/src/lcals/FIRST_MIN.hpp b/src/lcals/FIRST_MIN.hpp index dd00d4392..77e05fc59 100644 --- a/src/lcals/FIRST_MIN.hpp +++ b/src/lcals/FIRST_MIN.hpp @@ -80,6 +80,7 @@ class FIRST_MIN : public KernelBase void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/FIRST_SUM-StdPar.cpp b/src/lcals/FIRST_SUM-StdPar.cpp new file mode 100644 index 000000000..8c2881b02 --- /dev/null +++ b/src/lcals/FIRST_SUM-StdPar.cpp @@ -0,0 +1,88 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_SUM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include  // NOTE(review): header name missing in this text (angle-bracket content looks stripped) - restore from the original file
+
+namespace rajaperf
+{
+namespace lcals
+{
+// StdPar variants of FIRST_SUM: each timed repetition applies FIRST_SUM_BODY to every i in
+// [ibegin, iend) through std::for_each_n(par_unseq). tune_idx is not used here.
+void FIRST_SUM::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_SUM_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // for_each_n takes an element count, not an end index: starting at ibegin, the count is iend - ibegin.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          FIRST_SUM_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // The kernel body is wrapped in a named lambda once, outside the timed loop.
+      auto firstsum_lam = [=](Index_type i) {
+                            FIRST_SUM_BODY;
+                          };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Same count as Base_StdPar, so the last index visited is iend - 1.
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend - ibegin,
+                         [=](Index_type i) {
+          firstsum_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp
index d5559cd6c..bf0a6ec1f 100644
--- a/src/lcals/FIRST_SUM.cpp
+++ b/src/lcals/FIRST_SUM.cpp
@@ -54,6 +54,9 @@ FIRST_SUM::FIRST_SUM(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 FIRST_SUM::~FIRST_SUM()
diff --git a/src/lcals/FIRST_SUM.hpp b/src/lcals/FIRST_SUM.hpp
index 59c1c0bfd..a73cac474 100644
--- a/src/lcals/FIRST_SUM.hpp
+++ b/src/lcals/FIRST_SUM.hpp
@@ -56,6 +56,7 @@ class FIRST_SUM : public KernelBase
   void
runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/GEN_LIN_RECUR-StdPar.cpp b/src/lcals/GEN_LIN_RECUR-StdPar.cpp
new file mode 100644
index 000000000..bcf188c70
--- /dev/null
+++ b/src/lcals/GEN_LIN_RECUR-StdPar.cpp
@@ -0,0 +1,101 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "GEN_LIN_RECUR.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include  // NOTE(review): header name missing in this text (angle-bracket content looks stripped) - restore from the original file
+
+namespace rajaperf
+{
+namespace lcals
+{
+// StdPar variants of GEN_LIN_RECUR: each timed repetition runs two for_each_n(par_unseq) sweeps,
+// BODY1 over k = 0 .. N-1 and then BODY2 over i = 1 .. N. tune_idx is not used here.
+void GEN_LIN_RECUR::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+
+  GEN_LIN_RECUR_DATA_SETUP;  // NOTE(review): N is not declared in this function - presumably this macro provides it; confirm
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(0), N,
+                         [=](Index_type k) {
+          GEN_LIN_RECUR_BODY1;
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(1), N,  // N indices starting at 1, i.e. the last index visited is N
+                         [=](Index_type i) {
+          GEN_LIN_RECUR_BODY2;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Both kernel bodies are wrapped in named lambdas once, outside the timed loop.
+      auto genlinrecur_lam1 = [=](Index_type k) {
+                                GEN_LIN_RECUR_BODY1;
+                              };
+      auto genlinrecur_lam2 = [=](Index_type i) {
+                                GEN_LIN_RECUR_BODY2;
+                              };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(0), N,
+                         [=](Index_type k) {
+          genlinrecur_lam1(k);
+        });
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(1), N,
+                         [=](Index_type i) {
+          genlinrecur_lam2(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp
index 340219129..c6dd2bcfd 100644
--- a/src/lcals/GEN_LIN_RECUR.cpp
+++ b/src/lcals/GEN_LIN_RECUR.cpp
@@ -59,6 +59,9 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 GEN_LIN_RECUR::~GEN_LIN_RECUR()
diff --git a/src/lcals/GEN_LIN_RECUR.hpp b/src/lcals/GEN_LIN_RECUR.hpp
index 9586a69b4..9daaac57a 100644
--- a/src/lcals/GEN_LIN_RECUR.hpp
+++ b/src/lcals/GEN_LIN_RECUR.hpp
@@ -77,6 +77,7 @@ class GEN_LIN_RECUR : public KernelBase
   void runHipVariant(VariantID vid, size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/HYDRO_1D-StdPar.cpp b/src/lcals/HYDRO_1D-StdPar.cpp
new file mode 100644
index 000000000..c458fdf64
--- /dev/null
+++ b/src/lcals/HYDRO_1D-StdPar.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_1D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include  // NOTE(review): header name missing in this text (angle-bracket content looks stripped) - restore from the original file
+
+namespace rajaperf
+{
+namespace lcals
+{
+// StdPar variants of HYDRO_1D: each timed repetition applies HYDRO_1D_BODY to every i in
+// [0, iend) through std::for_each_n(par_unseq). tune_idx is not used here.
+void HYDRO_1D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();  // doubles as the for_each_n element count because ibegin is 0
+
+  HYDRO_1D_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend,
+                         [=](Index_type i) {
+          HYDRO_1D_BODY;
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // The kernel body is wrapped in a named lambda once, outside the timed loop.
+      auto hydro1d_lam = [=](Index_type i) {
+                           HYDRO_1D_BODY;
+                         };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(ibegin), iend,
+                         [=](Index_type i) {
+          hydro1d_lam(i);
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/lcals/HYDRO_1D.cpp b/src/lcals/HYDRO_1D.cpp
index 16a251e2d..ea1853001 100644
--- a/src/lcals/HYDRO_1D.cpp
+++ b/src/lcals/HYDRO_1D.cpp
@@ -58,6 +58,9 @@ HYDRO_1D::HYDRO_1D(const RunParams& params)
   setVariantDefined( RAJA_HIP );
 
   setVariantDefined( Kokkos_Lambda );
+
+  setVariantDefined( Base_StdPar );
+  setVariantDefined( Lambda_StdPar );
 }
 
 HYDRO_1D::~HYDRO_1D()
diff --git a/src/lcals/HYDRO_1D.hpp b/src/lcals/HYDRO_1D.hpp
index dd61f112c..45fe3b1d9 100644
--- a/src/lcals/HYDRO_1D.hpp
+++ b/src/lcals/HYDRO_1D.hpp
@@ -58,6 +58,7 @@ class HYDRO_1D : public KernelBase
   void runHipVariant(VariantID vid,
size_t tune_idx);
   void runOpenMPTargetVariant(VariantID vid, size_t tune_idx);
   void runKokkosVariant(VariantID vid, size_t tune_idx);
+  void runStdParVariant(VariantID vid, size_t tune_idx);
 
   void setCudaTuningDefinitions(VariantID vid);
   void setHipTuningDefinitions(VariantID vid);
diff --git a/src/lcals/HYDRO_2D-StdPar.cpp b/src/lcals/HYDRO_2D-StdPar.cpp
new file mode 100644
index 000000000..da131a2b6
--- /dev/null
+++ b/src/lcals/HYDRO_2D-StdPar.cpp
@@ -0,0 +1,211 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_2D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#if defined(BUILD_STDPAR)
+
+#include "common/StdParUtils.hpp"
+
+#include  // NOTE(review): header name missing in this text (angle-bracket content looks stripped) - restore from the original file
+
+namespace rajaperf
+{
+namespace lcals
+{
+// StdPar variants of HYDRO_2D: each timed repetition runs BODY1, BODY2 and BODY3 in turn over k in [kbeg, kend), j in [jbeg, jend). tune_idx is not used.
+void HYDRO_2D::runStdParVariant(VariantID vid, size_t tune_idx)
+{
+#if defined(RUN_STDPAR)
+
+  const Index_type run_reps = getRunReps();
+  const Index_type kbeg = 1;
+  const Index_type kend = m_kn - 1;
+  const Index_type jbeg = 1;
+  const Index_type jend = m_jn - 1;
+
+#ifdef USE_STDPAR_COLLAPSE
+  // collapsed mode walks one flat, zero-based index over all nk*nj (k, j) points;
+  // kbeg and jbeg (both 1) are added back when that flat index is decoded below
+  const auto nk = kend-kbeg;
+  const auto nj = jend-jbeg;
+#endif
+
+  HYDRO_2D_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_StdPar : {
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Each sweep opens its lambda(s) in one preprocessor branch and closes them after the shared body line.
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(0), nk*nj,
+                         [=](Index_type kj) {
+                           const auto k = kbeg + kj / nj;  // row of the flat index
+                           const auto j = jbeg + kj % nj;  // column of the flat index
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+              //std::cerr << "JEFF: " << k << "," << j << "\n";
+              HYDRO_2D_BODY1;
+#ifndef USE_STDPAR_COLLAPSE
+          });  // closes the inner (j) for_each_n, which exists only in the nested form
+#endif
+        });
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(0), nk*nj,
+                         [=](Index_type kj) {
+                           const auto k = kbeg + kj / nj;
+                           const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+              HYDRO_2D_BODY2;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(0), nk*nj,
+                         [=](Index_type kj) {
+                           const auto k = kbeg + kj / nj;
+                           const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+              HYDRO_2D_BODY3;
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    case Lambda_StdPar : {
+      // Same three sweeps as Base_StdPar; each body is wrapped in a (k, j) lambda built before timing starts.
+      auto hydro2d_base_lam1 = [=] (Index_type k, Index_type j) {
+                                 HYDRO_2D_BODY1;
+                               };
+      auto hydro2d_base_lam2 = [=] (Index_type k, Index_type j) {
+                                 HYDRO_2D_BODY2;
+                               };
+      auto hydro2d_base_lam3 = [=] (Index_type k, Index_type j) {
+                                 HYDRO_2D_BODY3;
+                               };
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(0), nk*nj,
+                         [=](Index_type kj) {
+                           const auto k = kbeg + kj / nj;
+                           const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+              hydro2d_base_lam1(k, j);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(0), nk*nj,
+                         [=](Index_type kj) {
+                           const auto k = kbeg + kj / nj;
+                           const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+              hydro2d_base_lam2(k, j);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+#ifdef USE_STDPAR_COLLAPSE
+        std::for_each_n( std::execution::par_unseq,
+                         counting_iterator(0), nk*nj,
+                         [=](Index_type kj) {
+                           const auto k = kbeg + kj / nj;
+                           const auto j = jbeg + kj % nj;
+#else
+        std::for_each_n( std::execution::par,
+                         counting_iterator(kbeg), kend-kbeg,
+                         [=](Index_type k) {
+          std::for_each_n( std::execution::unseq,
+                           counting_iterator(jbeg), jend-jbeg,
+                           [=](Index_type j) {
+#endif
+              hydro2d_base_lam3(k, j);
+#ifndef USE_STDPAR_COLLAPSE
+          });
+#endif
+        });
+
+      }
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      getCout() << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
+
+#endif // BUILD_STDPAR
+
diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp
index 0920de8c1..9c696fa20 100644
--- a/src/lcals/HYDRO_2D.cpp
+++ b/src/lcals/HYDRO_2D.cpp
@@ -73,6 +73,9
@@ HYDRO_2D::HYDRO_2D(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } HYDRO_2D::~HYDRO_2D() diff --git a/src/lcals/HYDRO_2D.hpp b/src/lcals/HYDRO_2D.hpp index e735abde5..5f2c4132d 100644 --- a/src/lcals/HYDRO_2D.hpp +++ b/src/lcals/HYDRO_2D.hpp @@ -60,7 +60,7 @@ const Real_type s = m_s; \ const Real_type t = m_t; \ \ - const Index_type kn = m_kn; \ + const Index_type kn = m_kn; (void)kn; \ const Index_type jn = m_jn; #define HYDRO_2D_BODY1 \ @@ -154,6 +154,7 @@ class HYDRO_2D : public KernelBase void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/INT_PREDICT-StdPar.cpp b/src/lcals/INT_PREDICT-StdPar.cpp new file mode 100644 index 000000000..aeecccddc --- /dev/null +++ b/src/lcals/INT_PREDICT-StdPar.cpp @@ -0,0 +1,88 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INT_PREDICT.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace lcals +{ + +// Runs the StdPar variants of INT_PREDICT over indices [ibegin, iend): Base_StdPar inlines INT_PREDICT_BODY in the for_each_n lambda, Lambda_StdPar calls intpredict_lam; each is timed over run_reps repetitions and any other vid prints an "Unknown variant id" message. +void INT_PREDICT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INT_PREDICT_DATA_SETUP; + + auto intpredict_lam = [=](Index_type i) { + INT_PREDICT_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, // for_each_n takes an element count, not an end index + [=](Index_type i) { + INT_PREDICT_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, // element count, see Base_StdPar + [=](Index_type i) { + intpredict_lam(i); + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/lcals/INT_PREDICT.cpp b/src/lcals/INT_PREDICT.cpp index 1e7375752..49437aae2 100644 --- a/src/lcals/INT_PREDICT.cpp +++ b/src/lcals/INT_PREDICT.cpp @@ -51,6 +51,9 @@ INT_PREDICT::INT_PREDICT(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } INT_PREDICT::~INT_PREDICT() diff --git a/src/lcals/INT_PREDICT.hpp b/src/lcals/INT_PREDICT.hpp index a81ae6fb2..a7366f532 100644 --- a/src/lcals/INT_PREDICT.hpp +++ b/src/lcals/INT_PREDICT.hpp @@ -73,6 +73,7 @@ class
INT_PREDICT : public KernelBase void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/PLANCKIAN-StdPar.cpp b/src/lcals/PLANCKIAN-StdPar.cpp new file mode 100644 index 000000000..cb55f5869 --- /dev/null +++ b/src/lcals/PLANCKIAN-StdPar.cpp @@ -0,0 +1,89 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PLANCKIAN.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include +#include + +namespace rajaperf +{ +namespace lcals +{ + +// Runs the StdPar variants of PLANCKIAN over indices [ibegin, iend): Base_StdPar inlines PLANCKIAN_BODY in the for_each_n lambda, Lambda_StdPar calls planckian_lam; each is timed over run_reps repetitions and any other vid prints an "Unknown variant id" message. +void PLANCKIAN::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + PLANCKIAN_DATA_SETUP; + + auto planckian_lam = [=](Index_type i) { + PLANCKIAN_BODY; + }; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, // for_each_n takes an element count, not an end index + [=](Index_type i) { + PLANCKIAN_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, // element count, see Base_StdPar + [=](Index_type i) { + planckian_lam(i); + }); + + } + stopTimer(); + + break; + } + + default
: { + getCout() << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace lcals +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/lcals/PLANCKIAN.cpp b/src/lcals/PLANCKIAN.cpp index da178a407..44397bc8b 100644 --- a/src/lcals/PLANCKIAN.cpp +++ b/src/lcals/PLANCKIAN.cpp @@ -51,6 +51,9 @@ PLANCKIAN::PLANCKIAN(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } PLANCKIAN::~PLANCKIAN() diff --git a/src/lcals/PLANCKIAN.hpp b/src/lcals/PLANCKIAN.hpp index 92b55fc95..2af31cc8f 100644 --- a/src/lcals/PLANCKIAN.hpp +++ b/src/lcals/PLANCKIAN.hpp @@ -58,6 +58,7 @@ class PLANCKIAN : public KernelBase void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/lcals/TRIDIAG_ELIM-StdPar.cpp b/src/lcals/TRIDIAG_ELIM-StdPar.cpp new file mode 100644 index 000000000..9ad42dd9c --- /dev/null +++ b/src/lcals/TRIDIAG_ELIM-StdPar.cpp @@ -0,0 +1,90 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRIDIAG_ELIM.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace lcals +{ + +// Runs the StdPar variants of TRIDIAG_ELIM over indices [ibegin, iend) = [1, m_N): Base_StdPar inlines TRIDIAG_ELIM_BODY in the for_each_n lambda, Lambda_StdPar calls tridiag_elim_lam; each is timed over run_reps repetitions and any other vid prints an "Unknown variant id" message. +void TRIDIAG_ELIM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = m_N; + + TRIDIAG_ELIM_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, // element count: ibegin is 1, so passing iend here would also visit i == iend + [=](Index_type i) { + TRIDIAG_ELIM_BODY; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto tridiag_elim_lam = [=](Index_type i) { + TRIDIAG_ELIM_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, // element count, see Base_StdPar + [=](Index_type i) { + tridiag_elim_lam(i); + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl; + } + + } + +#else + RAJA_UNUSED_VAR(vid); +#endif +} + +} // end namespace lcals +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp index 98278cbcf..49904e0b6 100644 --- a/src/lcals/TRIDIAG_ELIM.cpp +++ b/src/lcals/TRIDIAG_ELIM.cpp @@ -53,6 +53,9 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } TRIDIAG_ELIM::~TRIDIAG_ELIM() diff --git a/src/lcals/TRIDIAG_ELIM.hpp b/src/lcals/TRIDIAG_ELIM.hpp index c95685de9..906dad0de 100644 --- a/src/lcals/TRIDIAG_ELIM.hpp +++
b/src/lcals/TRIDIAG_ELIM.hpp @@ -58,6 +58,7 @@ class TRIDIAG_ELIM : public KernelBase void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/CMakeLists.txt b/src/polybench/CMakeLists.txt index f9cd2c1c2..3779533d4 100644 --- a/src/polybench/CMakeLists.txt +++ b/src/polybench/CMakeLists.txt @@ -86,5 +86,18 @@ blt_add_library( POLYBENCH_MVT-Cuda.cpp POLYBENCH_MVT-OMP.cpp POLYBENCH_MVT-OMPTarget.cpp + POLYBENCH_2MM-StdPar.cpp + POLYBENCH_3MM-StdPar.cpp + POLYBENCH_ADI-StdPar.cpp + POLYBENCH_ATAX-StdPar.cpp + POLYBENCH_FDTD_2D-StdPar.cpp + POLYBENCH_FLOYD_WARSHALL-StdPar.cpp + POLYBENCH_GEMM-StdPar.cpp + POLYBENCH_GEMVER-StdPar.cpp + POLYBENCH_GESUMMV-StdPar.cpp + POLYBENCH_HEAT_3D-StdPar.cpp + POLYBENCH_JACOBI_1D-StdPar.cpp + POLYBENCH_JACOBI_2D-StdPar.cpp + POLYBENCH_MVT-StdPar.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/polybench/POLYBENCH_2MM-StdPar.cpp b/src/polybench/POLYBENCH_2MM-StdPar.cpp new file mode 100644 index 000000000..ed89ff4fe --- /dev/null +++ b/src/polybench/POLYBENCH_2MM-StdPar.cpp @@ -0,0 +1,204 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_2MM.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ +// Runs the StdPar variants of POLYBENCH_2MM: two loop nests (over i,j then i,l), each either flattened into one index when USE_STDPAR_COLLAPSE is defined or written as nested for_each_n calls; Base_StdPar inlines the BODY macros, Lambda_StdPar calls the poly_2mm_base_lam* wrappers. +void POLYBENCH_2MM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_2MM_DATA_SETUP; + +#if 0 // disabled: begin/end are only referenced by the transform_reduce alternative in the '#else' of '#if 1' below + auto begin = counting_iterator(0); + auto end = counting_iterator(nk); +#endif + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=](Index_type j) { +#endif +#if 1 + POLYBENCH_2MM_BODY1; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, + [=,&dot](Index_type k) { + POLYBENCH_2MM_BODY2; + }); + POLYBENCH_2MM_BODY3; +#else + tmp[j + i*nj] = std::transform_reduce( std::execution::unseq, + begin, end, + (Real_type)0, std::plus(), + [=] (Index_type k) { + return alpha * A[k + i*nk] * B[j + k*nj]; + }); +#endif +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nl, + [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, + [=](Index_type l) { +#endif + POLYBENCH_2MM_BODY4; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, +
[=,&dot](Index_type j) { + POLYBENCH_2MM_BODY5; + }); + POLYBENCH_2MM_BODY6; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_2mm_base_lam2 = + [=](Index_type i, Index_type j, Index_type k, Real_type &dot) { + POLYBENCH_2MM_BODY2; + }; + auto poly_2mm_base_lam3 = + [=](Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_2MM_BODY3; + }; + auto poly_2mm_base_lam5 = + [=](Index_type i, Index_type l, Index_type j, Real_type &dot) { + POLYBENCH_2MM_BODY5; + }; + auto poly_2mm_base_lam6 = + [=](Index_type i, Index_type l, Real_type &dot) { + POLYBENCH_2MM_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=](Index_type j) { +#endif + POLYBENCH_2MM_BODY1; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, + [=,&dot](Index_type k) { + poly_2mm_base_lam2(i, j, k, dot); + }); + poly_2mm_base_lam3(i, j, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nl, + [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, + [=](Index_type l) { +#endif + POLYBENCH_2MM_BODY4; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=,&dot](Index_type j) { + poly_2mm_base_lam5(i, l, j, dot); + }); + poly_2mm_base_lam6(i, l, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } +
stopTimer(); + + break; + } + + default : { + getCout() << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_2MM.cpp b/src/polybench/POLYBENCH_2MM.cpp index 80136cd49..9d44e3005 100644 --- a/src/polybench/POLYBENCH_2MM.cpp +++ b/src/polybench/POLYBENCH_2MM.cpp @@ -78,6 +78,9 @@ POLYBENCH_2MM::POLYBENCH_2MM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_2MM::~POLYBENCH_2MM() diff --git a/src/polybench/POLYBENCH_2MM.hpp b/src/polybench/POLYBENCH_2MM.hpp index e11d4889b..d792d549d 100644 --- a/src/polybench/POLYBENCH_2MM.hpp +++ b/src/polybench/POLYBENCH_2MM.hpp @@ -127,6 +127,7 @@ class POLYBENCH_2MM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_3MM-Seq.cpp b/src/polybench/POLYBENCH_3MM-Seq.cpp index fb1bdd03b..28ea1a319 100644 --- a/src/polybench/POLYBENCH_3MM-Seq.cpp +++ b/src/polybench/POLYBENCH_3MM-Seq.cpp @@ -19,7 +19,6 @@ namespace rajaperf namespace polybench { - void POLYBENCH_3MM::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); diff --git a/src/polybench/POLYBENCH_3MM-StdPar.cpp b/src/polybench/POLYBENCH_3MM-StdPar.cpp new file mode 100644 index 000000000..36ead6be7 --- /dev/null +++ b/src/polybench/POLYBENCH_3MM-StdPar.cpp @@ -0,0 +1,250 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence
Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_3MM.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ +// Runs the StdPar variants of POLYBENCH_3MM: three loop nests (over i,j then j,l then i,l), each either flattened into one index when USE_STDPAR_COLLAPSE is defined or written as nested for_each_n calls; Base_StdPar inlines the BODY macros, Lambda_StdPar calls the poly_3mm_base_lam* wrappers. +void POLYBENCH_3MM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_3MM_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, // same inner policy as the other nests in both variants + counting_iterator(0), nj, + [=](Index_type j) { +#endif + POLYBENCH_3MM_BODY1; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, + [=,&dot](Index_type k) { + POLYBENCH_3MM_BODY2; + }); + POLYBENCH_3MM_BODY3; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nj*nl, + [=](Index_type jl) { + const auto j = jl / nl; + const auto l = jl % nl; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nj, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, + [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY4; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nm, + [=,&dot](Index_type m) { + POLYBENCH_3MM_BODY5; + }); + POLYBENCH_3MM_BODY6; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE +
std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nl, + [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, + [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY7; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=,&dot](Index_type j) { + POLYBENCH_3MM_BODY8; + }); + POLYBENCH_3MM_BODY9; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_3mm_base_lam2 = + [=] (Index_type i, Index_type j, Index_type k, Real_type &dot) { + POLYBENCH_3MM_BODY2; + }; + auto poly_3mm_base_lam3 = + [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_3MM_BODY3; + }; + auto poly_3mm_base_lam5 = + [=] (Index_type j, Index_type l, Index_type m, Real_type &dot) { + POLYBENCH_3MM_BODY5; + }; + auto poly_3mm_base_lam6 = + [=] (Index_type j, Index_type l, Real_type &dot) { + POLYBENCH_3MM_BODY6; + }; + auto poly_3mm_base_lam8 = + [=] (Index_type i, Index_type l, Index_type j, Real_type &dot) { + POLYBENCH_3MM_BODY8; + }; + auto poly_3mm_base_lam9 = + [=] (Index_type i, Index_type l, Real_type &dot) { + POLYBENCH_3MM_BODY9; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=](Index_type j) { +#endif + POLYBENCH_3MM_BODY1; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, + [=,&dot](Index_type k) { + poly_3mm_base_lam2(i, j, k, dot); + }); + poly_3mm_base_lam3(i, j, dot); +#ifndef
USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nj*nl, + [=](Index_type jl) { + const auto j = jl / nl; + const auto l = jl % nl; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nj, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, + [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY4; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nm, + [=,&dot](Index_type m) { + poly_3mm_base_lam5(j, l, m, dot); + }); + poly_3mm_base_lam6(j, l, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nl, + [=](Index_type il) { + const auto i = il / nl; + const auto l = il % nl; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nl, + [=](Index_type l) { +#endif + POLYBENCH_3MM_BODY7; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=,&dot](Index_type j) { + poly_3mm_base_lam8(i, l, j, dot); + }); + poly_3mm_base_lam9(i, l, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_3MM.cpp b/src/polybench/POLYBENCH_3MM.cpp index e1dad595c..a4aa1d181 100644 --- a/src/polybench/POLYBENCH_3MM.cpp +++ b/src/polybench/POLYBENCH_3MM.cpp @@ -86,6 +86,9 @@ POLYBENCH_3MM::POLYBENCH_3MM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); }
POLYBENCH_3MM::~POLYBENCH_3MM() diff --git a/src/polybench/POLYBENCH_3MM.hpp b/src/polybench/POLYBENCH_3MM.hpp index 4331e3930..4e01307a3 100644 --- a/src/polybench/POLYBENCH_3MM.hpp +++ b/src/polybench/POLYBENCH_3MM.hpp @@ -153,6 +153,7 @@ class POLYBENCH_3MM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_ADI-StdPar.cpp b/src/polybench/POLYBENCH_ADI-StdPar.cpp new file mode 100644 index 000000000..7ea88960a --- /dev/null +++ b/src/polybench/POLYBENCH_ADI-StdPar.cpp @@ -0,0 +1,154 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_ADI.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ +// Runs the StdPar variants of POLYBENCH_ADI: for each of tsteps time steps, two par_unseq sweeps over i, each with a forward j loop and a backward k loop kept as plain sequential for-loops; Base_StdPar inlines the BODY macros, Lambda_StdPar calls the poly_adi_base_lam* wrappers. +void POLYBENCH_ADI::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_ADI_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n-2, // start 1, count n-2: visits i = 1 .. n-2 + [=](Index_type i) { + POLYBENCH_ADI_BODY2; + for (Index_type j = 1; j < n-1; ++j) { + POLYBENCH_ADI_BODY3; + } + POLYBENCH_ADI_BODY4; + for (Index_type k = n-2; k >= 1; --k) { + POLYBENCH_ADI_BODY5; + } + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n-2, + [=](Index_type i) { + POLYBENCH_ADI_BODY6; + for (Index_type j = 1; j < n-1; ++j) { + POLYBENCH_ADI_BODY7; + } + POLYBENCH_ADI_BODY8; + for (Index_type k = n-2; k >= 1; --k) { + POLYBENCH_ADI_BODY9; + } + }); + + } // tstep loop + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_adi_base_lam2 = [=](Index_type i) { + POLYBENCH_ADI_BODY2; + }; + auto poly_adi_base_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY3; + }; + auto poly_adi_base_lam4 = [=](Index_type i) { + POLYBENCH_ADI_BODY4; + }; + auto poly_adi_base_lam5 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY5; + }; + auto poly_adi_base_lam6 = [=](Index_type i) { + POLYBENCH_ADI_BODY6; + }; + auto poly_adi_base_lam7 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY7; + }; + auto poly_adi_base_lam8 = [=](Index_type i) { + POLYBENCH_ADI_BODY8; + }; + auto poly_adi_base_lam9 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY9; + }; + +
startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n-2, + [=](Index_type i) { + poly_adi_base_lam2(i); + for (Index_type j = 1; j < n-1; ++j) { + poly_adi_base_lam3(i, j); + } + poly_adi_base_lam4(i); + for (Index_type k = n-2; k >= 1; --k) { + poly_adi_base_lam5(i, k); + } + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n-2, + [=](Index_type i) { + poly_adi_base_lam6(i); + for (Index_type j = 1; j < n-1; ++j) { + poly_adi_base_lam7(i, j); + } + poly_adi_base_lam8(i); + for (Index_type k = n-2; k >= 1; --k) { + poly_adi_base_lam9(i, k); + } + }); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + + default : { + getCout() << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp index b513bdebc..7d6968a36 100644 --- a/src/polybench/POLYBENCH_ADI.cpp +++ b/src/polybench/POLYBENCH_ADI.cpp @@ -63,6 +63,9 @@ POLYBENCH_ADI::POLYBENCH_ADI(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_ADI::~POLYBENCH_ADI() diff --git a/src/polybench/POLYBENCH_ADI.hpp b/src/polybench/POLYBENCH_ADI.hpp index 848fb9dc4..b316735ba 100644 --- a/src/polybench/POLYBENCH_ADI.hpp +++ b/src/polybench/POLYBENCH_ADI.hpp @@ -195,6 +195,7 @@ class POLYBENCH_ADI : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void
setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_ATAX-StdPar.cpp b/src/polybench/POLYBENCH_ATAX-StdPar.cpp new file mode 100644 index 000000000..88866ca63 --- /dev/null +++ b/src/polybench/POLYBENCH_ATAX-StdPar.cpp @@ -0,0 +1,130 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_ATAX.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ +// Runs the StdPar variants of POLYBENCH_ATAX: two par_unseq passes of N outer iterations (over i, then over j), each with an inner unseq for_each_n of N iterations that captures the local 'dot' by reference; Base_StdPar inlines the BODY macros, Lambda_StdPar calls the poly_atax_base_lam* wrappers. +void POLYBENCH_ATAX::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps= getRunReps(); + + POLYBENCH_ATAX_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + POLYBENCH_ATAX_BODY1; + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { + POLYBENCH_ATAX_BODY2; + }); + POLYBENCH_ATAX_BODY3; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type j) { + POLYBENCH_ATAX_BODY4; + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type i) { + POLYBENCH_ATAX_BODY5; + }); + POLYBENCH_ATAX_BODY6; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_atax_base_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY2; + }; + auto poly_atax_base_lam3 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_ATAX_BODY3; + }; + auto poly_atax_base_lam5 = [=] (Index_type i,
Index_type j , Real_type &dot) { + POLYBENCH_ATAX_BODY5; + }; + auto poly_atax_base_lam6 = [=] (Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + POLYBENCH_ATAX_BODY1; + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { + poly_atax_base_lam2(i, j, dot); + }); + poly_atax_base_lam3(i, dot); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type j) { + POLYBENCH_ATAX_BODY4; + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type i) { + poly_atax_base_lam5(i, j, dot); + }); + poly_atax_base_lam6(j, dot); + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp index 4c159d3a7..38318a600 100644 --- a/src/polybench/POLYBENCH_ATAX.cpp +++ b/src/polybench/POLYBENCH_ATAX.cpp @@ -65,6 +65,9 @@ POLYBENCH_ATAX::POLYBENCH_ATAX(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_ATAX::~POLYBENCH_ATAX() diff --git a/src/polybench/POLYBENCH_ATAX.hpp b/src/polybench/POLYBENCH_ATAX.hpp index f94ade140..d5c019ba1 100644 --- a/src/polybench/POLYBENCH_ATAX.hpp +++ b/src/polybench/POLYBENCH_ATAX.hpp @@ -115,6 +115,7 @@ class POLYBENCH_ATAX : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid,
size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp new file mode 100644 index 000000000..64c50c34a --- /dev/null +++ b/src/polybench/POLYBENCH_FDTD_2D-StdPar.cpp @@ -0,0 +1,163 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_FDTD_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ +// Runs the StdPar variants of POLYBENCH_FDTD_2D: per time step, one 1-D pass over j followed by three 2-D passes flattened into a single index ij that is split back into (i, j) with / and %; Base_StdPar inlines the BODY macros, Lambda_StdPar calls the poly_fdtd2d_base_lam* wrappers. +void POLYBENCH_FDTD_2D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_FDTD_2D_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ny, + [=](Index_type j) { + POLYBENCH_FDTD_2D_BODY1; + }); + + // Note to future developers: + // Do not try to be smart and use more C++ than necessary. + // auto [i,j] = std::div(ij,ny); i++; + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is noticeably slower than below.
+ + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (nx-1)*ny, + [=](Index_type ij) { + const auto i = 1 + ij / ny; + const auto j = ij % ny; + POLYBENCH_FDTD_2D_BODY2; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nx*(ny-1), + [=](Index_type ij) { + const auto i = ij / (ny-1); + const auto j = 1 + ij % (ny-1); + POLYBENCH_FDTD_2D_BODY3; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (nx-1)*(ny-1), + [=](Index_type ij) { + const auto i = ij / (ny-1); + const auto j = ij % (ny-1); + POLYBENCH_FDTD_2D_BODY4; + }); + + } // tstep loop + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + // + // Note: the first lambda needs the current value of the scalar + // variable 't' on every t-loop iteration. + // + // Capturing t by reference was required for GCC 11 to generate + // correct results, but that breaks NVHPC GPU, so t is passed as an + // explicit lambda parameter and the capture list stays by-value. + auto poly_fdtd2d_base_lam1 = [=](Index_type j, Index_type t) { + //ey[j + 0*ny] = fict[t]; + POLYBENCH_FDTD_2D_BODY1; + }; + auto poly_fdtd2d_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY2; + }; + auto poly_fdtd2d_base_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY3; + }; + auto poly_fdtd2d_base_lam4 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY4; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ny, + [=](Index_type j) { + poly_fdtd2d_base_lam1(j,t); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (nx-1)*ny, + [=](Index_type ij) { + const auto i = 1 + ij / ny; + const auto j = ij % ny; + poly_fdtd2d_base_lam2(i, j); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nx*(ny-1), + [=](Index_type ij) { + const auto i = ij / (ny-1); +
const auto j = 1 + ij % (ny-1); + poly_fdtd2d_base_lam3(i, j); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), (nx-1)*(ny-1), + [=](Index_type ij) { + const auto i = ij / (ny-1); + const auto j = ij % (ny-1); + poly_fdtd2d_base_lam4(i, j); + }); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + + default : { + getCout() << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_FDTD_2D.cpp b/src/polybench/POLYBENCH_FDTD_2D.cpp index 37dd1f9f5..1709d5eb7 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D.cpp @@ -84,6 +84,9 @@ POLYBENCH_FDTD_2D::POLYBENCH_FDTD_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_FDTD_2D::~POLYBENCH_FDTD_2D() diff --git a/src/polybench/POLYBENCH_FDTD_2D.hpp b/src/polybench/POLYBENCH_FDTD_2D.hpp index e1d1b67c3..29127bd72 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.hpp +++ b/src/polybench/POLYBENCH_FDTD_2D.hpp @@ -113,6 +113,7 @@ class POLYBENCH_FDTD_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp index 40aad73e3..b40ff70df 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-Seq.cpp @@ -17,7 +17,6 @@ namespace rajaperf namespace polybench { - void POLYBENCH_FLOYD_WARSHALL::runSeqVariant(VariantID
vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps= getRunReps(); diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp new file mode 100644 index 000000000..b17f9f9f4 --- /dev/null +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL-StdPar.cpp @@ -0,0 +1,119 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_FLOYD_WARSHALL.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +//#define USE_STDPAR_COLLAPSE 1 + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_FLOYD_WARSHALL::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_FLOYD_WARSHALL_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type k = 0; k < N; ++k) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N*N, + [=](Index_type ji) { + const auto j = ji / N; + const auto i = ji % N; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + std::for_each_n( std::execution::seq, + counting_iterator(0), N, + [=](Index_type j) { +#endif + POLYBENCH_FLOYD_WARSHALL_BODY; + }); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + } + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_floydwarshall_base_lam = [=](Index_type k, Index_type i, Index_type j) { + POLYBENCH_FLOYD_WARSHALL_BODY; + }; + + startTimer(); + for (RepIndex_type 
irep = 0; irep < run_reps; ++irep) { + + for (Index_type k = 0; k < N; ++k) { +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N*N, + [=](Index_type ji) { + const auto j = ji / N; + const auto i = ji % N; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + std::for_each_n( std::execution::seq, + counting_iterator(0), N, + [=](Index_type j) { +#endif + poly_floydwarshall_base_lam(k, i, j); + }); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + } + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp index 9770821b0..5ce169421 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp @@ -60,6 +60,9 @@ POLYBENCH_FLOYD_WARSHALL::POLYBENCH_FLOYD_WARSHALL(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_FLOYD_WARSHALL::~POLYBENCH_FLOYD_WARSHALL() diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp index e8a067377..0eebb9ee7 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp @@ -76,6 +76,7 @@ class POLYBENCH_FLOYD_WARSHALL : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git 
a/src/polybench/POLYBENCH_GEMM-StdPar.cpp b/src/polybench/POLYBENCH_GEMM-StdPar.cpp new file mode 100644 index 000000000..1c1687471 --- /dev/null +++ b/src/polybench/POLYBENCH_GEMM-StdPar.cpp @@ -0,0 +1,133 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_GEMM.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_GEMM::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_GEMM_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=](Index_type j) { +#endif + POLYBENCH_GEMM_BODY1; + POLYBENCH_GEMM_BODY2; + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, + [=,&dot](Index_type k) { + POLYBENCH_GEMM_BODY3; + }); + POLYBENCH_GEMM_BODY4; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_gemm_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_GEMM_BODY2; + }; + auto poly_gemm_base_lam3 = [=](Index_type i, Index_type j, Index_type k, Real_type& dot) { + POLYBENCH_GEMM_BODY3; + }; + auto 
poly_gemm_base_lam4 = [=](Index_type i, Index_type j, Real_type& dot) { + POLYBENCH_GEMM_BODY4; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni*nj, + [=](Index_type ij) { + const auto i = ij / nj; + const auto j = ij % nj; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), ni, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), nj, + [=](Index_type j) { +#endif + POLYBENCH_GEMM_BODY1; + poly_gemm_base_lam2(i, j); + std::for_each_n( std::execution::unseq, + counting_iterator(0), nk, + [=,&dot](Index_type k) { + poly_gemm_base_lam3(i, j, k, dot); + }); + poly_gemm_base_lam4(i, j, dot); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp index e080c6df5..202713e61 100644 --- a/src/polybench/POLYBENCH_GEMM.cpp +++ b/src/polybench/POLYBENCH_GEMM.cpp @@ -70,6 +70,9 @@ POLYBENCH_GEMM::POLYBENCH_GEMM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_GEMM::~POLYBENCH_GEMM() diff --git a/src/polybench/POLYBENCH_GEMM.hpp b/src/polybench/POLYBENCH_GEMM.hpp index 33ea77997..cd1e00865 100644 --- a/src/polybench/POLYBENCH_GEMM.hpp +++ b/src/polybench/POLYBENCH_GEMM.hpp @@ -99,6 +99,7 @@ class POLYBENCH_GEMM : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void 
runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_GEMVER-Seq.cpp b/src/polybench/POLYBENCH_GEMVER-Seq.cpp index d17f9b709..2423507f6 100644 --- a/src/polybench/POLYBENCH_GEMVER-Seq.cpp +++ b/src/polybench/POLYBENCH_GEMVER-Seq.cpp @@ -131,7 +131,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t auto poly_gemver_lam1 = [=] (Index_type i, Index_type j) { POLYBENCH_GEMVER_BODY1_RAJA; }; - auto poly_gemver_lam2 = [=] (Index_type /* i */, Real_type &dot) { + auto poly_gemver_lam2 = [=] (Real_type &dot) { POLYBENCH_GEMVER_BODY2_RAJA; }; auto poly_gemver_lam3 = [=] (Index_type i, Index_type j, Real_type &dot) { @@ -162,10 +162,10 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t > >; - using EXEC_POL24 = + using EXEC_POL2 = RAJA::KernelPolicy< RAJA::statement::For<0, RAJA::loop_exec, - RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::Lambda<0, RAJA::Params<0>>, RAJA::statement::For<1, RAJA::loop_exec, RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> >, @@ -175,6 +175,17 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t using EXEC_POL3 = RAJA::loop_exec; + using EXEC_POL4 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -183,7 +194,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t poly_gemver_lam1 ); - RAJA::kernel_param( + RAJA::kernel_param( RAJA::make_tuple(RAJA::RangeSegment{0, n}, RAJA::RangeSegment{0, n}), RAJA::tuple{0.0}, @@ -197,7 
+208,7 @@ void POLYBENCH_GEMVER::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(t poly_gemver_lam5 ); - RAJA::kernel_param( + RAJA::kernel_param( RAJA::make_tuple(RAJA::RangeSegment{0, n}, RAJA::RangeSegment{0, n}), RAJA::tuple{0.0}, diff --git a/src/polybench/POLYBENCH_GEMVER-StdPar.cpp b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp new file mode 100644 index 000000000..32d715002 --- /dev/null +++ b/src/polybench/POLYBENCH_GEMVER-StdPar.cpp @@ -0,0 +1,168 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_GEMVER.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_GEMVER::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_GEMVER_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, + [=](Index_type j) { + POLYBENCH_GEMVER_BODY1; + }); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + POLYBENCH_GEMVER_BODY2; + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, + [=,&dot](Index_type j) { + POLYBENCH_GEMVER_BODY3; + }); + POLYBENCH_GEMVER_BODY4; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + POLYBENCH_GEMVER_BODY5; + }); + + 
std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + POLYBENCH_GEMVER_BODY6; + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, + [=,&dot](Index_type j) { + POLYBENCH_GEMVER_BODY7; + }); + POLYBENCH_GEMVER_BODY8; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_gemver_base_lam1 = [=](Index_type i, Index_type j) { + POLYBENCH_GEMVER_BODY1; + }; + auto poly_gemver_base_lam3 = [=](Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_GEMVER_BODY3; + }; + auto poly_gemver_base_lam4 = [=](Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY4; + }; + auto poly_gemver_base_lam5 = [=](Index_type i) { + POLYBENCH_GEMVER_BODY5; + }; + auto poly_gemver_base_lam7 = [=](Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_GEMVER_BODY7; + }; + auto poly_gemver_base_lam8 = [=](Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY8; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, + [=](Index_type j) { + poly_gemver_base_lam1(i, j); + }); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + POLYBENCH_GEMVER_BODY2; + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, + [=,&dot](Index_type j) { + poly_gemver_base_lam3(i, j, dot); + }); + poly_gemver_base_lam4(i, dot); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + poly_gemver_base_lam5(i); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n, + [=](Index_type i) { + POLYBENCH_GEMVER_BODY6; + std::for_each_n( std::execution::unseq, + counting_iterator(0), n, + [=,&dot](Index_type j) { + poly_gemver_base_lam7(i, j, dot); + }); + poly_gemver_base_lam8(i, dot); + }); + + } + 
stopTimer(); + + break; + } + + default : { + getCout() << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp index 99e16324f..5e6414c2f 100644 --- a/src/polybench/POLYBENCH_GEMVER.cpp +++ b/src/polybench/POLYBENCH_GEMVER.cpp @@ -79,6 +79,9 @@ POLYBENCH_GEMVER::POLYBENCH_GEMVER(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_GEMVER::~POLYBENCH_GEMVER() diff --git a/src/polybench/POLYBENCH_GEMVER.hpp b/src/polybench/POLYBENCH_GEMVER.hpp index 07ecae962..cbf6ef605 100644 --- a/src/polybench/POLYBENCH_GEMVER.hpp +++ b/src/polybench/POLYBENCH_GEMVER.hpp @@ -18,11 +18,9 @@ /// Note: this part of the kernel is modified to avoid /// excessively large checksums /// for (Index_type i = 0; i < N; i++) { -/// Real_type dot = 0.0; /// for (Index_type j = 0; j < N; j++) { -/// dot += beta * A[j][i] * y[j]; +/// x[i] = x[i] + beta * A[j][i] * y[j]; /// } -/// x[i] = dot; /// } /// /// for (Index_type i = 0; i < N; i++) { @@ -98,7 +96,7 @@ xview(i) += zview(i); #define POLYBENCH_GEMVER_BODY6_RAJA \ - dot = wview(i); + dot = w[i]; #define POLYBENCH_GEMVER_BODY7_RAJA \ dot += alpha * Aview(i,j) * xview(j); @@ -152,6 +150,7 @@ class POLYBENCH_GEMVER : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp index 642d0b463..3b8982696 100644 --- 
a/src/polybench/POLYBENCH_GESUMMV-Seq.cpp +++ b/src/polybench/POLYBENCH_GESUMMV-Seq.cpp @@ -93,9 +93,9 @@ void POLYBENCH_GESUMMV::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( using EXEC_POL = RAJA::KernelPolicy< - RAJA::statement::For<0, RAJA::loop_exec, // i + RAJA::statement::For<0, RAJA::loop_exec, RAJA::statement::Lambda<0, RAJA::Params<0,1>>, - RAJA::statement::For<1, RAJA::loop_exec, // j + RAJA::statement::For<1, RAJA::loop_exec, RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Params<0,1>> >, RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0,1>> diff --git a/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp new file mode 100644 index 000000000..23afa5f2b --- /dev/null +++ b/src/polybench/POLYBENCH_GESUMMV-StdPar.cpp @@ -0,0 +1,100 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_GESUMMV.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ +// Runs the Base_StdPar and Lambda_StdPar variants of GESUMMV using std::for_each_n; tune_idx is not read in this function. +void POLYBENCH_GESUMMV::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) // without RUN_STDPAR the function body is empty + + const Index_type run_reps= getRunReps(); + + POLYBENCH_GESUMMV_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { // kernel body macros are expanded directly inside the algorithm callbacks + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { // the kernel is repeated run_reps times between startTimer() and stopTimer() + + std::for_each_n( std::execution::par_unseq, // outer loop over i uses the par_unseq policy + counting_iterator(0), N, + [=](Index_type i) { + POLYBENCH_GESUMMV_BODY1; // NOTE(review): presumably declares tmpdot and ydot, which the inner lambda captures by reference - confirm in POLYBENCH_GESUMMV.hpp + std::for_each_n( std::execution::unseq, // inner loop over j uses the unseq policy + counting_iterator(0), N, + [=,&tmpdot,&ydot](Index_type j) { + POLYBENCH_GESUMMV_BODY2; + }); + POLYBENCH_GESUMMV_BODY3; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { // same loop structure as Base_StdPar, with BODY2 and BODY3 wrapped in named lambdas + + auto poly_gesummv_base_lam2 = [=](Index_type i, Index_type j, Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY2; + }; + auto poly_gesummv_base_lam3 = [=](Index_type i, Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY3; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + POLYBENCH_GESUMMV_BODY1; // BODY1 is still expanded inline here; only BODY2 and BODY3 go through the lambdas + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&tmpdot,&ydot](Index_type j) { + poly_gesummv_base_lam2(i, j, tmpdot, ydot); + }); + poly_gesummv_base_lam3(i, tmpdot, ydot); + }); + + } + stopTimer(); + + break; + } + + default : { // any other variant id only prints a diagnostic + getCout() << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_GESUMMV.cpp b/src/polybench/POLYBENCH_GESUMMV.cpp index 
fdf07a58f..7e250b9e1 100644 --- a/src/polybench/POLYBENCH_GESUMMV.cpp +++ b/src/polybench/POLYBENCH_GESUMMV.cpp @@ -59,6 +59,9 @@ POLYBENCH_GESUMMV::POLYBENCH_GESUMMV(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_GESUMMV::~POLYBENCH_GESUMMV() diff --git a/src/polybench/POLYBENCH_GESUMMV.hpp b/src/polybench/POLYBENCH_GESUMMV.hpp index 32a1b0eae..9a6552301 100644 --- a/src/polybench/POLYBENCH_GESUMMV.hpp +++ b/src/polybench/POLYBENCH_GESUMMV.hpp @@ -98,6 +98,7 @@ class POLYBENCH_GESUMMV : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp index 7222e5934..1808e53b0 100644 --- a/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D-Seq.cpp @@ -107,6 +107,13 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( POLYBENCH_HEAT_3D_VIEWS_RAJA; + auto poly_heat3d_lam1 = [=](Index_type i, Index_type j, Index_type k) { + POLYBENCH_HEAT_3D_BODY1_RAJA; + }; + auto poly_heat3d_lam2 = [=](Index_type i, Index_type j, Index_type k) { + POLYBENCH_HEAT_3D_BODY2_RAJA; + }; + using EXEC_POL = RAJA::KernelPolicy< RAJA::statement::For<0, RAJA::loop_exec, @@ -115,6 +122,13 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG( RAJA::statement::Lambda<0> > > + >, + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1> + > + > > >; @@ -127,20 +141,8 @@ void POLYBENCH_HEAT_3D::runSeqVariant(VariantID vid, size_t 
RAJAPERF_UNUSED_ARG( RAJA::RangeSegment{1, N-1}, RAJA::RangeSegment{1, N-1}), - [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY1_RAJA; - } - - ); - - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}, - RAJA::RangeSegment{1, N-1}), - - [=](Index_type i, Index_type j, Index_type k) { - POLYBENCH_HEAT_3D_BODY2_RAJA; - } - + poly_heat3d_lam1, + poly_heat3d_lam2 ); } diff --git a/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp new file mode 100644 index 000000000..1b70e2441 --- /dev/null +++ b/src/polybench/POLYBENCH_HEAT_3D-StdPar.cpp @@ -0,0 +1,198 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_HEAT_3D.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_HEAT_3D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + + POLYBENCH_HEAT_3D_DATA_SETUP; + +#ifdef USE_STDPAR_COLLAPSE + const auto nn = N-2; +#endif + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nn*nn*nn, + [=](Index_type ijk) { + const auto i = 1 + ijk / (nn*nn); + const auto jk = ijk % (nn*nn); + const auto j = 1 + jk / nn; + const auto k = 1 + jk % nn; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + 
[=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type k) { +#endif + POLYBENCH_HEAT_3D_BODY1; +#ifndef USE_STDPAR_COLLAPSE + }); + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nn*nn*nn, + [=](Index_type ijk) { + const auto i = 1 + ijk / (nn*nn); + const auto jk = ijk % (nn*nn); + const auto j = 1 + jk / nn; + const auto k = 1 + jk % nn; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type k) { +#endif + POLYBENCH_HEAT_3D_BODY2; +#ifndef USE_STDPAR_COLLAPSE + }); + }); +#endif + }); + + } + + } + stopTimer(); + + POLYBENCH_HEAT_3D_DATA_RESET; + + break; + } + + case Lambda_StdPar : { + + auto poly_heat3d_base_lam1 = [=](Index_type i, Index_type j, + Index_type k) { + POLYBENCH_HEAT_3D_BODY1; + }; + auto poly_heat3d_base_lam2 = [=](Index_type i, Index_type j, + Index_type k) { + POLYBENCH_HEAT_3D_BODY2; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nn*nn*nn, + [=](Index_type ijk) { + const auto i = 1 + ijk / (nn*nn); + const auto jk = ijk % (nn*nn); + const auto j = 1 + jk / nn; + const auto k = 1 + jk % nn; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type k) { +#endif + poly_heat3d_base_lam1(i, j, k); +#ifndef 
USE_STDPAR_COLLAPSE + }); + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), nn*nn*nn, + [=](Index_type ijk) { + const auto i = 1 + ijk / (nn*nn); + const auto jk = ijk % (nn*nn); + const auto j = 1 + jk / nn; + const auto k = 1 + jk % nn; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type j) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), N-2, + [=](Index_type k) { +#endif + poly_heat3d_base_lam2(i, j, k); +#ifndef USE_STDPAR_COLLAPSE + }); + }); +#endif + }); + + } + + } + stopTimer(); + + POLYBENCH_HEAT_3D_DATA_RESET; + + break; + } + + default : { + getCout() << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_HEAT_3D.cpp b/src/polybench/POLYBENCH_HEAT_3D.cpp index be6c0e218..506738787 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D.cpp @@ -70,6 +70,9 @@ POLYBENCH_HEAT_3D::POLYBENCH_HEAT_3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_HEAT_3D::~POLYBENCH_HEAT_3D() diff --git a/src/polybench/POLYBENCH_HEAT_3D.hpp b/src/polybench/POLYBENCH_HEAT_3D.hpp index 8d7eff93c..fd641dbed 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.hpp +++ b/src/polybench/POLYBENCH_HEAT_3D.hpp @@ -124,6 +124,7 @@ class POLYBENCH_HEAT_3D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void 
setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); @@ -140,6 +141,8 @@ class POLYBENCH_HEAT_3D : public KernelBase Index_type m_N; Index_type m_tsteps; + Real_type m_factor; + Real_ptr m_A; Real_ptr m_B; Real_ptr m_Ainit; diff --git a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp index a7e81fe87..3c6e34bdc 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D-Seq.cpp @@ -18,7 +18,6 @@ namespace rajaperf namespace polybench { - void POLYBENCH_JACOBI_1D::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps= getRunReps(); diff --git a/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp new file mode 100644 index 000000000..ba3a65f9d --- /dev/null +++ b/src/polybench/POLYBENCH_JACOBI_1D-StdPar.cpp @@ -0,0 +1,110 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_JACOBI_1D.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ +// Runs the Base_StdPar and Lambda_StdPar variants of JACOBI_1D using std::for_each_n; tune_idx is not read in this function. +void POLYBENCH_JACOBI_1D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) // without RUN_STDPAR the function body is empty + + const Index_type run_reps= getRunReps(); + + POLYBENCH_JACOBI_1D_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { // kernel body macros are expanded directly inside the algorithm callbacks + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { // time-step loop is sequential; each step issues BODY1 then BODY2 as separate parallel calls + + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, // N-2 indices starting at 1 (assumes counting_iterator steps by one - confirm in common/StdParUtils.hpp) + [=](Index_type i) { + POLYBENCH_JACOBI_1D_BODY1; + }); + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + POLYBENCH_JACOBI_1D_BODY2; + }); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_1D_DATA_RESET; // invoked after stopTimer() + + break; + } + + case Lambda_StdPar : { // same loop structure as Base_StdPar, with each body wrapped in a named lambda + + auto poly_jacobi1d_lam1 = [=] (Index_type i) { + POLYBENCH_JACOBI_1D_BODY1; + }; + auto poly_jacobi1d_lam2 = [=] (Index_type i) { + POLYBENCH_JACOBI_1D_BODY2; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + poly_jacobi1d_lam1(i); + }); + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), N-2, + [=](Index_type i) { + poly_jacobi1d_lam2(i); + }); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_1D_DATA_RESET; // invoked after stopTimer() + + break; + } + + default : { // any other variant id only prints a diagnostic + getCout() << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_JACOBI_1D.cpp b/src/polybench/POLYBENCH_JACOBI_1D.cpp index 
925cd2682..33ca0dc3e 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.cpp @@ -67,6 +67,9 @@ POLYBENCH_JACOBI_1D::POLYBENCH_JACOBI_1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_JACOBI_1D::~POLYBENCH_JACOBI_1D() diff --git a/src/polybench/POLYBENCH_JACOBI_1D.hpp b/src/polybench/POLYBENCH_JACOBI_1D.hpp index 035096f89..4a94f891e 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.hpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.hpp @@ -70,6 +70,7 @@ class POLYBENCH_JACOBI_1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp new file mode 100644 index 000000000..948113937 --- /dev/null +++ b/src/polybench/POLYBENCH_JACOBI_2D-StdPar.cpp @@ -0,0 +1,169 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_JACOBI_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_JACOBI_2D::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps= getRunReps(); + + POLYBENCH_JACOBI_2D_DATA_SETUP; + + const auto n2 = (N-2); + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n2*n2, + [=](Index_type ij) { + const auto i = 1 + ij / n2; + const auto j = 1 + ij % n2; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), n2, + [=](Index_type j) { +#endif + POLYBENCH_JACOBI_2D_BODY1; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n2*n2, + [=](Index_type ij) { + const auto i = 1 + ij / n2; + const auto j = 1 + ij % n2; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), n2, + [=](Index_type j) { +#endif + POLYBENCH_JACOBI_2D_BODY2; +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_2D_DATA_RESET; + + break; + } + + case Lambda_StdPar : { + + auto poly_jacobi2d_base_lam1 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY1; + }; + auto poly_jacobi2d_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY2; + }; + + startTimer(); + for (RepIndex_type irep = 
0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n2*n2, + [=](Index_type ij) { + const auto i = 1 + ij / n2; + const auto j = 1 + ij % n2; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), n2, + [=](Index_type j) { +#endif + poly_jacobi2d_base_lam1(i, j); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + +#ifdef USE_STDPAR_COLLAPSE + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), n2*n2, + [=](Index_type ij) { + const auto i = 1 + ij / n2; + const auto j = 1 + ij % n2; +#else + std::for_each_n( std::execution::par_unseq, + counting_iterator(1), n2, + [=](Index_type i) { + std::for_each_n( std::execution::unseq, + counting_iterator(1), n2, + [=](Index_type j) { +#endif + poly_jacobi2d_base_lam2(i, j); +#ifndef USE_STDPAR_COLLAPSE + }); +#endif + }); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_2D_DATA_RESET; + + break; + } + + default : { + getCout() << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_JACOBI_2D.cpp b/src/polybench/POLYBENCH_JACOBI_2D.cpp index e1b62d364..4e1d97598 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.cpp @@ -69,6 +69,9 @@ POLYBENCH_JACOBI_2D::POLYBENCH_JACOBI_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_JACOBI_2D::~POLYBENCH_JACOBI_2D() diff --git a/src/polybench/POLYBENCH_JACOBI_2D.hpp b/src/polybench/POLYBENCH_JACOBI_2D.hpp index 49ab2cd40..17bd86c41 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.hpp 
+++ b/src/polybench/POLYBENCH_JACOBI_2D.hpp @@ -90,6 +90,7 @@ class POLYBENCH_JACOBI_2D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/polybench/POLYBENCH_MVT-StdPar.cpp b/src/polybench/POLYBENCH_MVT-StdPar.cpp new file mode 100644 index 000000000..27867a184 --- /dev/null +++ b/src/polybench/POLYBENCH_MVT-StdPar.cpp @@ -0,0 +1,132 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_MVT.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_MVT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps= getRunReps(); + + POLYBENCH_MVT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + POLYBENCH_MVT_BODY1; + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { + POLYBENCH_MVT_BODY2; + }); + POLYBENCH_MVT_BODY3; + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + POLYBENCH_MVT_BODY4; + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { + 
POLYBENCH_MVT_BODY5; + }); + POLYBENCH_MVT_BODY6; + }); + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto poly_mvt_base_lam2 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_MVT_BODY2; + }; + auto poly_mvt_base_lam3 = [=] (Index_type i, + Real_type &dot) { + POLYBENCH_MVT_BODY3; + }; + auto poly_mvt_base_lam5 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_MVT_BODY5; + }; + auto poly_mvt_base_lam6 = [=] (Index_type i, + Real_type &dot) { + POLYBENCH_MVT_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + POLYBENCH_MVT_BODY1; + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { + poly_mvt_base_lam2(i, j, dot); + }); + poly_mvt_base_lam3(i, dot); + }); + + std::for_each_n( std::execution::par_unseq, + counting_iterator(0), N, + [=](Index_type i) { + POLYBENCH_MVT_BODY4; + std::for_each_n( std::execution::unseq, + counting_iterator(0), N, + [=,&dot](Index_type j) { + poly_mvt_base_lam5(i, j, dot); + }); + poly_mvt_base_lam6(i, dot); + }); + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n POLYBENCH_MVT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace polybench +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp index e58065f28..637431ed9 100644 --- a/src/polybench/POLYBENCH_MVT.cpp +++ b/src/polybench/POLYBENCH_MVT.cpp @@ -62,6 +62,9 @@ POLYBENCH_MVT::POLYBENCH_MVT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); } POLYBENCH_MVT::~POLYBENCH_MVT() diff --git a/src/polybench/POLYBENCH_MVT.hpp b/src/polybench/POLYBENCH_MVT.hpp index 518d75dd8..fb0adbcb9 100644 --- 
a/src/polybench/POLYBENCH_MVT.hpp +++ b/src/polybench/POLYBENCH_MVT.hpp @@ -112,6 +112,7 @@ class POLYBENCH_MVT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/stream/ADD-StdPar.cpp b/src/stream/ADD-StdPar.cpp new file mode 100644 index 000000000..968a24f58 --- /dev/null +++ b/src/stream/ADD-StdPar.cpp @@ -0,0 +1,105 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ADD.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void ADD::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + ADD_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#if 0 + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + ADD_BODY; + }); +#else + std::transform( std::execution::par_unseq, + &a[ibegin], &a[iend], &b[ibegin], &c[ibegin], + [=](Real_type a, Real_type b) { return a + b; }); +#endif + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + +#if 0 + auto add_lam = [=](Index_type i) { + ADD_BODY; + }; +#else + auto add_lam = 
[=](Real_type a, Real_type b) { + return a + b; + }; +#endif + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#if 0 + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + add_lam(i); + }); +#else + std::transform( std::execution::par_unseq, + &a[ibegin], &a[iend], &b[ibegin], &c[ibegin], + add_lam ); +#endif + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n ADD : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/stream/ADD.cpp b/src/stream/ADD.cpp index 3ca91bed5..c58a73322 100644 --- a/src/stream/ADD.cpp +++ b/src/stream/ADD.cpp @@ -53,6 +53,9 @@ ADD::ADD(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/stream/ADD.hpp b/src/stream/ADD.hpp index 49e09a602..34ed50cf0 100644 --- a/src/stream/ADD.hpp +++ b/src/stream/ADD.hpp @@ -52,6 +52,7 @@ class ADD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/stream/CMakeLists.txt b/src/stream/CMakeLists.txt index 03351ff5d..7a0e0b1fc 100644 --- a/src/stream/CMakeLists.txt +++ b/src/stream/CMakeLists.txt @@ -10,30 +10,35 @@ blt_add_library( NAME stream SOURCES ADD.cpp ADD-Seq.cpp + ADD-StdPar.cpp ADD-Hip.cpp ADD-Cuda.cpp ADD-OMP.cpp ADD-OMPTarget.cpp COPY.cpp COPY-Seq.cpp + COPY-StdPar.cpp COPY-Hip.cpp COPY-Cuda.cpp COPY-OMP.cpp COPY-OMPTarget.cpp DOT.cpp DOT-Seq.cpp + DOT-StdPar.cpp DOT-Hip.cpp DOT-Cuda.cpp DOT-OMP.cpp 
DOT-OMPTarget.cpp MUL.cpp MUL-Seq.cpp + MUL-StdPar.cpp MUL-Hip.cpp MUL-Cuda.cpp MUL-OMP.cpp MUL-OMPTarget.cpp TRIAD.cpp TRIAD-Seq.cpp + TRIAD-StdPar.cpp TRIAD-Hip.cpp TRIAD-Cuda.cpp TRIAD-OMPTarget.cpp diff --git a/src/stream/COPY-StdPar.cpp b/src/stream/COPY-StdPar.cpp new file mode 100644 index 000000000..488350a56 --- /dev/null +++ b/src/stream/COPY-StdPar.cpp @@ -0,0 +1,96 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "COPY.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void COPY::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + COPY_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#if 0 + std::copy( std::execution::par_unseq, + &a[ibegin], &a[iend], &c[ibegin]); +#else + std::transform( std::execution::par_unseq, + &a[ibegin], &a[iend], &c[ibegin], + [=](Real_type a) { return a; }); +#endif + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + +#if 1 + auto copy_lam = [=](Real_type a) { + return a; + }; +#endif + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#if 0 + std::copy( std::execution::par_unseq, + &a[ibegin], &a[iend], &c[ibegin]); +#else + std::transform( std::execution::par_unseq, + &a[ibegin], &a[iend], &c[ibegin], + copy_lam ); +#endif + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n 
COPY : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/stream/COPY.cpp b/src/stream/COPY.cpp index 9cbfcbff2..51df2fafb 100644 --- a/src/stream/COPY.cpp +++ b/src/stream/COPY.cpp @@ -53,6 +53,9 @@ COPY::COPY(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/stream/COPY.hpp b/src/stream/COPY.hpp index 0544e0d2f..574364388 100644 --- a/src/stream/COPY.hpp +++ b/src/stream/COPY.hpp @@ -51,6 +51,7 @@ class COPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/stream/DOT-StdPar.cpp b/src/stream/DOT-StdPar.cpp new file mode 100644 index 000000000..b00a9c5a7 --- /dev/null +++ b/src/stream/DOT-StdPar.cpp @@ -0,0 +1,96 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DOT.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void DOT::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + DOT_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type dot = m_dot_init; + + dot += std::transform_reduce( std::execution::par_unseq, + &a[ibegin], &a[iend], &b[ibegin], + (Real_type)0); + + m_dot += dot; + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + + auto dot_base_lam = [=](Index_type i) -> Real_type { + return a[i] * b[i]; + }; + + auto begin = counting_iterator(ibegin); + auto end = counting_iterator(iend); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type dot = m_dot_init; + + dot += std::transform_reduce( std::execution::par_unseq, + begin,end, + (Real_type)0, + std::plus(), + dot_base_lam); + + m_dot += dot; + + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n DOT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/stream/DOT.cpp b/src/stream/DOT.cpp index cc32be5f2..4e4d713be 100644 --- a/src/stream/DOT.cpp +++ b/src/stream/DOT.cpp @@ -53,6 +53,9 @@ DOT::DOT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/stream/DOT.hpp b/src/stream/DOT.hpp index 5912c120a..55cab2826 100644 --- 
a/src/stream/DOT.hpp +++ b/src/stream/DOT.hpp @@ -51,6 +51,7 @@ class DOT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/stream/MUL-StdPar.cpp b/src/stream/MUL-StdPar.cpp new file mode 100644 index 000000000..731cee15d --- /dev/null +++ b/src/stream/MUL-StdPar.cpp @@ -0,0 +1,105 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MUL.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void MUL::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MUL_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#if 0 + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + MUL_BODY; + }); +#else + std::transform( std::execution::par_unseq, + &c[ibegin], &c[iend], &b[ibegin], + [=](Real_type c) { return alpha * c; }); +#endif + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + +#if 0 + auto mul_lam = [=](Index_type i) { + MUL_BODY; + }; +#else + auto mul_lam = [=](Real_type c) { + return alpha * c; + }; +#endif + + 
startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#if 0 + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + mul_lam(i); + }); +#else + std::transform( std::execution::par_unseq, + &c[ibegin], &c[iend], &b[ibegin], + mul_lam ); +#endif + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n MUL : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/stream/MUL.cpp b/src/stream/MUL.cpp index 74ce32cb0..0bad0d536 100644 --- a/src/stream/MUL.cpp +++ b/src/stream/MUL.cpp @@ -53,6 +53,9 @@ MUL::MUL(const RunParams& params) setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/stream/MUL.hpp b/src/stream/MUL.hpp index 3db59092a..4596a7d39 100644 --- a/src/stream/MUL.hpp +++ b/src/stream/MUL.hpp @@ -52,6 +52,7 @@ class MUL : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); diff --git a/src/stream/TRIAD-StdPar.cpp b/src/stream/TRIAD-StdPar.cpp new file mode 100644 index 000000000..93d08a2dd --- /dev/null +++ b/src/stream/TRIAD-StdPar.cpp @@ -0,0 +1,105 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-22, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRIAD.hpp" + +#include "RAJA/RAJA.hpp" + +#if defined(BUILD_STDPAR) + +#include "common/StdParUtils.hpp" + +#include + +namespace rajaperf +{ +namespace stream +{ + + +void TRIAD::runStdParVariant(VariantID vid, size_t tune_idx) +{ +#if defined(RUN_STDPAR) + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + TRIAD_DATA_SETUP; + + switch ( vid ) { + + case Base_StdPar : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#if 0 + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + TRIAD_BODY; + }); +#else + std::transform( std::execution::par_unseq, + &b[ibegin], &b[iend], &c[ibegin], &a[ibegin], + [=](Real_type b, Real_type c) { return b + alpha * c; }); +#endif + + } + stopTimer(); + + break; + } + + case Lambda_StdPar : { + +#if 0 + auto triad_lam = [=](Index_type i) { + TRIAD_BODY; + }; +#else + auto triad_lam = [=](Real_type b, Real_type c) { + return b + alpha * c; + }; +#endif + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +#if 0 + std::for_each_n( std::execution::par_unseq, + counting_iterator(ibegin), iend-ibegin, + [=](Index_type i) { + triad_lam(i); + }); +#else + std::transform( std::execution::par_unseq, + &b[ibegin], &b[iend], &c[ibegin], &a[ibegin], + triad_lam ); +#endif + } + stopTimer(); + + break; + } + + default : { + getCout() << "\n TRIAD : Unknown variant id = " << vid << std::endl; + } + + } + +#endif +} + +} // end namespace stream +} // end namespace rajaperf + +#endif // BUILD_STDPAR + diff --git a/src/stream/TRIAD.cpp b/src/stream/TRIAD.cpp index 4790707bb..cb26ba43c 100644 --- a/src/stream/TRIAD.cpp +++ b/src/stream/TRIAD.cpp @@ -57,6 +57,9 @@ TRIAD::TRIAD(const RunParams& params) setVariantDefined( 
Lambda_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Base_StdPar ); + setVariantDefined( Lambda_StdPar ); + setVariantDefined( Kokkos_Lambda ); } diff --git a/src/stream/TRIAD.hpp b/src/stream/TRIAD.hpp index 3f65bf804..1f7a3dd41 100644 --- a/src/stream/TRIAD.hpp +++ b/src/stream/TRIAD.hpp @@ -53,6 +53,7 @@ class TRIAD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runStdParVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid);