diff --git a/.gitmodules b/.gitmodules index 047c42160..fa2e028fb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,6 +4,9 @@ [submodule "tpl/RAJA"] path = tpl/RAJA url = https://github.com/LLNL/RAJA.git +[submodule "tpl/kokkos"] + path = tpl/kokkos + url = https://github.com/kokkos/kokkos [submodule "scripts/radiuss-spack-configs"] path = scripts/radiuss-spack-configs url = https://github.com/LLNL/radiuss-spack-configs.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fc8c256a..1cdab4474 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,11 +7,11 @@ ############################################################################### project(RAJAPerfSuite CXX) - cmake_minimum_required(VERSION 3.9) option(ENABLE_RAJA_SEQUENTIAL "Run sequential variants of RAJA kernels. Disable this, and all other variants, to run _only_ raw C loops." On) +option(ENABLE_KOKKOS "Include Kokkos implementations of the kernels in the RAJA Perfsuite" Off) # # Initialize the BLT build system @@ -25,14 +25,14 @@ set(ENABLE_TESTS Off CACHE BOOL "Enable BLT and RAJA tests") set(ENABLE_EXAMPLES Off CACHE BOOL "Enable RAJA examples") set(RAJA_ENABLE_EXERCISES Off CACHE BOOL "Enable RAJA exercises") -set(CMAKE_CXX_STANDARD 11) -set(BLT_CXX_STANDARD 11) - +if(ENABLE_KOKKOS) + set(CMAKE_CXX_STANDARD 14) + set(BLT_CXX_STANDARD 14) +endif() include(blt/SetupBLT.cmake) # # Define RAJA settings... -# set(ENABLE_TESTS Off CACHE BOOL "") set(ENABLE_EXAMPLES Off CACHE BOOL "") @@ -53,11 +53,8 @@ add_subdirectory(tpl/RAJA) get_property(RAJA_INCLUDE_DIRS DIRECTORY tpl/RAJA PROPERTY INCLUDE_DIRECTORIES) include_directories(${RAJA_INCLUDE_DIRS}) - # # Setup variables to pass to Perf suite -# - # # These (hopefully temporary) macro constants are needed to work-around # performance issues in the xl compiler. 
@@ -80,8 +77,13 @@ if (ENABLE_OPENMP) endif() if (ENABLE_CUDA) list(APPEND RAJA_PERFSUITE_DEPENDS cuda) -endif() -if (ENABLE_HIP) +endif() + +# Kokkos requires hipcc as the CMAKE_CXX_COMPILER for HIP AMD/VEGA GPU +# platforms, whereas RAJAPerf Suite uses blt/CMake FindHIP to set HIP compiler +# Separate RAJAPerf Suite and Kokkos handling of HIP compilers + +if ((ENABLE_HIP) AND (NOT ENABLE_KOKKOS)) list(APPEND RAJA_PERFSUITE_DEPENDS hip) endif() @@ -89,9 +91,10 @@ set(RAJAPERF_BUILD_SYSTYPE $ENV{SYS_TYPE}) set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME}) if (ENABLE_CUDA) - set(CMAKE_CUDA_STANDARD 11) + if (ENABLE_CUDA AND ENABLE_KOKKOS) + set(CMAKE_CUDA_STANDARD 14) + endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr") - set(RAJAPERF_COMPILER "${CUDA_NVCC_EXECUTABLE}") list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER}) set(RAJAPERF_COMPILER_OPTIONS "${CUDA_NVCC_FLAGS}") @@ -106,14 +109,58 @@ else() list(APPEND RAJAPERF_COMPILER_OPTIONS ${CMAKE_CXX_FLAGS}) endif() -configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/rajaperf_config.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/bin/rajaperf_config.hpp) -# Make sure RAJA flag propagate (we need to do some house cleaning to +# Make sure RAJA flags propagate (we need to do some tidying to # remove project-specific CMake variables that are no longer needed) set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS}) -# -# Each directory in the perf suite has its own CMakeLists.txt file. -# +# The statement below is required for Kokkos compilation. 
+if(ENABLE_KOKKOS) + include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/tpl/RAJA/include/) +endif() + + +# ENABLE_KOKKOS is A RAJAPerf Suite Option +if(ENABLE_KOKKOS) + add_definitions(-DRUN_KOKKOS) + if(ENABLE_HIP) + set(Kokkos_ENABLE_HIP ON CACHE BOOL "Kokkos builds for AMD HIP set the +Kokkos_ENABLE_HIP variable to ON") + #set(Kokkos_ARCH_VEGA900 ON CACHE BOOL "Docstring") #TODO: better + endif() + if(ENABLE_TARGET_OPENMP) + set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL "Docstring") + set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Docstring") #TODO: better + set(CMAKE_CXX_STANDARD 17) + set(BLT_CXX_STANDARD 17) + set(RAJA_ENABLE_TARGET_OPENMP ON CACHE BOOL "Docstring") + if(NOT CMAKE_BUILD_TYPE MATCHES Debug) + if(NOT EXPERIMENTAL_BUILD) + message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON") + endif() + endif() + + #add_definitions(-DRAJA_ENABLE_TARGET_OPENMP) + endif() + +# ENABLE_CUDA IS A RAJA PERFSUITE OPTION + if(ENABLE_CUDA) + set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Docstring") + set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Docstring") + set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Docstring") #TODO: better + enable_language(CUDA) + endif() + if(ENABLE_OPENMP) + set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Docstring") + endif() + + add_subdirectory(tpl/kokkos) + get_property(KOKKOS_INCLUDE_DIRS DIRECTORY tpl/kokkos PROPERTY INCLUDE_DIRECTORIES) + include_directories(${KOKKOS_INCLUDE_DIRS}) + + list(APPEND RAJA_PERFSUITE_DEPENDS kokkos) +endif() + add_subdirectory(src) diff --git a/blt b/blt index ddd5a0ca7..d14490144 160000 --- a/blt +++ b/blt @@ -1 +1 @@ -Subproject commit ddd5a0ca7c566d0ae14270b66625c8a363630ddb +Subproject commit d144901443362ff153291121717a28778a703c60 diff --git a/scripts/config/watchr_KokkosConfig.json b/scripts/config/watchr_KokkosConfig.json new file mode 100755 index 000000000..ec10e2e8a --- 
/dev/null +++ b/scripts/config/watchr_KokkosConfig.json @@ -0,0 +1,132 @@ +{ + "plots" : { + "files" : { + "fileName": "RAJAPerf*", + "type" : "xml", + "ignoreOldFiles" : true, + "recurseDirectories" : true + }, + "categories": [ + "kokkos_lambda", + "lambda_cuda", + "base_cuda", + "base_seq", + "lambda_seq", + "raja_cuda", + "raja_seq" + ], + "plot" : [ + { + "autoname" : { + "useProperty" : "y/path", + "formatByRemovingPrefix" : "\\/RAJAPerf\\d{4}\\-\\d{2}\\-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.xml\\/" + }, + "category" : "kokkos_lambda", + "template" : "kokkos_template", + "dataLines" : [ + { + "name" : "Data Line", + "template" : "Line_Template", + "x" : { + "getPath": "*", + "getElement" : "performance-report", + "getKey" : "date", + "unit" : "timestamp" + }, + "y" : { + "getElement" : "performance-report|timing", + "getPath": "*/kokkos_perf_suite/*", + "getPathAttribute": "name", + "getKey" : "kokkos_lambda", + "unit" : "seconds", + "strategy" : { + "getFirstMatchOnly" : "false", + "recurseChildGraphs" : "true" + } + }, + "color" : "202,77,77" + } + ] + },{ + "inherit" : "kokkos_template", + "category" : "base_cuda", + "dataLines" : [ + { + "inherit" : "Line_Template", + "y" : { + "getKey" : "base_cuda" + } + } + ] + }, { + "inherit" : "kokkos_template", + "category" : "base_seq", + "dataLines" : [ + { + "inherit" : "Line_Template", + "y" : { + "getKey" : "base_seq" + } + } + ] + }, { + + "inherit" : "kokkos_template", + "category" : "lambda_seq", + "dataLines" : [ + { + "inherit" : "Line_Template", + "y" : { + "getKey" : "lambda_seq" + } + } + ] + + },{ + "inherit" : "kokkos_template", + "category" : "lambda_cuda", + "dataLines" : [ + { + "inherit" : "Line_Template", + "y" : { + "getKey" : "lambda_cuda" + } + } + ] + },{ + "inherit" : "kokkos_template", + "category" : "raja_cuda", + "dataLines" : [ + { + "inherit" : "Line_Template", + "y" : { + "getKey" : "raja_cuda" + } + } + ] + }, { + "inherit" : "kokkos_template", + "category" : "raja_seq", + "dataLines" : [ + 
{ + "inherit" : "Line_Template", + "y" : { + "getKey" : "raja_seq" + } + } + ] + } + ] + }, + "graphDisplay": { + "dbLocation" : "root", + "page" : 1, + "displayCategory" : "kokkos_lambda", + "displayRange" : 30, + "graphWidth" : 450, + "graphHeight" : 450, + "graphsPerRow" : 3, + "graphsPerPage" : 15, + "displayedDecimalPlaces" : 3 + } +} diff --git a/scripts/snl-builds/caraway_rhel7_hipcc_4.0.0.sh b/scripts/snl-builds/caraway_rhel7_hipcc_4.0.0.sh new file mode 100755 index 000000000..6ecde9d4f --- /dev/null +++ b/scripts/snl-builds/caraway_rhel7_hipcc_4.0.0.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +############################################################################### +# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/COPYRIGHT file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +################################################################################# + +BUILD_SUFFIX=snl_rhel7-hipcc-4.0.0 + +rm -rf build_${BUILD_SUFFIX} 2>/dev/null +mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} + +################################## +#Caraway Build (AMD) +################################# +module purge + +module load cmake/3.19.3 + +module load git/2.9.4 + +################################## +# FOR COMPUTE NODE (caraway04 GPU): + +module load rocm/4.0.0 + +module load python/3.7.3 + +cmake \ +-DCMAKE_BUILD_TYPE=Release \ +-DENABLE_KOKKOS=ON \ +-DENABLE_HIP=ON \ +-DKokkos_ARCH_VEGA900=ON \ +-DCMAKE_CXX_FLAGS="--gcc-toolchain=/home/projects/x86-64/gcc/8.2.0/" \ +-DHIP_HIPCC_FLAGS="--gcc-toolchain=/home/projects/x86-64/gcc/8.2.0/ -std=c++17" \ +-DCMAKE_CXX_STANDARD=17 \ +-DCMAKE_CXX_COMPILER=hipcc .. \ + +make -j24;make + +cd bin/ +./raja-perf.exe + + + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index aff1d7326..b648d8c18 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,30 +8,67 @@ include_directories(.) 
+# The INFRASTRUCTURE_ONLY option is for the scenario where +# ONLY the RAJAPerf Suite infrastructure is used as the driver for +# Kokkos and Kokkos Kernels performance tests add_subdirectory(common) -add_subdirectory(apps) -add_subdirectory(basic) -add_subdirectory(lcals) -add_subdirectory(polybench) -add_subdirectory(stream) -add_subdirectory(algorithm) -set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS - common - apps +if(NOT INFRASTRUCTURE_ONLY) + add_subdirectory(algorithm) + add_subdirectory(apps) + add_subdirectory(basic) + add_subdirectory(lcals) + add_subdirectory(stream) + add_subdirectory(polybench) + if(ENABLE_KOKKOS) + # Kokkos translations + add_subdirectory(algorithm-kokkos) + add_subdirectory(apps-kokkos) + add_subdirectory(basic-kokkos) + add_subdirectory(lcals-kokkos) + add_subdirectory(stream-kokkos) + # Stub Kokkos implementations for polybench + add_subdirectory(polybench-kokkos) + endif() + +endif() + +set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS common) +if(NOT INFRASTRUCTURE_ONLY) + list(APPEND RAJA_PERFSUITE_EXECUTABLE_DEPENDS basic + apps lcals - polybench stream - algorithm) -list(APPEND RAJA_PERFSUITE_EXECUTABLE_DEPENDS ${RAJA_PERFSUITE_DEPENDS}) + algorithm + polybench) +endif() +if(ENABLE_KOKKOS) + list(APPEND RAJA_PERFSUITE_EXECUTABLE_DEPENDS + basic-kokkos + apps-kokkos + lcals-kokkos + stream-kokkos + algorithm-kokkos + # Stub implementation of polybench for Kokkos + polybench-kokkos + ) +endif() +#endif() +# This line must be kept +list(APPEND RAJA_PERFSUITE_EXECUTABLE_DEPENDS ${RAJA_PERFSUITE_DEPENDS}) if(ENABLE_TARGET_OPENMP) remove_definitions(-DRUN_RAJA_SEQ -DRUN_OPENMP ) +include_directories(basic) +include_directories(lcals) +include_directories(apps) +include_directories(algorithm) +include_directories(stream) +include_directories(polybench) +list(APPEND RAJA_PERF_OMP_SOURCES -blt_add_executable( - NAME raja-perf-omptarget.exe - SOURCES RAJAPerfSuiteDriver.cpp +RAJAPerfSuiteDriver.cpp apps/AppsData.cpp apps/DEL_DOT_VEC_2D.cpp 
apps/DEL_DOT_VEC_2D-Seq.cpp @@ -66,7 +103,13 @@ blt_add_executable( apps/VOL3D.cpp apps/VOL3D-Seq.cpp apps/VOL3D-OMPTarget.cpp - apps/WIP-COUPLE.cpp + #apps/WIP-COUPLE.cpp + basic/ATOMIC_PI.cpp + basic/ATOMIC_PI-Seq.cpp + basic/ATOMIC_PI-OMPTarget.cpp + basic/PI_ATOMIC.cpp + basic/PI_ATOMIC-Seq.cpp + basic/PI_ATOMIC-OMPTarget.cpp basic/DAXPY.cpp basic/DAXPY-Seq.cpp basic/DAXPY-OMPTarget.cpp @@ -190,6 +233,10 @@ blt_add_executable( stream/TRIAD.cpp stream/TRIAD-Seq.cpp stream/TRIAD-OMPTarget.cpp + algorithm/SORT.cpp + algorithm/SORT-Seq.cpp + algorithm/SORTPAIRS.cpp + algorithm/SORTPAIRS-Seq.cpp common/DataUtils.cpp common/Executor.cpp common/KernelBase.cpp @@ -197,17 +244,71 @@ blt_add_executable( common/RAJAPerfSuite.cpp common/RPTypes.hpp common/RunParams.cpp - algorithm/SORT.cpp - algorithm/SORT-Seq.cpp - algorithm/SORTPAIRS.cpp - algorithm/SORTPAIRS-Seq.cpp +) +if(ENABLE_KOKKOS) +list(APPEND RAJA_PERF_OMP_SOURCES + #Kokkos translations + apps-kokkos/DEL_DOT_VEC_2D-Kokkos.cpp + apps-kokkos/ENERGY-Kokkos.cpp + apps-kokkos/FIR-Kokkos.cpp + apps-kokkos/HALOEXCHANGE-Kokkos.cpp + apps-kokkos/PRESSURE-Kokkos.cpp + apps-kokkos/LTIMES-Kokkos.cpp + apps-kokkos/LTIMES_NOVIEW-Kokkos.cpp + apps-kokkos/VOL3D-Kokkos.cpp + basic-kokkos/PI_ATOMIC-Kokkos.cpp + basic-kokkos/DAXPY-Kokkos.cpp + basic-kokkos/IF_QUAD-Kokkos.cpp + basic-kokkos/INIT3-Kokkos.cpp + basic-kokkos/INIT_VIEW1D-Kokkos.cpp + basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp + basic-kokkos/MULADDSUB-Kokkos.cpp + basic-kokkos/NESTED_INIT-Kokkos.cpp + basic-kokkos/REDUCE3_INT-Kokkos.cpp + basic-kokkos/TRAP_INT-Kokkos.cpp + lcals-kokkos/DIFF_PREDICT-Kokkos.cpp + lcals-kokkos/EOS-Kokkos.cpp + lcals-kokkos/FIRST_DIFF-Kokkos.cpp + lcals-kokkos/FIRST_MIN-Kokkos.cpp + lcals-kokkos/FIRST_SUM-Kokkos.cpp + lcals-kokkos/GEN_LIN_RECUR-Kokkos.cpp + lcals-kokkos/HYDRO_1D-Kokkos.cpp + lcals-kokkos/HYDRO_2D-Kokkos.cpp + lcals-kokkos/INT_PREDICT-Kokkos.cpp + lcals-kokkos/PLANCKIAN-Kokkos.cpp + lcals-kokkos/TRIDIAG_ELIM-Kokkos.cpp + 
stream-kokkos/ADD-Kokkos.cpp + stream-kokkos/COPY-Kokkos.cpp + stream-kokkos/DOT-Kokkos.cpp + stream-kokkos/MUL-Kokkos.cpp + algorithm-kokkos/SORT-Kokkos.cpp + algorithm-kokkos/SORTPAIRS-Kokkos.cpp stream-kokkos/TRIAD-Kokkos.cpp + #Stub implementations for polybench-kokkos + polybench-kokkos/POLYBENCH_2MM-Seq.cpp + polybench-kokkos/POLYBENCH_3MM-Seq.cpp + polybench-kokkos/POLYBENCH_ADI-Seq.cpp + polybench-kokkos/POLYBENCH_ATAX-Seq.cpp + polybench-kokkos/POLYBENCH_FDTD_2D-Seq.cpp + polybench-kokkos/POLYBENCH_FLOYD_WARSHALL-Seq.cpp + polybench-kokkos/POLYBENCH_GEMM-Seq.cpp + polybench-kokkos/POLYBENCH_GEMVER-Seq.cpp + polybench-kokkos/POLYBENCH_GESUMMV-Seq.cpp + polybench-kokkos/POLYBENCH_HEAT_3D-Seq.cpp + polybench-kokkos/POLYBENCH_JACOBI_1D-Seq.cpp + polybench-kokkos/POLYBENCH_JACOBI_2D-Seq.cpp + polybench-kokkos/POLYBENCH_MVT-Seq.cpp +) +endif() #ENABLE_KOKKOS +blt_add_executable( + NAME raja-perf-omptarget.exe + SOURCES ${RAJA_PERF_OMP_SOURCES} DEPENDS_ON ${RAJA_PERFSUITE_DEPENDS} ) +else() #ENABLE_TARGET_OPENMP -else() blt_add_executable( NAME raja-perf.exe SOURCES RAJAPerfSuiteDriver.cpp DEPENDS_ON ${RAJA_PERFSUITE_EXECUTABLE_DEPENDS} ) -endif() +endif() # ENABLE_TARGET_OPENMP diff --git a/src/RAJAPerfSuiteDriver.cpp b/src/RAJAPerfSuiteDriver.cpp index c47ecd9f1..a58de4240 100644 --- a/src/RAJAPerfSuiteDriver.cpp +++ b/src/RAJAPerfSuiteDriver.cpp @@ -7,28 +7,39 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "common/Executor.hpp" - +#include "common/QuickKernelBase.hpp" #include //------------------------------------------------------------------------------ int main( int argc, char** argv ) { - // STEP 1: Create suite executor object - rajaperf::Executor executor(argc, argv); + // Create suite executor object with the arguments that were passed in + // rajaperf::Executor executor(argc, argv); + +#if defined(RUN_KOKKOS) + Kokkos::initialize(argc, argv); +#endif // RUN_KOKKOS - // STEP 2: Assemble kernels and variants 
to run + rajaperf::Executor executor(argc, argv); + rajaperf::make_perfsuite_executor(&executor, argc, argv); + + // Assemble kernels and variants to run executor.setupSuite(); - // STEP 3: Report suite run summary + // Report suite run summary // (enable users to catch errors before entire suite is run) executor.reportRunSummary(std::cout); - // STEP 4: Execute suite + // Execute suite of selected tests executor.runSuite(); - // STEP 5: Generate suite execution reports + // Generate suite execution reports executor.outputRunData(); +#if defined(RUN_KOKKOS) + Kokkos::finalize(); // TODO DZP: should this be here? Good question. AJP +#endif + std::cout << "\n\nDONE!!!...." << std::endl; return 0; diff --git a/src/algorithm-kokkos/CMakeLists.txt b/src/algorithm-kokkos/CMakeLists.txt new file mode 100644 index 000000000..78b10b113 --- /dev/null +++ b/src/algorithm-kokkos/CMakeLists.txt @@ -0,0 +1,17 @@ +############################################################################### +# Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/COPYRIGHT file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../algorithm) + + +blt_add_library( + NAME algorithm-kokkos + SOURCES SORT-Kokkos.cpp + SORTPAIRS-Kokkos.cpp + DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} + ) diff --git a/src/algorithm-kokkos/SORT-Kokkos.cpp b/src/algorithm-kokkos/SORT-Kokkos.cpp new file mode 100644 index 000000000..dfc0292fa --- /dev/null +++ b/src/algorithm-kokkos/SORT-Kokkos.cpp @@ -0,0 +1,70 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SORT.hpp" +#include + + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SORT::runKokkosVariant(VariantID vid) +{ + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + SORT_DATA_SETUP; + + // Wrap pointers in Kokkos view + + auto x_view = getViewFromPointer(x, iend*run_reps); + +#if defined (RUN_KOKKOS) + + switch ( vid ) { + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + //#define STD_SORT_ARGS vs. using RAJAPerf Suite expression + //x + iend*irep + ibegin, x + iend*irep + iend + + Kokkos::sort(x_view, iend*irep + ibegin, iend*irep + iend); + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n SORT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(x, x_view, iend*run_reps); + + +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm-kokkos/SORTPAIRS-Kokkos.cpp b/src/algorithm-kokkos/SORTPAIRS-Kokkos.cpp new file mode 100644 index 000000000..68504e3b6 --- /dev/null +++ b/src/algorithm-kokkos/SORTPAIRS-Kokkos.cpp @@ -0,0 +1,71 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "SORTPAIRS.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include +#include +#include + +namespace rajaperf +{ +namespace algorithm +{ + + +void SORTPAIRS::runKokkosVariant(VariantID vid) +{ + // Here, we are returning for configure, build and running purposes, + // because Kokkos does not yet have a "sort pairs" capability + return; + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + SORTPAIRS_DATA_SETUP; + + +#if defined (RUN_KOKKOS) + switch ( vid ) { + +/* + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + //RAJA::sort_pairs(RAJA_SORTPAIRS_ARGS); + + + }); + } + Kokkos::fence(); + stopTimer(); + + break; + } +*/ + default : { + std::cout << "\n SORTPAIRS : Unknown variant id = " << vid << std::endl; + } + + } +#endif // RUN_KOKKOS + + //moveDataToHostFromKokkosView(x, x_view, iend); + //moveDataToHostFromKokkosView(i, i_view, iend); + +} + +} // end namespace algorithm +} // end namespace rajaperf diff --git a/src/algorithm/SORT.cpp b/src/algorithm/SORT.cpp index d9d659482..7ca2f7e2d 100644 --- a/src/algorithm/SORT.cpp +++ b/src/algorithm/SORT.cpp @@ -41,6 +41,7 @@ SORT::SORT(const RunParams& params) setVariantDefined( RAJA_CUDA ); setVariantDefined( RAJA_HIP ); + setVariantDefined(Kokkos_Lambda); } SORT::~SORT() diff --git a/src/algorithm/SORT.hpp b/src/algorithm/SORT.hpp index f576bee97..4c1a3cd12 100644 --- a/src/algorithm/SORT.hpp +++ b/src/algorithm/SORT.hpp @@ -46,6 +46,7 @@ class SORT : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); @@ -55,6 +56,8 @@ class SORT : public KernelBase 
std::cout << "\n SORT : Unknown OMP Target variant id = " << vid << std::endl; } + + private: Real_ptr m_x; }; diff --git a/src/algorithm/SORTPAIRS.cpp b/src/algorithm/SORTPAIRS.cpp index 7f2e59cbb..0ff6f3cfb 100644 --- a/src/algorithm/SORTPAIRS.cpp +++ b/src/algorithm/SORTPAIRS.cpp @@ -41,6 +41,7 @@ SORTPAIRS::SORTPAIRS(const RunParams& params) setVariantDefined( RAJA_CUDA ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Kokkos_Lambda ); } SORTPAIRS::~SORTPAIRS() diff --git a/src/algorithm/SORTPAIRS.hpp b/src/algorithm/SORTPAIRS.hpp index b6f03005f..4708018b0 100644 --- a/src/algorithm/SORTPAIRS.hpp +++ b/src/algorithm/SORTPAIRS.hpp @@ -54,6 +54,8 @@ class SORTPAIRS : public KernelBase std::cout << "\n SORTPAIRS : Unknown OMP Target variant id = " << vid << std::endl; } + void runKokkosVariant(VariantID vid); + private: Real_ptr m_x; Real_ptr m_i; diff --git a/src/apps-kokkos/CMakeLists.txt b/src/apps-kokkos/CMakeLists.txt new file mode 100644 index 000000000..07802d26e --- /dev/null +++ b/src/apps-kokkos/CMakeLists.txt @@ -0,0 +1,23 @@ +############################################################################### +# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + + +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../apps) + +blt_add_library( + NAME apps-kokkos + SOURCES DEL_DOT_VEC_2D-Kokkos.cpp + ENERGY-Kokkos.cpp + FIR-Kokkos.cpp + HALOEXCHANGE-Kokkos.cpp + LTIMES-Kokkos.cpp + LTIMES_NOVIEW-Kokkos.cpp + PRESSURE-Kokkos.cpp + VOL3D-Kokkos.cpp + DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} + ) diff --git a/src/apps-kokkos/DEL_DOT_VEC_2D-Kokkos.cpp b/src/apps-kokkos/DEL_DOT_VEC_2D-Kokkos.cpp new file mode 100644 index 000000000..9ccfcefed --- /dev/null +++ b/src/apps-kokkos/DEL_DOT_VEC_2D-Kokkos.cpp @@ -0,0 +1,201 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DEL_DOT_VEC_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#include "AppsData.hpp" + +#include "camp/resource.hpp" + +#include + +namespace rajaperf { +namespace apps { + +struct arrayOffSetStruct { + using ViewType = Kokkos::View; // Real_ptr is equivalent to float* + + // v's represent different offsets in different Kokkos views; + ViewType v, v4, v1, v2, v3; + + // constructor + arrayOffSetStruct(const std::string& name, // we needed a name, for future efforts + Index_type num_elements, // alloc size of head; + Index_type jp, // their macro took in jp, so we're using it + Real_ptr head // v, approximately; + ): + // ":" = list of things to initialize + v (getViewFromPointer(head, num_elements)), + // Initializing v4 with v + v4(v), + v1(Kokkos::subview(v4, std::make_pair(static_cast(1), v4.extent(0)))), + v2(Kokkos::subview(v1, std::make_pair(static_cast(jp), v1.extent(0)))), + 
v3(Kokkos::subview(v4, std::make_pair(static_cast(jp), v4.extent(0)))){ + } +}; + +void DEL_DOT_VEC_2D::runKokkosVariant(VariantID vid) { + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = m_domain->n_real_zones; + + DEL_DOT_VEC_2D_DATA_SETUP; + + NDSET2D(m_domain->jp, x, x1, x2, x3, x4); + NDSET2D(m_domain->jp, y, y1, y2, y3, y4); + NDSET2D(m_domain->jp, xdot, fx1, fx2, fx3, fx4); + NDSET2D(m_domain->jp, ydot, fy1, fy2, fy3, fy4); + + // Instantiating Kokkos view + auto div_view = getViewFromPointer(div, m_domain->nnalls); + + arrayOffSetStruct x_offsets("x_offsets", m_domain->nnalls, m_domain->jp, x ); + arrayOffSetStruct y_offsets("y_offsets", m_domain->nnalls, m_domain->jp, y ); + arrayOffSetStruct xdot_offsets("xdot_offsets", m_domain->nnalls, m_domain->jp, xdot ); + arrayOffSetStruct ydot_offsets("ydot_offsets", m_domain->nnalls, m_domain->jp, ydot ); + + auto& x_view = x_offsets.v; + auto& x1_view = x_offsets.v1; + auto& x2_view = x_offsets.v2; + auto& x3_view = x_offsets.v3; + auto& x4_view = x_offsets.v4; + + + auto& y_view = y_offsets.v; + auto& y1_view = y_offsets.v1; + auto& y2_view = y_offsets.v2; + auto& y3_view = y_offsets.v3; + auto& y4_view = y_offsets.v4; + + + auto& xdot_view = xdot_offsets.v; + auto& fx1_view = xdot_offsets.v1; + auto& fx2_view = xdot_offsets.v2; + auto& fx3_view = xdot_offsets.v3; + auto& fx4_view = xdot_offsets.v4; + + + auto& ydot_view = ydot_offsets.v; + auto& fy1_view = ydot_offsets.v1; + auto& fy2_view = ydot_offsets.v2; + auto& fy3_view = ydot_offsets.v3; + auto& fy4_view = ydot_offsets.v4; + + +#if defined(RUN_KOKKOS) + switch (vid) { + + case Kokkos_Lambda: { + + // Translation from RAJAPerf Suite to Kokkos notes: + // Host resource will be used for loop execution + // camp::resources::Resource working_res{camp::resources::Host()}; + + // List segment = indices you're iterating over are contained in lists; + + /* RAJA::TypedListSegment zones(m_domain->real_zones, + 
m_domain->n_real_zones, + working_res); + */ + + auto index_list = + getViewFromPointer(m_domain->real_zones, m_domain->n_real_zones); + startTimer(); // begin timing the rep loop; paired with the stopTimer() that follows it + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // RAJA::forall(zones, deldotvec2d_lam); + Kokkos::parallel_for( + "DEL_DOT_VEC_2D Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type ii) { + // #define DEL_DOT_VEC_2D_BODY + int i = index_list[ii]; + + // Real_type xi = half * ( x1[i] + x2[i] - x3[i] - x4[i] ) ; + Real_type xi = + half * (x1_view[i] + x2_view[i] - x3_view[i] - + x4_view[i]); // Real_type xj = half * ( x2[i] + x3[i] + // - x4[i] - x1[i] ) ; + Real_type xj = + half * (x2_view[i] + x3_view[i] - x4_view[i] - + x1_view[i]); // Real_type yi = half * ( y1[i] + y2[i] + // - y3[i] - y4[i] ) ; + Real_type yi = + half * (y1_view[i] + y2_view[i] - y3_view[i] - + y4_view[i]); // Real_type yj = half * ( y2[i] + y3[i] + // - y4[i] - y1[i] ) ; + Real_type yj = + half * (y2_view[i] + y3_view[i] - y4_view[i] - + y1_view[i]); // Real_type fxi = half * ( fx1[i] + fx2[i] + // - fx3[i] - fx4[i] ) ; + Real_type fxi = + half * (fx1_view[i] + fx2_view[i] - fx3_view[i] - + fx4_view[i]); // Real_type fxj = half * ( fx2[i] + + // fx3[i] - fx4[i] - fx1[i] ) ; + Real_type fxj = + half * (fx2_view[i] + fx3_view[i] - fx4_view[i] - + fx1_view[i]); // Real_type fyi = half * ( fy1[i] + + // fy2[i] - fy3[i] - fy4[i] ) ; + Real_type fyi = + half * (fy1_view[i] + fy2_view[i] - fy3_view[i] - + fy4_view[i]); // Real_type fyj = half * ( fy2[i] + + // fy3[i] - fy4[i] - fy1[i] ) ; + Real_type fyj = + half * (fy2_view[i] + fy3_view[i] - fy4_view[i] - + fy1_view[i]); // Real_type rarea = 1.0 / ( xi * yj - xj + // * yi + ptiny ) ; + Real_type rarea = + 1.0 / + (xi * yj - xj * yi + + ptiny); // Real_type dfxdx = rarea * ( fxi * yj - fxj * yi ) ; + Real_type dfxdx = + rarea * (fxi * yj - fxj * yi); // Real_type dfydy = rarea * ( + // fyj * xi - fyi * xj ) ; + Real_type dfydy = + rarea * (fyj * xi - fyi * xj); /* 
Real_type affine = ( fy1[i] + + fy2[i] + fy3[i] + fy4[i] ) / \ + ( y1[i] + + y2[i] + y3[i] + y4[i] ) ; \ + */ + Real_type affine = + (fy1_view[i] + fy2_view[i] + fy3_view[i] + fy4_view[i]) / + (y1_view[i] + y2_view[i] + y3_view[i] + + y4_view[i]); // div[i] = dfxdx + dfydy + affine ; + div_view[i] = dfxdx + dfydy + affine; + } + + ); + } + stopTimer(); + + break; + } + + default: { + std::cout << "\n DEL_DOT_VEC_2D : Unknown variant id = " << vid + << std::endl; + } + +} + +#endif // RUN_KOKKOS + + + moveDataToHostFromKokkosView(x, x_view, m_domain->nnalls); + moveDataToHostFromKokkosView(y, y_view, m_domain->nnalls); + moveDataToHostFromKokkosView(xdot, xdot_view, m_domain->nnalls); + moveDataToHostFromKokkosView(ydot, ydot_view, m_domain->nnalls); + moveDataToHostFromKokkosView(div, div_view, m_domain->nnalls); + +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps-kokkos/ENERGY-Kokkos.cpp b/src/apps-kokkos/ENERGY-Kokkos.cpp new file mode 100644 index 000000000..a19bdfbc2 --- /dev/null +++ b/src/apps-kokkos/ENERGY-Kokkos.cpp @@ -0,0 +1,220 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ENERGY.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace apps +{ + + +void ENERGY::runKokkosVariant(VariantID vid) +{ + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + ENERGY_DATA_SETUP; + + // Wrap pointers in Kokkos views + + auto e_new_view = getViewFromPointer(e_new, iend); + auto e_old_view = getViewFromPointer(e_old, iend); + auto delvc_view = getViewFromPointer(delvc, iend); + auto p_new_view = getViewFromPointer(p_new, iend); + auto p_old_view = getViewFromPointer(p_old, iend); + auto q_new_view = getViewFromPointer(q_new, iend); + auto q_old_view = getViewFromPointer(q_old, iend); + auto work_view = getViewFromPointer(work, iend); + auto compHalfStep_view = getViewFromPointer(compHalfStep, iend); + auto pHalfStep_view = getViewFromPointer(pHalfStep, iend); + auto bvc_view = getViewFromPointer(bvc, iend); + auto pbvc_view = getViewFromPointer(pbvc, iend); + auto ql_old_view = getViewFromPointer(ql_old, iend); + auto qq_old_view = getViewFromPointer(qq_old, iend); + auto vnewc_view = getViewFromPointer(vnewc, iend); + + + auto energy_lam1 = [=](Index_type i) { + ENERGY_BODY1; + }; + auto energy_lam2 = [=](Index_type i) { + ENERGY_BODY2; + }; + auto energy_lam3 = [=](Index_type i) { + ENERGY_BODY3; + }; + auto energy_lam4 = [=](Index_type i) { + ENERGY_BODY4; + }; + auto energy_lam5 = [=](Index_type i) { + ENERGY_BODY5; + }; + auto energy_lam6 = [=](Index_type i) { + ENERGY_BODY6; + }; + +#if defined(RUN_KOKKOS) + switch ( vid ) { + + case Kokkos_Lambda : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for("ENERGY - lambda 1", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i){ + //#define ENERGY_BODY1 + e_new_view[i] = e_old_view[i] - 0.5 * 
delvc_view[i] * \ + (p_old_view[i] + ql_old_view[i]) + 0.5 * work_view[i]; + + }); + + Kokkos::parallel_for("ENERGY - lambda 2", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i){ + //#define ENERGY_BODY2 + if ( delvc_view[i] > 0.0 ) { + q_new_view[i] = 0.0 ; + } \ + else { \ + Real_type vhalf = 1.0 / (1.0 + compHalfStep_view[i]) ; + Real_type ssc = ( pbvc_view[i] * e_new_view[i] + vhalf * vhalf * bvc_view[i] * pHalfStep_view[i] ) / rho0 ; // index the views, not the captured raw pointers + if ( ssc <= 0.1111111e-36 ) { + ssc = 0.3333333e-18 ; + } else { + ssc = sqrt(ssc) ; + } + q_new_view[i] = (ssc*ql_old_view[i] + qq_old_view[i]) ; + } + }); + + + Kokkos::parallel_for("ENERGY - lambda 3", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i){ + //#define ENERGY_BODY3 + + e_new_view[i] = e_new_view[i] + 0.5 * delvc_view[i] \ + * ( 3.0*(p_old_view[i] + qq_old_view[i]) \ + - 4.0*(pHalfStep_view[i] + q_new_view[i])) ; + + + }); + + + Kokkos::parallel_for("ENERGY - lambda 4", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i){ + //#define ENERGY_BODY4 + + e_new_view[i] += 0.5 * work_view[i]; \ + if ( fabs(e_new_view[i]) < e_cut ) { e_new_view[i] = 0.0 ; } \ + if ( e_new_view[i] < emin ) { e_new_view[i] = emin ; } + + }); + + + Kokkos::parallel_for("ENERGY - lambda 5", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i){ + //#define ENERGY_BODY5 + Real_type q_tilde ; \ + + if (delvc_view[i] > 0.0) { \ + q_tilde = 0. 
; \ + } \ + else { \ + Real_type ssc = ( pbvc_view[i] * e_new_view[i] \ + + vnewc_view[i] * vnewc_view[i] * bvc_view[i] * p_new_view[i] ) / rho0 ; \ + if ( ssc <= 0.1111111e-36 ) { \ + ssc = 0.3333333e-18 ; \ + } else { \ + ssc = sqrt(ssc) ; \ + } \ + q_tilde = (ssc*ql_old_view[i] + qq_old_view[i]) ; \ + } \ + e_new_view[i] = e_new_view[i] - ( 7.0*(p_old_view[i] + q_old_view[i]) \ + - 8.0*(pHalfStep_view[i] + q_new_view[i]) \ + + (p_new_view[i] + q_tilde)) * delvc_view[i] / 6.0 ; \ + if ( fabs(e_new_view[i]) < e_cut ) { \ + e_new_view[i] = 0.0 ; \ + } \ + if ( e_new_view[i] < emin ) { \ + e_new_view[i] = emin ; \ + } + + + }); + + + Kokkos::parallel_for("ENERGY - lambda 6", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i){ + //#define ENERGY_BODY6 + + if ( delvc_view[i] <= 0.0 ) { \ + Real_type ssc = ( pbvc_view[i] * e_new_view[i] \ + + vnewc_view[i] * vnewc_view[i] * bvc_view[i] * p_new_view[i] ) / rho0 ; \ + if ( ssc <= 0.1111111e-36 ) { \ + ssc = 0.3333333e-18 ; \ + } else { \ + ssc = sqrt(ssc) ; \ + } \ + q_new_view[i] = (ssc*ql_old_view[i] + qq_old_view[i]) ; \ + if (fabs(q_new_view[i]) < q_cut) q_new_view[i] = 0.0 ; \ + } + + + }); + + } + stopTimer(); + + break; + } + + + default : { + std::cout << "\n ENERGY : Unknown variant id = " << vid << std::endl; + } + + } + + +#endif // RUN_KOKKOS + + // Move data from Kokkos view on device back to the host + moveDataToHostFromKokkosView(e_new, e_new_view, iend); + moveDataToHostFromKokkosView(e_old, e_old_view, iend); + moveDataToHostFromKokkosView(delvc, delvc_view, iend); + moveDataToHostFromKokkosView(p_new, p_new_view, iend); + moveDataToHostFromKokkosView(p_old, p_old_view, iend); + moveDataToHostFromKokkosView(q_new, q_new_view, iend); + moveDataToHostFromKokkosView(q_old, q_old_view, iend); // q_old pairs with q_old_view; ql_old has its own copy-back + moveDataToHostFromKokkosView(work, work_view, iend); + moveDataToHostFromKokkosView(compHalfStep, compHalfStep_view, iend); + moveDataToHostFromKokkosView(pHalfStep, pHalfStep_view, iend); + 
moveDataToHostFromKokkosView(bvc, bvc_view, iend); + moveDataToHostFromKokkosView(pbvc, pbvc_view, iend); + moveDataToHostFromKokkosView(ql_old, ql_old_view, iend); + moveDataToHostFromKokkosView(qq_old, qq_old_view, iend); + moveDataToHostFromKokkosView(vnewc, vnewc_view, iend); + +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps-kokkos/FIR-Kokkos.cpp b/src/apps-kokkos/FIR-Kokkos.cpp new file mode 100644 index 000000000..696af71eb --- /dev/null +++ b/src/apps-kokkos/FIR-Kokkos.cpp @@ -0,0 +1,93 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "FIR.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +namespace rajaperf +{ +namespace apps +{ + + +void FIR::runKokkosVariant(VariantID vid) +{ + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize() - m_coefflen; + + FIR_DATA_SETUP; + + // Wrap 4x4 array, "coeff" in a Kokkos::View; + // "coeff" is used in the FIR_BODY + // Real_type coeff[FIR_COEFFLEN]; + // Macro for 4x4 input array + FIR_COEFF; + // "coeff" is assined the memory location containing the value of the 0th element of coeff_array; + Real_ptr coeff = &coeff_array[0]; + + auto coeff_view = getViewFromPointer(coeff, FIR_COEFFLEN); + + auto in_view = getViewFromPointer(in, iend + m_coefflen); + auto out_view = getViewFromPointer(out, iend + m_coefflen); + + auto fir_lam = [=](Index_type i) { + FIR_BODY; + }; + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for("FIR - 
Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + // #define FIR_BODY + Real_type sum = 0.0; + + for (Index_type j = 0; j < coefflen; ++j ) { + sum += coeff_view[j]*in_view[i+j]; + } + out_view[i] = sum; + }); + + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n FIR : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(in, in_view, iend + m_coefflen); + moveDataToHostFromKokkosView(out, out_view, iend + m_coefflen); + + +} + +} // end namespace apps +} // end namespace rajaperf diff --git a/src/apps-kokkos/HALOEXCHANGE-Kokkos.cpp b/src/apps-kokkos/HALOEXCHANGE-Kokkos.cpp new file mode 100644 index 000000000..ba1c400ff --- /dev/null +++ b/src/apps-kokkos/HALOEXCHANGE-Kokkos.cpp @@ -0,0 +1,170 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HALOEXCHANGE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+// Kokkos variant of HALOEXCHANGE: for every neighbor, packs each variable into
+// that neighbor's buffer through an index list, then unpacks it the same way.
+void HALOEXCHANGE::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+
+  // Nota bene: ibegin, iend not defined for this kernel
+  // Instead:
+  // Index_type num_neighbors = s_num_neighbors;
+  // Index_type num_vars = m_num_vars;
+  // How these variables are set:
+  // apps/HALOEXCHANGE.cpp: m_num_vars_default = 3;
+  // apps/HALOEXCHANGE.hpp: static const int s_num_neighbors = 26;
+
+  // HALOEXCHANGE_DATA_SETUP is not expanded here; the views below stand in for its pointers
+
+  // Kokkos views over the member arrays, kept in vectors named after the
+  // pointer variables they replace. NOTE(review): the view element types
+  // below were lost in extraction; Real_ptr/Int_ptr are assumed - confirm.
+
+  std::vector<Kokkos::View<Real_ptr>> vars;
+  std::vector<Kokkos::View<Real_ptr>> buffers;
+  std::vector<Kokkos::View<Int_ptr>> pack_index_lists;
+  std::vector<Kokkos::View<Int_ptr>> unpack_index_lists;
+
+  for (auto var: m_vars) {
+    vars.push_back(getViewFromPointer(var, m_var_size));
+  }
+
+  for ( std::size_t x = 0; x < m_buffers.size(); ++x ) {
+    Index_type buffer_len = m_num_vars * m_pack_index_list_lengths[x];
+    buffers.push_back(getViewFromPointer(m_buffers[x], buffer_len));
+  }
+
+
+  for ( std::size_t x = 0; x < m_pack_index_lists.size(); ++x ) {
+
+    pack_index_lists.push_back(getViewFromPointer(m_pack_index_lists[x], m_pack_index_list_lengths[x]));
+  }
+
+
+  for ( std::size_t x = 0; x < m_unpack_index_lists.size(); ++x ) {
+
+    unpack_index_lists.push_back(getViewFromPointer(m_unpack_index_lists[x], m_unpack_index_list_lengths[x]));
+  }
+  auto num_neighbors = s_num_neighbors;
+  auto num_vars = m_num_vars;
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // FYI: num_neighbors defined in HALOEXCHANGE.hpp
+        // num_neighbors is set in HALOEXCHANGE.cpp
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          auto buffer = buffers[l];
+          auto list = pack_index_lists[l];
+          Index_type len = m_pack_index_list_lengths[l];
+          // FYI: num_vars defined in HALOEXCHANGE.hpp
+          // num_vars is set in HALOEXCHANGE.cpp
+          for (Index_type v = 0; v < num_vars; ++v) {
+            auto var = vars[v];
+            auto haloexchange_pack_base_lam = KOKKOS_LAMBDA(Index_type i) {
+              // HALOEXCHANGE_PACK_BODY:
+              // copy the elements of var selected by list into the
+              // first len slots of the current buffer view
+              buffer[i] = var[list[i]];
+            };
+
+            Kokkos::parallel_for("HALOEXCHANGE - Pack Body - Kokkos Lambda",
+              Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0, len),
+              haloexchange_pack_base_lam);
+            // Step past this variable's segment (pointer form: buffer += len)
+
+            auto end = buffer.extent(0);
+            decltype(end) begin = len;
+            buffer = Kokkos::subview(buffer, std::make_pair(begin,end));
+          }
+        }
+
+        for (Index_type l = 0; l < num_neighbors; ++l) {
+          auto buffer = buffers[l];
+          auto list = unpack_index_lists[l];
+          Index_type len = m_unpack_index_list_lengths[l];
+          for (Index_type v = 0; v < num_vars; ++v) {
+            auto var = vars[v];
+            auto haloexchange_unpack_base_lam = KOKKOS_LAMBDA(Index_type i) {
+              // HALOEXCHANGE_UNPACK_BODY:
+              // scatter the first len buffer slots back into var through list
+              var[list[i]] = buffer[i];
+
+            };
+
+            Kokkos::parallel_for("HALOEXCHANGE - Unpack Body - Kokkos Lambda",
+              Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0, len),
+              haloexchange_unpack_base_lam);
+            // Step past this variable's segment (pointer form: buffer += len)
+            auto end = buffer.extent(0);
+            decltype(end) begin = len;
+            buffer = Kokkos::subview(buffer, std::make_pair(begin,end));
+          }
+        }
+
+      }
+      Kokkos::fence();
+      stopTimer();
+      break;
+    }
+
+    default : {
+      std::cout << "\n HALOEXCHANGE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  for ( std::size_t x = 0; x < m_vars.size(); ++x ) {
+    // Counterpart of vars.push_back(getViewFromPointer(var, m_var_size)) above
+    moveDataToHostFromKokkosView(m_vars[x], vars[x], m_var_size);
+  }
+
+  for ( std::size_t x = 0; x < m_buffers.size(); ++x ) {
+    Index_type buffer_len = m_num_vars * m_pack_index_list_lengths[x];
+    moveDataToHostFromKokkosView(m_buffers[x], buffers[x], buffer_len);
+  }
+
+
+  for ( std::size_t x = 0; x < m_pack_index_lists.size(); ++x ) {
+
+    // Counterpart of the pack_index_lists.push_back(...) loop above
+    moveDataToHostFromKokkosView(m_pack_index_lists[x], pack_index_lists[x], m_pack_index_list_lengths[x]);
+  }
+
+
+  for ( std::size_t x = 0; x < m_unpack_index_lists.size(); ++x ) {
+
+    // Counterpart of the unpack_index_lists.push_back(...) loop above
+    moveDataToHostFromKokkosView(m_unpack_index_lists[x], unpack_index_lists[x], m_unpack_index_list_lengths[x]);
+  }
+
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps-kokkos/LTIMES-Kokkos.cpp b/src/apps-kokkos/LTIMES-Kokkos.cpp
new file mode 100644
index 000000000..880b690b0
--- /dev/null
+++ b/src/apps-kokkos/LTIMES-Kokkos.cpp
@@ -0,0 +1,77 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "LTIMES.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+// Kokkos variant of LTIMES: phi(z,g,m) += ell(m,d) * psi(z,g,d), computed on
+// multi-dimensional Kokkos views of the three data arrays.
+void LTIMES::runKokkosVariant(VariantID vid)
+{
+
+  const Index_type run_reps = getRunReps();
+
+  LTIMES_DATA_SETUP;
+
+  auto phi = getViewFromPointer(phidat, num_z, num_g, num_m);
+  auto psi = getViewFromPointer(psidat, num_z, num_g, num_d);
+  auto ell = getViewFromPointer(elldat, num_m, num_d);
+
+#if defined (RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      // Kokkos uses MDRange to model tightly-nested loops
+      Kokkos::fence();
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        // Only z, g and m are parallel dimensions. Every d contributes to
+        // the same phi(z,g,m), so d is summed by a sequential loop inside
+        // each work item; a parallel d dimension would race on the +=.
+        Kokkos::parallel_for("LTIMES",
+          Kokkos::MDRangePolicy<Kokkos::Rank<3>>({0,0,0},{num_z, num_g, num_m}),
+          KOKKOS_LAMBDA(int64_t z, int64_t g, int64_t m) {
+            for (Index_type d = 0; d < num_d; ++d) {
+              phi(z, g, m) += ell(m, d) * psi(z, g, d);
+            }
+        });
+
+      }
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n LTIMES : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  // Copy the views back into the host arrays
+  moveDataToHostFromKokkosView(phidat, phi, num_z, num_g, num_m);
+  moveDataToHostFromKokkosView(psidat, psi, num_z, num_g, num_d);
+  moveDataToHostFromKokkosView(elldat, ell, num_m, num_d);
+
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps-kokkos/LTIMES_NOVIEW-Kokkos.cpp b/src/apps-kokkos/LTIMES_NOVIEW-Kokkos.cpp
new file mode 100644
index 000000000..483926287
--- /dev/null
+++ b/src/apps-kokkos/LTIMES_NOVIEW-Kokkos.cpp
@@ -0,0 +1,89 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "LTIMES_NOVIEW.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+// Kokkos variant of LTIMES_NOVIEW: deliberately a no-op (see the note below).
+void LTIMES_NOVIEW::runKokkosVariant(VariantID vid)
+{
+  // Nota bene: we put a return statement for build and running purposes;
+  // A kernel without a Kokkos view is not informative for Kokkos
+  // performance
+  return;
+  const Index_type run_reps = getRunReps();
+
+  LTIMES_NOVIEW_DATA_SETUP;
+
+  auto ltimesnoview_lam = [=](Index_type d, Index_type z,
+                              Index_type g, Index_type m) {
+                                LTIMES_NOVIEW_BODY;
+                          };
+
+#if defined (RUN_KOKKOS)
+
+  switch ( vid ) {
+
+/*
+    Future Kokkos Translation here:
+    case Kokkos_Lambda : {
+
+      using EXEC_POL =
+        RAJA::KernelPolicy<
+          RAJA::statement::For<1, RAJA::loop_exec,       // z
+            RAJA::statement::For<2, RAJA::loop_exec,     // g
+              RAJA::statement::For<3, RAJA::loop_exec,   // m
+                RAJA::statement::For<0, RAJA::loop_exec, // d
+                  RAJA::statement::Lambda<0>
+                >
+              >
+            >
+          >
+        >;
+
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::kernel<EXEC_POL>( RAJA::make_tuple(RAJA::RangeSegment(0, num_d),
+                                                 RAJA::RangeSegment(0, num_z),
+                                                 RAJA::RangeSegment(0, num_g),
+                                                 RAJA::RangeSegment(0, num_m)),
+                                ltimesnoview_lam
+                              );
+
+      }
+      stopTimer();
+
+      break;
+
+    }
+
+*/
+    default : {
+      std::cout << "\n LTIMES_NOVIEW : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+// Nothing to copy back: this variant creates no Kokkos views
+
+
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps-kokkos/PRESSURE-Kokkos.cpp b/src/apps-kokkos/PRESSURE-Kokkos.cpp
new file mode 100644
index 000000000..7e2fc32a4
--- /dev/null
+++ b/src/apps-kokkos/PRESSURE-Kokkos.cpp
@@ -0,0 +1,114 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence
Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PRESSURE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+// Kokkos variant of PRESSURE: two consecutive kernels over [ibegin, iend); the
+// first fills bvc from compression, the second derives and clamps p_new.
+void PRESSURE::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  PRESSURE_DATA_SETUP;
+
+  // Real_ptr compression = m_compression;
+  // Real_ptr bvc = m_bvc;
+  // Real_ptr p_new = m_p_new;
+  // Real_ptr e_old = m_e_old;
+  // Real_ptr vnewc = m_vnewc;
+
+  auto compression_view = getViewFromPointer(compression, iend);
+  auto bvc_view = getViewFromPointer(bvc, iend);
+  auto p_new_view = getViewFromPointer(p_new, iend);
+  auto e_old_view = getViewFromPointer(e_old, iend);
+  auto vnewc_view = getViewFromPointer(vnewc, iend);
+
+  auto pressure_lam1 = [=](Index_type i) {
+                         PRESSURE_BODY1;
+                       };
+  auto pressure_lam2 = [=](Index_type i) {
+                         PRESSURE_BODY2;
+                       };
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // Christian Trott : Look at Kokkos graphs as an implementation for kernel
+        // seq_region - create a sequential region
+        // Intent: two loop bodies will be executed consecutively
+        // https://raja.readthedocs.io/en/v0.9.0/feature/policies.html?highlight=seq_region#parallel-region-policies
+        // The sequential region specialization is essentially a pass through operation.
+        // It is provided so that if you want to turn off OpenMP in your code,
+        // you can simply replace the region policy type, and you do not have to change your algorithm source code.
+
+        Kokkos::parallel_for("PRESSURE_BODY1 - Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin,iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            // PRESSURE_BODY1 written against the views; pointer form:
+            //   bvc[i] = cls * (compression[i] + 1.0);
+            bvc_view[i] = cls * (compression_view[i] + 1.0);
+
+        });
+
+
+
+        Kokkos::parallel_for("PRESSURE_BODY2 - Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin,iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            // PRESSURE_BODY2: p_new = bvc * e_old, zeroed below p_cut or at large vnewc, floored at pmin
+            p_new_view[i] = bvc_view[i] * e_old_view[i] ;
+            if ( fabs(p_new_view[i]) < p_cut ) p_new_view[i] = 0.0 ;
+            if ( vnewc_view[i] >= eosvmax ) p_new_view[i] = 0.0 ;
+            if ( p_new_view[i] < pmin ) p_new_view[i] = pmin ;
+        });
+
+
+      }
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+
+    default : {
+      std::cout << "\n  PRESSURE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  moveDataToHostFromKokkosView(compression, compression_view, iend);
+  moveDataToHostFromKokkosView(bvc, bvc_view, iend);
+  moveDataToHostFromKokkosView(p_new, p_new_view, iend);
+  moveDataToHostFromKokkosView(e_old, e_old_view, iend);
+  moveDataToHostFromKokkosView(vnewc, vnewc_view, iend);
+
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps-kokkos/VOL3D-Kokkos.cpp b/src/apps-kokkos/VOL3D-Kokkos.cpp
new file mode 100644
index 000000000..bb510bba8
--- /dev/null
+++ b/src/apps-kokkos/VOL3D-Kokkos.cpp
@@ -0,0 +1,209 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "VOL3D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "AppsData.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+// Bundles one coordinate array with the eight shifted views the VOL3D
+// stencil reads: v0 is the array itself and v1..v7 start 1, jp, 1+jp, kp,
+// 1+kp, jp+kp and 1+jp+kp elements later, mirroring NDPTRSET for pointers.
+struct arrayOffSetStruct3D {
+  // NOTE(review): the element type was lost in extraction; Real_type* assumed
+  using ViewType = Kokkos::View<Real_type*>;
+
+  // v is the view returned for the whole array; v0..v7 are the shifted views
+  ViewType v, v0, v1, v2, v3, v4, v5, v6, v7;
+
+  // head points at num_elements values; name is not used at present
+  arrayOffSetStruct3D(const std::string& name,
+                      Index_type num_elements,
+                      Index_type jp,
+                      Index_type kp,
+                      Real_ptr head
+                      ):
+    // Each sub-view runs from its offset to the end of the view it is cut from
+    // (the cast type was lost in extraction; size_t is assumed - confirm)
+    v (getViewFromPointer(head, num_elements)),
+    v0(v),
+    v1(Kokkos::subview(v0, std::make_pair(static_cast<size_t>(1), v0.extent(0)))),
+    v2(Kokkos::subview(v0, std::make_pair(static_cast<size_t>(jp), v0.extent(0)))),
+    v3(Kokkos::subview(v1, std::make_pair(static_cast<size_t>(jp), v1.extent(0)))),
+    v4(Kokkos::subview(v0, std::make_pair(static_cast<size_t>(kp), v0.extent(0)))),
+    v5(Kokkos::subview(v1, std::make_pair(static_cast<size_t>(kp), v1.extent(0)))),
+    v6(Kokkos::subview(v2, std::make_pair(static_cast<size_t>(kp), v2.extent(0)))),
+    v7(Kokkos::subview(v3, std::make_pair(static_cast<size_t>(kp), v3.extent(0)))) {
+  }
+};
+
+// Kokkos variant of VOL3D: computes vol[i] for i in [ibegin, iend) from the
+// corner differences of the x, y and z coordinate arrays.
+void VOL3D::runKokkosVariant(VariantID vid)
+{
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = m_domain->fpz;
+  const Index_type iend = m_domain->lpz+1;
+
+  VOL3D_DATA_SETUP;
+
+  NDPTRSET(m_domain->jp, m_domain->kp, x,x0,x1,x2,x3,x4,x5,x6,x7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, y,y0,y1,y2,y3,y4,y5,y6,y7) ;
+  NDPTRSET(m_domain->jp, m_domain->kp, z,z0,z1,z2,z3,z4,z5,z6,z7) ;
+
+  // vol is wrapped over all m_domain->nnalls entries; the kernel below only
+  // visits [ibegin, iend)
+  auto vol_view = getViewFromPointer(vol, m_domain->nnalls);
+
+  arrayOffSetStruct3D x_offsets("x_offsets", m_domain->nnalls, m_domain->jp, m_domain->kp, x);
+  arrayOffSetStruct3D y_offsets("y_offsets", m_domain->nnalls, m_domain->jp, m_domain->kp, y);
+  arrayOffSetStruct3D z_offsets("z_offsets", m_domain->nnalls, m_domain->jp, m_domain->kp, z);
+
+  // Short aliases for the offset views used by the kernel
+  auto& x_view = x_offsets.v;
+  auto& x0_view = x_offsets.v0;
+  auto& x1_view = x_offsets.v1;
+  auto& x2_view = x_offsets.v2;
+  auto& x3_view = x_offsets.v3;
+  auto& x4_view = x_offsets.v4;
+  auto& x5_view = x_offsets.v5;
+  auto& x6_view = x_offsets.v6;
+  auto& x7_view = x_offsets.v7;
+
+  auto& y_view = y_offsets.v;
+  auto& y0_view = y_offsets.v0;
+  auto& y1_view = y_offsets.v1;
+  auto& y2_view = y_offsets.v2;
+  auto& y3_view = y_offsets.v3;
+  auto& y4_view = y_offsets.v4;
+  auto& y5_view = y_offsets.v5;
+  auto& y6_view = y_offsets.v6;
+  auto& y7_view = y_offsets.v7;
+
+  auto& z_view = z_offsets.v;
+  auto& z0_view = z_offsets.v0;
+  auto& z1_view = z_offsets.v1;
+  auto& z2_view = z_offsets.v2;
+  auto& z3_view = z_offsets.v3;
+  auto& z4_view = z_offsets.v4;
+  auto& z5_view = z_offsets.v5;
+  auto& z6_view = z_offsets.v6;
+  auto& z7_view = z_offsets.v7;
+
+  auto vol3d_lam = [=](Index_type i) {
+                     VOL3D_BODY;
+                   };
+
+#if defined(RUN_KOKKOS)
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      // Fence on both sides of the timed loop so that only this kernel's work is timed
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Kokkos::parallel_for(
+          "VOL3D Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin,iend),
+          KOKKOS_LAMBDA(Index_type i ) {
+
+            // VOL3D_BODY written against the offset views
+
+            Real_type x71 = x7_view[i] - x1_view[i] ;
+            Real_type x72 = x7_view[i] - x2_view[i] ;
+            Real_type x74 = x7_view[i] - x4_view[i] ;
+            Real_type x30 = x3_view[i] - x0_view[i] ;
+            Real_type x50 = x5_view[i] - x0_view[i] ;
+            Real_type x60 = x6_view[i] - x0_view[i] ;
+
+            Real_type y71 = y7_view[i] - y1_view[i] ;
+            Real_type y72 = y7_view[i] - y2_view[i] ;
+            Real_type y74 = y7_view[i] - y4_view[i] ;
+            Real_type y30 = y3_view[i] - y0_view[i] ;
+            Real_type y50 = y5_view[i] - y0_view[i] ;
+            Real_type y60 = y6_view[i] - y0_view[i] ;
+
+            Real_type z71 = z7_view[i] - z1_view[i] ;
+            Real_type z72 = z7_view[i] - z2_view[i] ;
+            Real_type z74 = z7_view[i] - z4_view[i] ;
+            Real_type z30 = z3_view[i] - z0_view[i] ;
+            Real_type z50 = z5_view[i] - z0_view[i] ;
+            Real_type z60 = z6_view[i] - z0_view[i] ;
+
+            Real_type xps = x71 + x60 ;
+            Real_type yps = y71 + y60 ;
+            Real_type zps = z71 + z60 ;
+
+            Real_type cyz = y72 * z30 - z72 * y30 ;
+            Real_type czx = z72 * x30 - x72 * z30 ;
+            Real_type cxy = x72 * y30 - y72 * x30 ;
+            vol_view[i] = xps * cyz + yps * czx + zps * cxy ;
+
+            xps = x72 + x50 ;
+            yps = y72 + y50 ;
+            zps = z72 + z50 ;
+
+            cyz = y74 * z60 - z74 * y60 ;
+            czx = z74 * x60 - x74 * z60 ;
+            cxy = x74 * y60 - y74 * x60 ;
+            vol_view[i] += xps * cyz + yps * czx + zps * cxy ;
+
+            // NOTE(review): third and last term. An extra term that paired this
+            // diagonal sum with a repeat of the previous cross product was dropped
+            // as a paste error; confirm against VOL3D_BODY in VOL3D.hpp.
+            xps = x74 + x30 ;
+            yps = y74 + y30 ;
+            zps = z74 + z30 ;
+
+            cyz = y71 * z50 - z71 * y50 ;
+            czx = z71 * x50 - x71 * z50 ;
+            cxy = x71 * y50 - y71 * x50 ;
+            vol_view[i] += xps * cyz + yps * czx + zps * cxy ;
+
+            vol_view[i] *= vnormq ;
+          }
+        );
+
+      }
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n  VOL3D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  // Copy the views back to the host arrays
+  moveDataToHostFromKokkosView(x, x_view, m_domain->nnalls);
+  moveDataToHostFromKokkosView(y, y_view, m_domain->nnalls);
+  moveDataToHostFromKokkosView(z, z_view, m_domain->nnalls);
+  moveDataToHostFromKokkosView(vol, vol_view, m_domain->nnalls);
+
+
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps-kokkos/WIP-COUPLE.cpp.kokkos.wip b/src/apps-kokkos/WIP-COUPLE.cpp.kokkos.wip
new file mode 100644
index 000000000..ed205e08e
--- /dev/null
+++ b/src/apps-kokkos/WIP-COUPLE.cpp.kokkos.wip
@@ -0,0 +1,192 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "WIP-COUPLE.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include "AppsData.hpp"
+#include "common/DataUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace apps
+{
+
+// Sets the default size/reps and builds the 3-D ADomain whose index bounds are cached in m_imin..m_kmax.
+COUPLE::COUPLE(const RunParams& params)
+  : KernelBase(rajaperf::Apps_COUPLE, params)
+{
+  setDefaultSize(64);  // See rzmax in ADomain struct
+  setDefaultReps(60);
+
+  m_domain = new ADomain(getRunSize(), /* ndims = */ 3);
+
+  m_imin = m_domain->imin;
+  m_imax = m_domain->imax;
+  m_jmin = m_domain->jmin;
+  m_jmax = m_domain->jmax;
+  m_kmin = m_domain->kmin;
+  m_kmax = m_domain->kmax;
+}
+
+// Releases the ADomain allocated by the constructor.
+COUPLE::~COUPLE()
+{
+  delete m_domain;
+}
+
+// Iterations per rep: the product of the three cached index extents.
+Index_type COUPLE::getItsPerRep() const
+{
+  return  ( (m_imax - m_imin) * (m_jmax - m_jmin) * (m_kmax - m_kmin) );
+}
+
+// Allocates the five work arrays (m_domain->lrn entries each) and sets the physical constants.
+void COUPLE::setUp(VariantID vid)
+{
+  Index_type max_loop_index = m_domain->lrn;
+
+  allocAndInitData(m_t0, max_loop_index, vid);
+  allocAndInitData(m_t1, max_loop_index, vid);
+  allocAndInitData(m_t2, max_loop_index, vid);
+  allocAndInitData(m_denac, max_loop_index, vid);
+  allocAndInitData(m_denlw, max_loop_index, vid);
+
+  m_clight = 3.e+10;
+  m_csound = 3.09e+7;
+  m_omega0 = 0.9;
+  m_omegar = 0.9;
+  m_dt = 0.208;
+  m_c10 = 0.25 * (m_clight / m_csound);
+  m_fratio = sqrt(m_omegar / m_omega0);
+  m_r_fratio = 1.0/m_fratio;
+  m_c20 = 0.25 * (m_clight / m_csound) * m_r_fratio;
+  m_ireal = Complex_type(0.0, 1.0);
+}
+
+// Times run_reps sweeps of COUPLE_BODY over k in [kmin, kmax) for the requested variant;
+// the OpenMP-target and CUDA cases are compiled out with "&& 0".
+void COUPLE::runKernel(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+
+  COUPLE_DATA_SETUP;
+
+  switch ( vid ) {
+
+    case Base_Seq : {
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        for (Index_type k = kmin ; k < kmax ; ++k ) {
+          COUPLE_BODY;
+        }
+
+      }
+      stopTimer();
+      break;
+    }
+
+#if defined(RUN_RAJA_SEQ)
+    case RAJA_Seq : {
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall<RAJA::loop_exec>(  // NOTE(review): policy lost in extraction; loop_exec assumed
+          RAJA::RangeSegment(kmin, kmax), [=](Index_type k) {
+          COUPLE_BODY;
+        });
+
+      }
+      stopTimer();
+      break;
+    }
+#endif
+
+#if defined(RAJA_ENABLE_OPENMP) && defined(RUN_OPENMP)
+    case Base_OpenMP : {
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        #pragma omp parallel for
+        for (Index_type k = kmin ; k < kmax ; ++k ) {
+          COUPLE_BODY;
+        }
+
+      }
+      stopTimer();
+      break;
+    }
+
+    case RAJA_OpenMP : {
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        RAJA::forall<RAJA::omp_parallel_for_exec>(  // NOTE(review): policy lost in extraction; assumed
+          RAJA::RangeSegment(kmin, kmax), [=](Index_type k) {
+          COUPLE_BODY;
+        });
+
+      }
+      stopTimer();
+      break;
+    }
+#endif
+
+#if defined(RAJA_ENABLE_TARGET_OPENMP) && 0
+    case Base_OpenMPTarget :
+    case RAJA_OpenMPTarget :
+    {
+      runOpenMPTargetVariant(vid);
+      break;
+    }
+#endif
+
+#if defined(RAJA_ENABLE_CUDA) && 0
+    case Base_CUDA :
+    case RAJA_CUDA :
+    {
+      runCudaVariant(vid);
+      break;
+    }
+#endif
+
+    default : {
+      std::cout << "\n  COUPLE : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+}
+
+// Adds the checksums of m_t0, m_t1 and m_t2 to checksum[vid].
+void COUPLE::updateChecksum(VariantID vid)
+{
+  Index_type max_loop_index = m_domain->lrn;
+
+  checksum[vid] += calcChecksum(m_t0, max_loop_index);
+  checksum[vid] += calcChecksum(m_t1, max_loop_index);
+  checksum[vid] += calcChecksum(m_t2, max_loop_index);
+}
+
+// Frees the arrays allocated in setUp; vid is unused.
+void COUPLE::tearDown(VariantID vid)
+{
+  (void) vid;
+
+  deallocData(m_t0);
+  deallocData(m_t1);
+  deallocData(m_t2);
+  deallocData(m_denac);
+  deallocData(m_denlw);
+}
+
+} // end namespace apps
+} // end namespace rajaperf
diff --git a/src/apps/AppsData.hpp b/src/apps/AppsData.hpp
index f1616968c..74f52c727 100644
--- a/src/apps/AppsData.hpp
+++ b/src/apps/AppsData.hpp
@@ -18,7 +18,8 @@ namespace apps
 
 //
 // Some macros used in kernels to mimic real app code style.
-// + +// For VOL-3D #define NDPTRSET(jp, kp,v,v0,v1,v2,v3,v4,v5,v6,v7) \ v0 = v ; \ v1 = v0 + 1 ; \ @@ -29,6 +30,7 @@ namespace apps v6 = v2 + kp ; \ v7 = v3 + kp ; +// For DEL_DOT_VEC_2D #define NDSET2D(jp,v,v1,v2,v3,v4) \ v4 = v ; \ v1 = v4 + 1 ; \ diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index a82bed339..21f7ce5f3 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -75,6 +75,6 @@ blt_add_library( VOL3D-Cuda.cpp VOL3D-OMP.cpp VOL3D-OMPTarget.cpp - WIP-COUPLE.cpp + #WIP-COUPLE.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/apps/DEL_DOT_VEC_2D.cpp b/src/apps/DEL_DOT_VEC_2D.cpp index cec0af410..d0cb757d9 100644 --- a/src/apps/DEL_DOT_VEC_2D.cpp +++ b/src/apps/DEL_DOT_VEC_2D.cpp @@ -62,6 +62,8 @@ DEL_DOT_VEC_2D::DEL_DOT_VEC_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } DEL_DOT_VEC_2D::~DEL_DOT_VEC_2D() diff --git a/src/apps/DEL_DOT_VEC_2D.hpp b/src/apps/DEL_DOT_VEC_2D.hpp index 1a4d7670b..ec4735d55 100644 --- a/src/apps/DEL_DOT_VEC_2D.hpp +++ b/src/apps/DEL_DOT_VEC_2D.hpp @@ -113,6 +113,7 @@ class DEL_DOT_VEC_2D : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); private: Real_ptr m_x; diff --git a/src/apps/ENERGY.cpp b/src/apps/ENERGY.cpp index a6a779f8c..4dba76548 100644 --- a/src/apps/ENERGY.cpp +++ b/src/apps/ENERGY.cpp @@ -62,6 +62,7 @@ ENERGY::ENERGY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Kokkos_Lambda ); } ENERGY::~ENERGY() diff --git a/src/apps/ENERGY.hpp b/src/apps/ENERGY.hpp index 00a45de1d..2aa286670 100644 --- a/src/apps/ENERGY.hpp +++ b/src/apps/ENERGY.hpp @@ -204,6 +204,8 @@ class ENERGY : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + 
void runKokkosVariant(VariantID vid); + private: Real_ptr m_e_new; Real_ptr m_e_old; diff --git a/src/apps/FIR.cpp b/src/apps/FIR.cpp index fe3993cd9..26c969c03 100644 --- a/src/apps/FIR.cpp +++ b/src/apps/FIR.cpp @@ -24,6 +24,9 @@ FIR::FIR(const RunParams& params) setDefaultProblemSize(1000000); setDefaultReps(160); + //setDefaultProblemSize(10); + //setDefaultReps(1); + m_coefflen = FIR_COEFFLEN; setActualProblemSize( getTargetProblemSize() ); @@ -56,6 +59,8 @@ FIR::FIR(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } FIR::~FIR() diff --git a/src/apps/FIR.hpp b/src/apps/FIR.hpp index e9b49edcb..bd494d7b7 100644 --- a/src/apps/FIR.hpp +++ b/src/apps/FIR.hpp @@ -78,6 +78,8 @@ class FIR : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Real_ptr m_in; diff --git a/src/apps/HALOEXCHANGE.cpp b/src/apps/HALOEXCHANGE.cpp index db7c7bb90..878373931 100644 --- a/src/apps/HALOEXCHANGE.cpp +++ b/src/apps/HALOEXCHANGE.cpp @@ -98,6 +98,7 @@ HALOEXCHANGE::HALOEXCHANGE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + setVariantDefined( Kokkos_Lambda ); } HALOEXCHANGE::~HALOEXCHANGE() diff --git a/src/apps/HALOEXCHANGE.hpp b/src/apps/HALOEXCHANGE.hpp index d10bd4790..d69a4d2c5 100644 --- a/src/apps/HALOEXCHANGE.hpp +++ b/src/apps/HALOEXCHANGE.hpp @@ -93,6 +93,8 @@ class HALOEXCHANGE : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: static const int s_num_neighbors = 26; diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp index c69a2b300..ff3e18fdc 100644 --- a/src/apps/LTIMES.cpp +++ b/src/apps/LTIMES.cpp @@ -77,6 +77,8 @@ LTIMES::LTIMES(const RunParams& params) setVariantDefined( Base_HIP ); 
setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } LTIMES::~LTIMES() diff --git a/src/apps/LTIMES.hpp b/src/apps/LTIMES.hpp index 6177873be..92a31cfc4 100644 --- a/src/apps/LTIMES.hpp +++ b/src/apps/LTIMES.hpp @@ -117,6 +117,8 @@ class LTIMES : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Real_ptr m_phidat; Real_ptr m_elldat; diff --git a/src/apps/LTIMES_NOVIEW.hpp b/src/apps/LTIMES_NOVIEW.hpp index 24c524ecc..c9f9bd7e5 100644 --- a/src/apps/LTIMES_NOVIEW.hpp +++ b/src/apps/LTIMES_NOVIEW.hpp @@ -67,6 +67,8 @@ class LTIMES_NOVIEW : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Real_ptr m_phidat; Real_ptr m_elldat; diff --git a/src/apps/PRESSURE.cpp b/src/apps/PRESSURE.cpp index b4ef1d72c..44d141fd8 100644 --- a/src/apps/PRESSURE.cpp +++ b/src/apps/PRESSURE.cpp @@ -52,6 +52,8 @@ PRESSURE::PRESSURE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } PRESSURE::~PRESSURE() diff --git a/src/apps/PRESSURE.hpp b/src/apps/PRESSURE.hpp index 44c6602fa..b170a7715 100644 --- a/src/apps/PRESSURE.hpp +++ b/src/apps/PRESSURE.hpp @@ -73,6 +73,8 @@ class PRESSURE : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Real_ptr m_compression; Real_ptr m_bvc; diff --git a/src/apps/VOL3D.cpp b/src/apps/VOL3D.cpp index a8ac3bbc6..9218611bb 100644 --- a/src/apps/VOL3D.cpp +++ b/src/apps/VOL3D.cpp @@ -64,6 +64,8 @@ VOL3D::VOL3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } VOL3D::~VOL3D() diff --git a/src/apps/VOL3D.hpp b/src/apps/VOL3D.hpp index 6faf02523..b7f6287d9 100644 --- 
a/src/apps/VOL3D.hpp +++ b/src/apps/VOL3D.hpp @@ -170,6 +170,8 @@ class VOL3D : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Real_ptr m_x; Real_ptr m_y; diff --git a/src/apps/WIP-COUPLE.cpp b/src/apps/WIP-COUPLE.cpp index 51bb4fa2f..e536c07a1 100644 --- a/src/apps/WIP-COUPLE.cpp +++ b/src/apps/WIP-COUPLE.cpp @@ -20,15 +20,14 @@ namespace rajaperf namespace apps { - COUPLE::COUPLE(const RunParams& params) : KernelBase(rajaperf::Apps_COUPLE, params) { - setDefaultProblemSize(100*100*100); // See rzmax in ADomain struct - setDefaultReps(50); + + setDefaultSize(64); // See rzmax in ADomain struct + setDefaultReps(60); - Index_type rzmax = std::cbrt(getTargetProblemSize())+1; - m_domain = new ADomain(rzmax, /* ndims = */ 3); + m_domain = new ADomain(getRunSize(), /* ndims = */ 3); m_imin = m_domain->imin; m_imax = m_domain->imax; @@ -36,28 +35,19 @@ COUPLE::COUPLE(const RunParams& params) m_jmax = m_domain->jmax; m_kmin = m_domain->kmin; m_kmax = m_domain->kmax; - - setActualProblemSize( m_domain->n_real_zones ); - - setItsPerRep( getActualProblemSize() ); - setKernelsPerRep(1); - setBytesPerRep( (3*sizeof(Complex_type) + 5*sizeof(Complex_type)) * m_domain->n_real_zones ); - setFLOPsPerRep(0); - - setUsesFeature(Forall); - - setVariantDefined( Base_Seq ); - setVariantDefined( RAJA_Seq ); - - setVariantDefined( Base_OpenMP ); - setVariantDefined( RAJA_OpenMP ); } -COUPLE::~COUPLE() + +COUPLE::~COUPLE() { delete m_domain; } +Index_type COUPLE::getItsPerRep() const +{ + return ( (m_imax - m_imin) * (m_jmax - m_jmin) * (m_kmax - m_kmin) ); +} + void COUPLE::setUp(VariantID vid) { Index_type max_loop_index = m_domain->lrn; @@ -77,7 +67,7 @@ void COUPLE::setUp(VariantID vid) m_fratio = sqrt(m_omegar / m_omega0); m_r_fratio = 1.0/m_fratio; m_c20 = 0.25 * (m_clight / m_csound) * m_r_fratio; - m_ireal = Complex_type(0.0, 1.0); + m_ireal = Complex_type(0.0, 1.0); } void 
COUPLE::runKernel(VariantID vid) @@ -101,7 +91,7 @@ void COUPLE::runKernel(VariantID vid) stopTimer(); break; - } + } #if defined(RUN_RAJA_SEQ) case RAJA_Seq : { @@ -112,10 +102,10 @@ void COUPLE::runKernel(VariantID vid) RAJA::forall( RAJA::RangeSegment(kmin, kmax), [=](Index_type k) { COUPLE_BODY; - }); + }); } - stopTimer(); + stopTimer(); break; } @@ -127,7 +117,7 @@ void COUPLE::runKernel(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - #pragma omp parallel for + #pragma omp parallel for for (Index_type k = kmin ; k < kmax ; ++k ) { COUPLE_BODY; } @@ -145,10 +135,10 @@ void COUPLE::runKernel(VariantID vid) RAJA::forall( RAJA::RangeSegment(kmin, kmax), [=](Index_type k) { COUPLE_BODY; - }); + }); } - stopTimer(); + stopTimer(); break; } @@ -191,7 +181,7 @@ void COUPLE::updateChecksum(VariantID vid) void COUPLE::tearDown(VariantID vid) { (void) vid; - + deallocData(m_t0); deallocData(m_t1); deallocData(m_t2); diff --git a/src/apps/WIP-COUPLE.hpp b/src/apps/WIP-COUPLE.hpp index e5040ea57..f22b9a60f 100644 --- a/src/apps/WIP-COUPLE.hpp +++ b/src/apps/WIP-COUPLE.hpp @@ -171,6 +171,9 @@ class COUPLE : public KernelBase void runCudaVariant(VariantID vid) {(void) vid;} void runHipVariant(VariantID vid) {(void) vid;} void runOpenMPTargetVariant(VariantID vid) {(void) vid;} + //void runKokkosVariant(VariantID vid); + + void runKokkosVariant(VariantID vid) {(void) vid;} private: Complex_ptr m_t0; diff --git a/src/basic-kokkos/CMakeLists.txt b/src/basic-kokkos/CMakeLists.txt new file mode 100644 index 000000000..c859747c2 --- /dev/null +++ b/src/basic-kokkos/CMakeLists.txt @@ -0,0 +1,33 @@ +############################################################################### +# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../basic) + +blt_add_library( + NAME basic-kokkos + SOURCES + PI_ATOMIC-Kokkos.cpp + DAXPY-Kokkos.cpp + IF_QUAD-Kokkos.cpp + INIT3-Kokkos.cpp + INIT_VIEW1D-Kokkos.cpp + INIT_VIEW1D_OFFSET-Kokkos.cpp + MULADDSUB-Kokkos.cpp + NESTED_INIT-Kokkos.cpp + REDUCE3_INT-Kokkos.cpp + TRAP_INT-Kokkos.cpp + DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} + ) + +# Diagnostics: print the dependency list, the RAJA target properties, and the HIP_SOURCE_PROPERTY_FORMAT source property of PI_ATOMIC-Kokkos.cpp (a file listed in SOURCES above) +message (STATUS "${RAJA_PERFSUITE_DEPENDS}") + +blt_print_target_properties(TARGET RAJA) + +get_source_file_property(blah PI_ATOMIC-Kokkos.cpp HIP_SOURCE_PROPERTY_FORMAT) +message (STATUS "DOGS1 - ${blah}") diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp new file mode 100644 index 000000000..8883d0da6 --- /dev/null +++ b/src/basic-kokkos/DAXPY-Kokkos.cpp @@ -0,0 +1,98 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DAXPY.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +struct DaxpyFunctor { + Real_ptr x; + Real_ptr y; + Real_type a; + DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a) : DAXPY_FUNCTOR_CONSTRUCT { } + void operator()(Index_type i) const { DAXPY_BODY; } +}; + +void DAXPY::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + DAXPY_DATA_SETUP; + + // Declare KokkosViews for the pointers that will be wrapped. 
+ // Get pointer names in the KERNEL_NAME.hpp file + // Wrap pointers x and y in separate KokkosViews + // This is a one-dimensional array + // One dimensional arrays are indexed to iend (RAJAPerfSuite convention) + // New template-based machinery in /rajaperf/src/common/RAJAPerfSuite.hpp + + auto x_view = getViewFromPointer(x, iend); + + auto y_view = getViewFromPointer(y, iend); + + + + auto daxpy_lam = [=](Index_type i) { + DAXPY_BODY; + }; + + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::parallel_for("DAXPY-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + // Increment y_view (pointer wrapped in KokkosView) + // by product of a and ith entry of x_view + // DAXPY_BODY substituted with the + // calculation defined in DAXPY.hpp + KOKKOS_LAMBDA(Index_type i) { y_view[i] += a * x_view[i];} + ); + } + // Kokkos fence + Kokkos::fence(); + + stopTimer(); + + break; + } + default : { + std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + } + + } + + // Move data (i.e., pointer, KokkosView-wrapped pointer) back to the host from the device + + moveDataToHostFromKokkosView(x, x_view, iend); + + moveDataToHostFromKokkosView(y, y_view, iend); + +#endif // RUN_KOKKOS +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp new file mode 100644 index 000000000..39f2f6dc2 --- /dev/null +++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp @@ -0,0 +1,99 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "IF_QUAD.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +void IF_QUAD::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + IF_QUAD_DATA_SETUP; + + // Instantiating views using getViewFromPointer for the IF_QUAD definition + + auto a_view = getViewFromPointer(a, iend); + auto b_view = getViewFromPointer(b, iend); + auto c_view = getViewFromPointer(c, iend); + auto x1_view = getViewFromPointer(x1, iend); + auto x2_view = getViewFromPointer(x2, iend); + + + auto ifquad_lam = [=](Index_type i) { + IF_QUAD_BODY; + }; + + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for("IF_QUAD_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA (Index_type i) { + + Real_type s = b_view[i]*b_view[i] - 4.0*a_view[i]*c_view[i]; + if ( s >= 0 ) { + s = sqrt(s); + x2_view[i] = (-b_view[i]+s)/(2.0*a_view[i]); + x1_view[i] = (-b_view[i]-s)/(2.0*a_view[i]); + } + else { + x2_view[i] = 0.0; + x1_view[i] = 0.0; + + } +}); + + } + + Kokkos::fence(); + stopTimer(); + + break; + + } + + default : { + std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(a, a_view, iend); + moveDataToHostFromKokkosView(b, b_view, iend); + moveDataToHostFromKokkosView(c, c_view, iend); + moveDataToHostFromKokkosView(x1, x1_view, iend); + moveDataToHostFromKokkosView(x2, x2_view, iend); + + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp new file mode 100644 index 
000000000..1ebaf83cc --- /dev/null +++ b/src/basic-kokkos/INIT3-Kokkos.cpp @@ -0,0 +1,90 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT3.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT3::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INIT3_DATA_SETUP; + + // Instantiating Views using getViewFromPointer for the INIT3 definition + // (i.e., INIT3.hpp) + + // The pointer is the first argument, and the last index, denoted by iend, is + // your second argument + // + auto out1_view = getViewFromPointer(out1, iend); + auto out2_view = getViewFromPointer(out2, iend); + auto out3_view = getViewFromPointer(out3, iend); + auto in1_view = getViewFromPointer(in1, iend); + auto in2_view = getViewFromPointer(in2, iend); + + // Next step, integrate the INIT3_BODY into the Kokkos parallel expression + + auto init3_lam = [=](Index_type i) { + INIT3_BODY; + }; + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + +// Nota bene -- Conversion of Raja code begins here + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Kokkos translation of INIT3_BODY + Kokkos::parallel_for("INIT3-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + //INIT3_BODY definition: + // out1[i] = out2[i] = out3[i] = - in1[i] - in2[i] ; + out1_view[i] = out2_view[i] = out3_view[i] = - in1_view[i] - in2_view[i]; + }); + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : 
{ + std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(out1, out1_view, iend); + moveDataToHostFromKokkosView(out2, out2_view, iend); + moveDataToHostFromKokkosView(out3, out3_view, iend); + moveDataToHostFromKokkosView(in1, in1_view, iend); + moveDataToHostFromKokkosView(in2, in2_view, iend); + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp new file mode 100644 index 000000000..95702570e --- /dev/null +++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp @@ -0,0 +1,74 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT_VIEW1D.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT_VIEW1D::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INIT_VIEW1D_DATA_SETUP; + + // Declare a Kokkos View that will be used to wrap a pointer + auto a_view = getViewFromPointer(a, iend); + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for("INIT_VIEW1D_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin,iend), + KOKKOS_LAMBDA (Index_type i) { + //INIT_VIEW1D_BODY_RAJA + //Instead, use the INIT_VIEW1D_BODY definition + //with Kokkos View + //a[i] = (i+1) * v; + a_view[i] = (i + 1) * v; + + }); + + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + 
default : { + std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(a, a_view, iend); + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp new file mode 100644 index 000000000..bc2d9d955 --- /dev/null +++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp @@ -0,0 +1,74 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT_VIEW1D_OFFSET.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = getActualProblemSize()+1; + + INIT_VIEW1D_OFFSET_DATA_SETUP; + + auto a_view = getViewFromPointer(a, iend); + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for("INIT_VIEW1D_OFFSET_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA (Index_type i) { + //INIT_VIEW1D_OFFSET_BODY_RAJA + //Instead, use the INIT_VIEW1D_OFFSET_BODY + //definition: + //a[i-ibegin] = i * v; + a_view[i-ibegin] = i * v; + }); + + + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + // Move data from Kokkos View (on Device) back to Host + 
moveDataToHostFromKokkosView(a, a_view, iend); + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp new file mode 100644 index 000000000..0caad2748 --- /dev/null +++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp @@ -0,0 +1,90 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MULADDSUB.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void MULADDSUB::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MULADDSUB_DATA_SETUP; + + + // Define Kokkos Views that will wrap pointers defined in MULADDSUB.hpp + auto out1_view = getViewFromPointer(out1, iend); + auto out2_view = getViewFromPointer(out2, iend); + auto out3_view = getViewFromPointer(out3, iend); + auto in1_view = getViewFromPointer(in1, iend); + auto in2_view = getViewFromPointer(in2, iend); + + auto mas_lam = [=](Index_type i) { + MULADDSUB_BODY; + }; + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // If SIMD really matters , consider using Kokkos SIMD + Kokkos::parallel_for("MULTISUB-KokkosSeq Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + //MULADDSUB_BODY definition: + //out1[i] = in1[i] * in2[i] ; + //out2[i] = in1[i] + in2[i] ; + //out3[i] = in1[i] - in2[i] ; + // WITH KOKKOS VIEWS + out1_view[i] = in1_view[i] * in2_view[i] ; + out2_view[i] = in1_view[i] 
+ in2_view[i] ; + out3_view[i] = in1_view[i] - in2_view[i] ; + }); + + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + } + + } +#endif // RUN_KOKKOS + // Move each wrapped array back from its Kokkos View to the host pointer (once per array) + moveDataToHostFromKokkosView(out1, out1_view, iend); + moveDataToHostFromKokkosView(out2, out2_view, iend); + moveDataToHostFromKokkosView(out3, out3_view, iend); + moveDataToHostFromKokkosView(in1, in1_view, iend); + moveDataToHostFromKokkosView(in2, in2_view, iend); + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp new file mode 100644 index 000000000..6f58e34c4 --- /dev/null +++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp @@ -0,0 +1,79 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "NESTED_INIT.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf { +namespace basic { + + +void NESTED_INIT::runKokkosVariant(VariantID vid) { + const Index_type run_reps = getRunReps(); + + NESTED_INIT_DATA_SETUP; + + // Wrap the nested init array pointer in a Kokkos View + // In a Kokkos View, array arguments for array boundaries go from outermost + // to innermost dimension sizes + // See the basic NESTED_INIT.hpp file for definition of NESTED_INIT + + auto array_kokkos_view = getViewFromPointer(array, nk, nj, ni); + + auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) { + NESTED_INIT_BODY; + }; + +#if defined RUN_KOKKOS + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // MDRange can be optimized + Kokkos::parallel_for("NESTED_INIT KokkosSeq", + // Range policy to define amount of work to be done + Kokkos::MDRangePolicy, + // Execution space + Kokkos::DefaultExecutionSpace>({0, 0, 0}, {nk, nj, ni}), + // Loop body + KOKKOS_LAMBDA(Index_type k, Index_type j, Index_type i) { + // #define NESTED_INIT_BODY + // array[i+ni*(j+nj*k)] = 0.00000001 * i * j * k ; + array_kokkos_view(k, j, i) = 0.00000001 * i * j * k; + }); + } + + Kokkos::fence(); + + stopTimer(); + // Moves mirror data from GPU to CPU (void, i.e., no return type). In + // this moving of data back to Host, the layout is changed back to Layout + // Right, vs. 
the LayoutLeft of the GPU + moveDataToHostFromKokkosView(array, array_kokkos_view, nk, nj, ni); + + break; + } + + default: { + std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; + } + } +#endif // RUN_KOKKOS +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp new file mode 100644 index 000000000..51a819951 --- /dev/null +++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp @@ -0,0 +1,77 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PI_ATOMIC.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf { +namespace basic { + +void PI_ATOMIC::runKokkosVariant(VariantID vid) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + PI_ATOMIC_DATA_SETUP; + + // Declare Kokkos View that will wrap the pointer defined in PI_ATOMIC.hpp + auto pi_view = getViewFromPointer(pi, 1); + +#if defined(RUN_KOKKOS) + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Initializing a value, pi, on the host + *pi = m_pi_init; + + pi_view = getViewFromPointer(pi, 1); + + Kokkos::parallel_for( + "PI_ATOMIC-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + double x = (double(i) + 0.5) * dx; + // Make a reference to the 0th element of a 1D view with one + // element + // Atomic operation is an uninterruptable, single operation; e.g., + // addition, multiplication, division, etc. 
All of these atomic + // operations are architecture dependent. Atomics are advantageous + // from a correctness point of view + Kokkos::atomic_add(&pi_view(0), dx / (1.0 + x * x)); + }); + // Moving the data on the device (held in the KokkosView) BACK to the + // pointer, pi. + moveDataToHostFromKokkosView(pi, pi_view, 1); + *pi *= 4.0; + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; + } + } +#endif // RUN_KOKKOS + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp new file mode 100644 index 000000000..4f340a919 --- /dev/null +++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp @@ -0,0 +1,151 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE3_INT.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +namespace rajaperf +{ +namespace basic +{ + + +void REDUCE3_INT::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + REDUCE3_INT_DATA_SETUP; + + //Declare KokkosView that will wrap the pointer to a vector + + auto vec_view = getViewFromPointer(vec, iend); + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Int_type vsum = m_vsum_init; + Int_type vmin = m_vmin_init; + Int_type vmax = m_vmax_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + REDUCE3_INT_BODY; + } + + m_vsum += vsum; + m_vmin = RAJA_MIN(m_vmin, vmin); + m_vmax = RAJA_MAX(m_vmax, vmax); + + } + stopTimer(); + + break; + } + + case Lambda_Seq : { + + auto init3_base_lam = [=](Index_type i) -> Int_type { + return vec[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Int_type vsum = m_vsum_init; + Int_type vmin = m_vmin_init; + Int_type vmax = m_vmax_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + vsum += init3_base_lam(i); + vmin = RAJA_MIN(vmin, init3_base_lam(i)); + vmax = RAJA_MAX(vmax, init3_base_lam(i)); + } + + m_vsum += vsum; + m_vmin = RAJA_MIN(m_vmin, vmin); + m_vmax = RAJA_MAX(m_vmax, vmax); + + } + stopTimer(); + + break; + } + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +/* + RAJA::ReduceSum vsum(m_vsum_init); + RAJA::ReduceMin vmin(m_vmin_init); + RAJA::ReduceMax vmax(m_vmax_init); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + REDUCE3_INT_BODY_RAJA; + }); + + m_vsum += static_cast(vsum.get()); + m_vmin = 
RAJA_MIN(m_vmin, static_cast(vmin.get())); + m_vmax = RAJA_MAX(m_vmax, static_cast(vmax.get())); +*/ + // These values are initialized elsewhere by RPS + // These variables were declared to Kokkos-ify the parallel_reduce + // construct: +#ifndef RAJA_ENABLE_TARGET_OPENMP + Int_type max_value = m_vmax_init; + Int_type min_value = m_vmin_init; + Int_type sum = m_vsum_init; + + + parallel_reduce("REDUCE3-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i, Int_type& tl_max, Int_type& tl_min, Int_type& tl_sum){ + Int_type vec_i = vec_view[i]; + if (vec_i > tl_max) tl_max = vec_i; + if (vec_i < tl_min) tl_min = vec_i; + tl_sum += vec_i; + }, + Kokkos::Max(max_value), + Kokkos::Min(min_value), + sum); + m_vsum += static_cast(sum); + m_vmin = RAJA_MIN(m_vmin, static_cast(min_value)); + m_vmax = RAJA_MAX(m_vmax, static_cast(max_value)); +#endif + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + } + + } +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(vec, vec_view, iend); +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp new file mode 100644 index 000000000..45e822015 --- /dev/null +++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp @@ -0,0 +1,143 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRAP_INT.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +// +// Function used in TRAP_INT loop. 
+// +RAJA_INLINE +// +KOKKOS_FUNCTION +Real_type trap_int_func(Real_type x, + Real_type y, + Real_type xp, + Real_type yp) +{ + Real_type denom = (x - xp)*(x - xp) + (y - yp)*(y - yp); + denom = 1.0/sqrt(denom); + return denom; +} + + +void TRAP_INT::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + TRAP_INT_DATA_SETUP; + +// Declare KokkosViews that will wrap a pointer - not relevant in this case +// ...? + + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sumx = m_sumx_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + TRAP_INT_BODY; + } + + m_sumx += sumx * h; + + } + stopTimer(); + + break; + } + + case Lambda_Seq : { + + auto trapint_base_lam = [=](Index_type i) -> Real_type { + Real_type x = x0 + i*h; + return trap_int_func(x, y, xp, yp); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sumx = m_sumx_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + sumx += trapint_base_lam(i); + } + + m_sumx += sumx * h; + + } + stopTimer(); + + break; + } + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +// RAJA::ReduceSum sumx(m_sumx_init); + +// RAJA::forall( +// RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { +// TRAP_INT_BODY; +// +// Begin Kokkos translation +// A RAJA reduce translates into a Kokkos::parallel_reduce +// To perform the translation: + // Declare and initialize variables + // To perform a reduction, you need: 1) an initial value; 2) iterate + // over an iterable; 3) to be able to extract the result at the end of + // the reduction (in this case, trap_integral_val) + + Real_type trap_integral_val = m_sumx_init; + + Kokkos::parallel_reduce("TRAP_INT_Kokkos Kokkos_Lambda", + 
Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i, Real_type& sumx) {TRAP_INT_BODY}, + trap_integral_val + ); + + m_sumx += static_cast(trap_integral_val) * h; + + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + } + + } +#endif //RUN_KOKKOS +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/DAXPY-OMPTarget.cpp b/src/basic/DAXPY-OMPTarget.cpp index 286003a5d..666307b54 100644 --- a/src/basic/DAXPY-OMPTarget.cpp +++ b/src/basic/DAXPY-OMPTarget.cpp @@ -72,10 +72,10 @@ void DAXPY::runOpenMPTargetVariant(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::forall>( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - DAXPY_BODY; - }); + //RAJA::forall>( + // RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + // DAXPY_BODY; + //}); } stopTimer(); diff --git a/src/basic/DAXPY-Seq.cpp b/src/basic/DAXPY-Seq.cpp index 325297cd5..401980032 100644 --- a/src/basic/DAXPY-Seq.cpp +++ b/src/basic/DAXPY-Seq.cpp @@ -17,6 +17,13 @@ namespace rajaperf namespace basic { +struct DaxpyFunctor { + Real_ptr y; + Real_ptr x; + Real_type a; + DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a) { DAXPY_DATA_SETUP; } + void operator()(Index_type i) const { DAXPY_BODY; } +}; void DAXPY::runSeqVariant(VariantID vid) { diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index 16782df2a..6019610a1 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -51,6 +51,13 @@ DAXPY::DAXPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + + + } DAXPY::~DAXPY() diff --git a/src/basic/DAXPY.hpp b/src/basic/DAXPY.hpp index 9f0688d8a..ae8eef2fa 100644 --- a/src/basic/DAXPY.hpp +++ b/src/basic/DAXPY.hpp @@ -22,6 +22,11 @@ Real_ptr y = m_y; \ Real_type a = m_a; +#define DAXPY_FUNCTOR_CONSTRUCT \ 
+ x(m_x),\ + y(m_y), \ + a(m_a) + #define DAXPY_BODY \ y[i] += a * x[i] ; @@ -53,6 +58,10 @@ class DAXPY : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + + + private: Real_ptr m_x; Real_ptr m_y; diff --git a/src/basic/IF_QUAD-OMPTarget.cpp b/src/basic/IF_QUAD-OMPTarget.cpp index 0a16fccc8..d4014e057 100644 --- a/src/basic/IF_QUAD-OMPTarget.cpp +++ b/src/basic/IF_QUAD-OMPTarget.cpp @@ -78,10 +78,10 @@ void IF_QUAD::runOpenMPTargetVariant(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::forall>( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - IF_QUAD_BODY; - }); + //RAJA::forall>( + // RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + // IF_QUAD_BODY; + //}); } stopTimer(); diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index 2baff8244..ed84c5bd2 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -37,6 +37,10 @@ IF_QUAD::IF_QUAD(const RunParams& params) setUsesFeature(Forall); + + setVariantDefined( Kokkos_Lambda ); + + setVariantDefined( Base_Seq ); setVariantDefined( Lambda_Seq ); setVariantDefined( RAJA_Seq ); diff --git a/src/basic/IF_QUAD.hpp b/src/basic/IF_QUAD.hpp index dad204ce3..d2524e935 100644 --- a/src/basic/IF_QUAD.hpp +++ b/src/basic/IF_QUAD.hpp @@ -70,6 +70,11 @@ class IF_QUAD : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + + + + private: Real_ptr m_a; Real_ptr m_b; diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index cb3c14132..6b8eb6b55 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -36,11 +36,13 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Base_Seq ); setVariantDefined( Lambda_Seq ); setVariantDefined( RAJA_Seq ); + setVariantDefined( Kokkos_Lambda ); setVariantDefined( Base_OpenMP ); setVariantDefined( Lambda_OpenMP ); setVariantDefined( 
RAJA_OpenMP ); + setVariantDefined( Base_OpenMPTarget ); setVariantDefined( RAJA_OpenMPTarget ); @@ -48,6 +50,7 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Lambda_CUDA ); setVariantDefined( RAJA_CUDA ); + setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp index 9d9de78da..31e8dcb6f 100644 --- a/src/basic/INIT3.hpp +++ b/src/basic/INIT3.hpp @@ -56,6 +56,11 @@ class INIT3 : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + + + + private: Real_ptr m_out1; Real_ptr m_out2; diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index bad47eae8..434c3775e 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -52,6 +52,11 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + } INIT_VIEW1D::~INIT_VIEW1D() diff --git a/src/basic/INIT_VIEW1D.hpp b/src/basic/INIT_VIEW1D.hpp index b215439dc..fbbe7e737 100644 --- a/src/basic/INIT_VIEW1D.hpp +++ b/src/basic/INIT_VIEW1D.hpp @@ -66,7 +66,10 @@ class INIT_VIEW1D : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); - + void runKokkosVariant(VariantID vid); + + + private: Real_ptr m_a; Real_type m_val; diff --git a/src/basic/INIT_VIEW1D_OFFSET-OMPTarget.cpp b/src/basic/INIT_VIEW1D_OFFSET-OMPTarget.cpp index e419e7fca..7462741e5 100644 --- a/src/basic/INIT_VIEW1D_OFFSET-OMPTarget.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET-OMPTarget.cpp @@ -72,10 +72,10 @@ void INIT_VIEW1D_OFFSET::runOpenMPTargetVariant(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::forall>( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - INIT_VIEW1D_OFFSET_BODY_RAJA; 
- }); + //RAJA::forall>( + // RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + // INIT_VIEW1D_OFFSET_BODY_RAJA; + //}); } stopTimer(); diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index 06519f61b..67b95f9a6 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -52,6 +52,11 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + } INIT_VIEW1D_OFFSET::~INIT_VIEW1D_OFFSET() diff --git a/src/basic/INIT_VIEW1D_OFFSET.hpp b/src/basic/INIT_VIEW1D_OFFSET.hpp index 333139909..660e17ddc 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.hpp +++ b/src/basic/INIT_VIEW1D_OFFSET.hpp @@ -65,7 +65,10 @@ class INIT_VIEW1D_OFFSET : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); - + void runKokkosVariant(VariantID vid); + + + private: Real_ptr m_a; Real_type m_val; diff --git a/src/basic/MULADDSUB-OMPTarget.cpp b/src/basic/MULADDSUB-OMPTarget.cpp index 064628d61..5b6bb7b2c 100644 --- a/src/basic/MULADDSUB-OMPTarget.cpp +++ b/src/basic/MULADDSUB-OMPTarget.cpp @@ -80,10 +80,10 @@ void MULADDSUB::runOpenMPTargetVariant(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::forall>( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - MULADDSUB_BODY; - }); + //RAJA::forall>( + // RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + // MULADDSUB_BODY; + //}); } stopTimer(); diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index baa201dc1..d96d5d0c5 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -51,6 +51,12 @@ MULADDSUB::MULADDSUB(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + + } 
MULADDSUB::~MULADDSUB() diff --git a/src/basic/MULADDSUB.hpp b/src/basic/MULADDSUB.hpp index afb0a5f38..7365f924c 100644 --- a/src/basic/MULADDSUB.hpp +++ b/src/basic/MULADDSUB.hpp @@ -58,7 +58,10 @@ class MULADDSUB : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); - + void runKokkosVariant(VariantID vid); + + + private: Real_ptr m_out1; Real_ptr m_out2; diff --git a/src/basic/NESTED_INIT-OMPTarget.cpp b/src/basic/NESTED_INIT-OMPTarget.cpp index 435df40c1..b65708773 100644 --- a/src/basic/NESTED_INIT-OMPTarget.cpp +++ b/src/basic/NESTED_INIT-OMPTarget.cpp @@ -64,23 +64,23 @@ void NESTED_INIT::runOpenMPTargetVariant(VariantID vid) NESTED_INIT_DATA_SETUP_OMP_TARGET; - using EXEC_POL = - RAJA::KernelPolicy< - RAJA::statement::Collapse, // k, j, i - RAJA::statement::Lambda<0> - > - >; + //using EXEC_POL = + // RAJA::KernelPolicy< + // RAJA::statement::Collapse, // k, j, i + // RAJA::statement::Lambda<0> + // > + // >; startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment(0, ni), - RAJA::RangeSegment(0, nj), - RAJA::RangeSegment(0, nk)), - [=](Index_type i, Index_type j, Index_type k) { - NESTED_INIT_BODY; - }); + // RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment(0, ni), + // RAJA::RangeSegment(0, nj), + // RAJA::RangeSegment(0, nk)), + // [=](Index_type i, Index_type j, Index_type k) { + // NESTED_INIT_BODY; + // }); } stopTimer(); diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index 77d847691..9574b3ed1 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -62,6 +62,10 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + } NESTED_INIT::~NESTED_INIT() diff --git a/src/basic/NESTED_INIT.hpp b/src/basic/NESTED_INIT.hpp index 
508ba8030..4573da55f 100644 --- a/src/basic/NESTED_INIT.hpp +++ b/src/basic/NESTED_INIT.hpp @@ -58,7 +58,10 @@ class NESTED_INIT : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); - + void runKokkosVariant(VariantID vid); + + + private: Index_type m_array_length; diff --git a/src/basic/PI_ATOMIC-OMPTarget.cpp b/src/basic/PI_ATOMIC-OMPTarget.cpp index 08cc41167..bddf4ebe1 100644 --- a/src/basic/PI_ATOMIC-OMPTarget.cpp +++ b/src/basic/PI_ATOMIC-OMPTarget.cpp @@ -78,11 +78,11 @@ void PI_ATOMIC::runOpenMPTargetVariant(VariantID vid) initOpenMPDeviceData(pi, &m_pi_init, 1, did, hid); - RAJA::forall>( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - double x = (double(i) + 0.5) * dx; - RAJA::atomicAdd(pi, dx / (1.0 + x * x)); - }); + //RAJA::forall>( + // RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + // double x = (double(i) + 0.5) * dx; + // RAJA::atomicAdd(pi, dx / (1.0 + x * x)); + //}); getOpenMPDeviceData(m_pi, pi, 1, hid, did); *m_pi *= 4.0; diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 94e29c8ae..f6375f3cf 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -35,6 +35,10 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setUsesFeature(Forall); setUsesFeature(Atomic); + setVariantDefined( Kokkos_Lambda ); + + + setVariantDefined( Base_Seq ); setVariantDefined( Lambda_Seq ); setVariantDefined( RAJA_Seq ); @@ -68,6 +72,7 @@ void PI_ATOMIC::setUp(VariantID vid) void PI_ATOMIC::updateChecksum(VariantID vid) { + //std::cout << "Value is "<<*m_pi< vsum(m_vsum_init); - RAJA::ReduceMin vmin(m_vmin_init); - RAJA::ReduceMax vmax(m_vmax_init); - - RAJA::forall>( - RAJA::RangeSegment(ibegin, iend), - [=](Index_type i) { - REDUCE3_INT_BODY_RAJA; - }); - - m_vsum += static_cast(vsum.get()); - m_vmin = RAJA_MIN(m_vmin, static_cast(vmin.get())); - m_vmax = RAJA_MAX(m_vmax, static_cast(vmax.get())); + // RAJA::ReduceSum vsum(m_vsum_init); + 
// RAJA::ReduceMin vmin(m_vmin_init); + // RAJA::ReduceMax vmax(m_vmax_init); + + // RAJA::forall>( + // RAJA::RangeSegment(ibegin, iend), + // [=](Index_type i) { + // REDUCE3_INT_BODY_RAJA; + // }); + + // m_vsum += static_cast(vsum.get()); + // m_vmin = RAJA_MIN(m_vmin, static_cast(vmin.get())); + // m_vmax = RAJA_MAX(m_vmax, static_cast(vmax.get())); } stopTimer(); diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index 821a4b7e3..5011a3036 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -56,6 +56,11 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + } REDUCE3_INT::~REDUCE3_INT() diff --git a/src/basic/REDUCE3_INT.hpp b/src/basic/REDUCE3_INT.hpp index b3acc5004..341dbdb75 100644 --- a/src/basic/REDUCE3_INT.hpp +++ b/src/basic/REDUCE3_INT.hpp @@ -70,7 +70,10 @@ class REDUCE3_INT : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); - + void runKokkosVariant(VariantID vid); + + + private: Int_ptr m_vec; Int_type m_vsum; diff --git a/src/basic/TRAP_INT-OMPTarget.cpp b/src/basic/TRAP_INT-OMPTarget.cpp index 636f4090a..6fd4a2e14 100644 --- a/src/basic/TRAP_INT-OMPTarget.cpp +++ b/src/basic/TRAP_INT-OMPTarget.cpp @@ -86,14 +86,14 @@ void TRAP_INT::runOpenMPTargetVariant(VariantID vid) startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - RAJA::ReduceSum sumx(m_sumx_init); + //RAJA::ReduceSum sumx(m_sumx_init); - RAJA::forall>( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - TRAP_INT_BODY; - }); + //RAJA::forall>( + // RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + // TRAP_INT_BODY; + //}); - m_sumx += static_cast(sumx.get()) * h; + //m_sumx += static_cast(sumx.get()) * h; } stopTimer(); diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index e7483d9f4..00144c39b 100644 --- 
a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -51,6 +51,11 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + } TRAP_INT::~TRAP_INT() diff --git a/src/basic/TRAP_INT.hpp b/src/basic/TRAP_INT.hpp index 171d72418..44e2bdc34 100644 --- a/src/basic/TRAP_INT.hpp +++ b/src/basic/TRAP_INT.hpp @@ -67,7 +67,10 @@ class TRAP_INT : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); - + void runKokkosVariant(VariantID vid); + + + private: Real_type m_x0; Real_type m_xp; diff --git a/src/common/BuiltinTimer.hpp b/src/common/BuiltinTimer.hpp new file mode 100644 index 000000000..f20a7f543 --- /dev/null +++ b/src/common/BuiltinTimer.hpp @@ -0,0 +1,40 @@ +// +// Created by Poliakoff, David Zoeller on 4/26/21. +// +#include +#include +#ifndef RAJAPERFSUITE_BUILTINTIMER_HPP +#define RAJAPERFSUITE_BUILTINTIMER_HPP +namespace rajaperf { + class ChronoTimer { + public: + using ElapsedType = double; + + private: + using ClockType = std::chrono::steady_clock; + using TimeType = ClockType::time_point; + using DurationType = std::chrono::duration; + + public: + ChronoTimer() : tstart(ClockType::now()), tstop(ClockType::now()), telapsed(0) { + } + + void start() { tstart = ClockType::now(); } + + void stop() { + tstop = ClockType::now(); + telapsed += + std::chrono::duration_cast(tstop - tstart).count(); + } + + ElapsedType elapsed() const { return telapsed; } + + void reset() { telapsed = 0; } + + private: + TimeType tstart; + TimeType tstop; + ElapsedType telapsed; + }; +} +#endif //RAJAPERFSUITE_BUILTINTIMER_HPP diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index a673d2e43..2c4be2fbc 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -5,14 +5,25 @@ # # SPDX-License-Identifier: (BSD-3-Clause) 
############################################################################### +set(RAJAPERF_COMMON_SRC + Executor.cpp + KernelBase.cpp + OutputUtils.cpp + RunParams.cpp + RAJAPerfSuite.cpp + ) +if(NOT INFRASTRUCTURE_ONLY) + #MESSAGE(FATAL_ERROR "TODO ERROR ${INFRASTRUCTURE_ONLY}") + LIST(APPEND RAJAPERF_COMMON_SRC + DataUtils.cpp + PerfsuiteKernelDefinitions.cpp + ) + endif() blt_add_library( NAME common - SOURCES DataUtils.cpp - Executor.cpp - KernelBase.cpp - OutputUtils.cpp - RAJAPerfSuite.cpp - RunParams.cpp + SOURCES ${RAJAPERF_COMMON_SRC} DEPENDS_ON ${RAJA_PERFSUITE_DEPENDS} ) + + diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index cc32c6cf6..76e50aafc 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -6,16 +6,20 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - +// status: functions copied over, need to reconcile #include "Executor.hpp" #include "common/KernelBase.hpp" #include "common/OutputUtils.hpp" -// Warmup kernels to run first to help reduce startup overheads in timings +// Warmup kernels will be run if NOT in a RAJAPerf Suite infrastructure build +// The purpose of warm up runs reduce startup overheads +// This overhead should not be reflected in perf testing timing +#ifndef RAJAPERF_INFRASTRUCTURE_ONLY #include "basic/DAXPY.hpp" #include "basic/REDUCE3_INT.hpp" #include "algorithm/SORT.hpp" +#endif #include #include @@ -26,7 +30,7 @@ #include #include #include - +// Defines miscellaneous symbolic constants and types, and declares miscellaneous functions. 
#include @@ -34,13 +38,15 @@ namespace rajaperf { using namespace std; +// Kokkos Design: +// Executor constructor Executor::Executor(int argc, char** argv) : run_params(argc, argv), reference_vid(NumVariants) { } - +// Executor destructor Executor::~Executor() { for (size_t ik = 0; ik < kernels.size(); ++ik) { @@ -58,15 +64,18 @@ void Executor::setupSuite() cout << "\nSetting up suite based on input..." << endl; + // + // Kokkoks Design: + // using Slist = list; using Svector = vector; using KIDset = set; using VIDset = set; - // - // Determine which kernels to exclude from input. - // exclude_kern will be non-duplicated ordered set of IDs of kernel to exclude. - // + + // Determine which kernels and features to exclude from input + // Store excluded inputs vector of strings. + const Svector& exclude_kernel_input = run_params.getExcludeKernelInput(); const Svector& exclude_feature_input = run_params.getExcludeFeatureInput(); @@ -74,15 +83,14 @@ void Executor::setupSuite() if ( !exclude_kernel_input.empty() ) { - // Make list copy of exclude kernel name input to manipulate for - // processing potential group names and/or kernel names, next + // Create list of excluded kernel names. + // In subsequent steps, this list will be used to form the group and/or kernel names to be run Slist exclude_kern_names(exclude_kernel_input.begin(), exclude_kernel_input.end()); - // - // Search exclude_kern_names for matching group names. - // groups2exclude will contain names of groups to exclude. - // + + // groups2exclude, a vector of strings, will contain names of groups to exclude. Svector groups2exclude; + // Search exclude_kern_names list for matching group names. 
for (Slist::iterator it = exclude_kern_names.begin(); it != exclude_kern_names.end(); ++it) { for (size_t ig = 0; ig < NumGroups; ++ig) { @@ -93,10 +101,10 @@ void Executor::setupSuite() } } - // - // If group name(s) found in exclude_kern_names, assemble kernels in group(s) - // to run and remove those group name(s) from exclude_kern_names list. - // + + // If group name(s) found in the list of exclude_kern_names, assemble kernels in group(s) + // to run, and remove the identified group name(s) from exclude_kern_names list. + for (size_t ig = 0; ig < groups2exclude.size(); ++ig) { const string& gname(groups2exclude[ig]); @@ -106,17 +114,16 @@ void Executor::setupSuite() exclude_kern.insert(kid); } } - + // List of kernel names to be excluded; + // Here, removing errant / erroneous group names from this list exclude_kern_names.remove(gname); } // - // Look for matching names of individual kernels in remaining exclude_kern_names. - // - // Assemble invalid input for warning message. - // + // Vector of strings containing invalid input + // A warning message is associated with invalid input Svector invalid; - + // Search for matching names of kernels in remaining exclude_kern_names for (Slist::iterator it = exclude_kern_names.begin(); it != exclude_kern_names.end(); ++it) { bool found_it = false; @@ -128,7 +135,7 @@ void Executor::setupSuite() found_it = true; } } - + // If kernel not found, add to the vector "invalid" if ( !found_it ) invalid.push_back(*it); } @@ -138,7 +145,7 @@ void Executor::setupSuite() if ( !exclude_feature_input.empty() ) { - // First, check for invalid exclude_feature input. + // Check for invalid exclude_feature input. // Assemble invalid input for warning message. // Svector invalid; @@ -158,8 +165,9 @@ void Executor::setupSuite() run_params.setInvalidExcludeFeatureInput(invalid); // - // If feature input is valid, determine which kernels use - // input-specified features and add to set of kernels to run. 
+ // Kokkos TODO: Ask David Beckingsale & Rich Hornung (LLNL) if this is correct: + // If feature input is valid, determine which kernels to use + // input-specified features, and add to set of kernels to run. // if ( run_params.getInvalidExcludeFeatureInput().empty() ) { @@ -167,6 +175,8 @@ void Executor::setupSuite() const string& feature = exclude_feature_input[i]; +// COMMENTED OUT BY KOKKOS; FEATURES DO NOT YET WORK IN OUR DESIGN +/* bool found_it = false; for (size_t fid = 0; fid < NumFeatures && !found_it; ++fid) { FeatureID tfid = static_cast(fid); @@ -175,7 +185,7 @@ void Executor::setupSuite() for (int kid = 0; kid < NumKernels; ++kid) { KernelID tkid = static_cast(kid); - KernelBase* kern = getKernelObject(tkid, run_params); + //KernelBase* kern = getKernelObject(tkid, run_params); if ( kern->usesFeature(tfid) ) { exclude_kern.insert( tkid ); } @@ -184,47 +194,37 @@ void Executor::setupSuite() } // if input feature name matches feature id } // loop over feature ids until name match is found - +*/ } // loop over feature name input } // if feature name input is valid } // - // Determine which kernels to execute from input. - // run_kern will be non-duplicated ordered set of IDs of kernel to run. + // Determine which kernels to execute from input + // run_kern is an ordered set of KernelID to run // const Svector& kernel_input = run_params.getKernelInput(); const Svector& feature_input = run_params.getFeatureInput(); - + // Set of KernelID objects KIDset run_kern; if ( kernel_input.empty() && feature_input.empty() ) { - // // No kernels or features specified in input, run them all... 
- // - for (size_t kid = 0; kid < NumKernels; ++kid) { - KernelID tkid = static_cast(kid); - if (exclude_kern.find(tkid) == exclude_kern.end()) { - run_kern.insert( tkid ); - } + for (auto iter_input: allKernels) { + kernels.push_back(iter_input.second); } } else { - // - // Need to parse input to determine which kernels to run - // - - // // Look for kernels using features if such input provided - // if ( !feature_input.empty() ) { - // First, check for invalid feature input. - // Assemble invalid input for warning message. - // +// Ask David Beckingsale & Rich H. what to do here +// Kokkos Design: +// FEATURE DOES NOT YET WORK WITH KOKKOS +/** TODO: Kokkos, reimplement! Svector invalid; for (size_t i = 0; i < feature_input.size(); ++i) { @@ -273,9 +273,9 @@ void Executor::setupSuite() } // loop over feature name input } // if feature name input is valid - +*/ } // if !feature_input.empty() - + /** // Make list copy of kernel name input to manipulate for // processing potential group names and/or kernel names, next Slist kern_names(kernel_input.begin(), kernel_input.end()); @@ -299,6 +299,7 @@ void Executor::setupSuite() // If group name(s) found in kern_names, assemble kernels in group(s) // to run and remove those group name(s) from kern_names list. // + for (size_t ig = 0; ig < groups2run.size(); ++ig) { const string& gname(groups2run[ig]); @@ -338,14 +339,29 @@ void Executor::setupSuite() } run_params.setInvalidKernelInput(invalid); + */ + Svector invalid; + for (auto kernelName: kernel_input) { + std::vector matchingKernelsVec = lookUpKernelByName(kernelName); + // Check -- everything that matched is added kernels vector + + if (matchingKernelsVec.empty()) { + invalid.push_back(kernelName); + } else { + + for (auto iter_kern: matchingKernelsVec) { + kernels.push_back(iter_kern); + + } + } + } + run_params.setInvalidKernelInput(invalid); } - // + // Assemble set of available variants to run - // (based on compile-time configuration). 
- // VIDset available_var; for (size_t iv = 0; iv < NumVariants; ++iv) { VariantID vid = static_cast(iv); @@ -355,9 +371,7 @@ void Executor::setupSuite() } - // - // Determine variants to execute from input. - // run_var will be non-duplicated ordered set of IDs of variants to run. + // Declare and set exclude_variant_names (from run parameter inputs) // const Svector& exclude_variant_names = run_params.getExcludeVariantInput(); @@ -365,12 +379,8 @@ void Executor::setupSuite() if ( !exclude_variant_names.empty() ) { - // - // Parse input to determine which variants to exclude. - // + // Assemble invalid input for warning message. - // - Svector invalid; for (size_t it = 0; it < exclude_variant_names.size(); ++it) { @@ -393,8 +403,8 @@ void Executor::setupSuite() } // - // Determine variants to execute from input. - // run_var will be non-duplicated ordered set of IDs of variants to run. + // Determine variants to run based on user input (stored in run_params). + // run_var will be an ordered set of unique variant IDs to run. // const Svector& variant_names = run_params.getVariantInput(); @@ -403,7 +413,7 @@ void Executor::setupSuite() if ( variant_names.empty() ) { // - // No variants specified in input options, run all available. + // If no variants specified in input options, run all available. // Also, set reference variant if specified. // for (VIDset::iterator vid_it = available_var.begin(); @@ -426,15 +436,6 @@ void Executor::setupSuite() } else { - // - // Parse input to determine which variants to run: - // - variants to run will be the intersection of available variants - // and those specified in input - // - reference variant will be set to specified input if available - // and variant will be run; else first variant that will be run. - // - // Assemble invalid input for warning message. - // Svector invalid; @@ -469,12 +470,8 @@ void Executor::setupSuite() } - // - // Create kernel objects and variants to execute. 
If invalid input is not - // empty for either case, then there were unmatched input items. - // - // A message will be emitted later so user can sort it out... - // + // Kokkos Design: + // Create kernel objects and variants to execute. if ( !(run_params.getInvalidKernelInput().empty()) || !(run_params.getInvalidExcludeKernelInput().empty()) ) { @@ -492,9 +489,9 @@ void Executor::setupSuite() kid != run_kern.end(); ++kid) { /// RDH DISABLE COUPLE KERNEL until we find a reasonable way to do /// complex numbers in GPU code - if ( *kid != Apps_COUPLE ) { - kernels.push_back( getKernelObject(*kid, run_params) ); - } + //if ( *kid != Apps_COUPLE ) { + // kernels.push_back( getKernelObject(*kid, run_params) ); + //} } if ( !(run_params.getInvalidVariantInput().empty()) || @@ -509,8 +506,7 @@ void Executor::setupSuite() variant_ids.push_back( *vid ); } - // - // If we've gotten to this point, we have good input to run. + // If the bloc of code below executes, we have good input to run. // if ( run_params.getInputState() != RunParams::DryRun && run_params.getInputState() != RunParams::CheckRun ) { @@ -703,11 +699,15 @@ void Executor::runSuite() in_state != RunParams::CheckRun ) { return; } +// Kokkos Design: +#ifndef RAJAPERF_INFRASTRUCTURE_ONLY cout << "\n\nRun warmup kernels...\n"; vector warmup_kernels; + // The warm-up kernels that will be run if RAJAPERF_INFRASTUCTURE_ONLY NOT + // enabled warmup_kernels.push_back(new basic::DAXPY(run_params)); warmup_kernels.push_back(new basic::REDUCE3_INT(run_params)); warmup_kernels.push_back(new algorithm::SORT(run_params)); @@ -732,6 +732,7 @@ void Executor::runSuite() delete warmup_kernels[ik]; } +#endif cout << "\n\nRunning specified kernels and variants...\n"; @@ -1326,6 +1327,141 @@ void Executor::getFOMGroups(vector& fom_groups) #endif } + // Kokkos Desgin: + // Function to register new Kokkos and /or RAJA group and kernel ID + // The return type is Executor::groupID + + Executor::groupID Executor::registerGroup(std::string 
groupName) { + auto checkIfGroupExists = kernelsPerGroup.find(groupName); + + + if (checkIfGroupExists == kernelsPerGroup.end()) { + // If groupName not found, set that groupName in kernelsPerGroup to an empty kernelSet obj + kernelsPerGroup[groupName] = kernelSet(); + } else { + // ERROR CONDITION: DUPLICATING GROUPS + // Error lists exsiting group, and kills program. + + std::cout << "The Group Name " << groupName << " already exists. Program is exiting." << std::endl; + + // In kernelsPerGroup, groupName is the second position / value in the set + auto fullKernelSet = checkIfGroupExists->second; + + // fullKernelSet is of type std::set + for (auto kernel: fullKernelSet) { + + std::cout << kernel->getName() << std::endl; + + } + + exit(1); + + } + // getNewGroupID() is an object of type Executor::groupID, an int + return getNewGroupID(); + + + } + + // Kokkos Design: + // Function to register new kernels + // The return type -- Executor::kernelID, returning getNewKernelID() + + Executor::kernelID Executor::registerKernel(std::string groupName, KernelBase *kernel) { + // Declaring and setting kernelName to de-referenced kernel pointer obj (passed in as as argument), an instance of KernelBase* + auto kernelName = kernel->getName(); + // Check if kernel exists; "allKernels" maps named kernels to their IDs; + auto checkIfKernelExists = allKernels.find(kernelName); + // Check if checkKernelExists value IS NOT in the map of all kernels + // to determine if a new kernel should be created + if (checkIfKernelExists == allKernels.end()) { + // If the kernel name IS NOT in the allKernels map, set kernelName to kernel, a KernelBase* instance + allKernels[kernelName] = kernel; + } else { + // ERROR CONDITION: if the kernel is found / exists, make the program exit + // kernelName is the key, or first element of allKernels + + std::cout << "Kernel " << checkIfKernelExists->first << " already exists. Program is exiting." 
+ << std::endl; + + exit(1); + } + // Kokkos Desgin: + // ERROR CONDITION : adding a groupName (to kernelsPerGroup) before checking if the (kernel) group exists. + // + auto checkIfGroupExists = kernelsPerGroup.find(groupName); + + + if (checkIfGroupExists == kernelsPerGroup.end()) { + + } else { + // If the groupName DOES EXIST in kernelsPerGroup, then insert the associated kernel (instance of KernelBase*) + // at the second (value) position of the allKernels map to associate correctly the kernel and its groupName + checkIfGroupExists->second.insert(kernel); + + } + + // getNewKernelID is an obj of type Executor::kernelID + return getNewKernelID(); + } + + // Kokkos Design: + // Function of the Executor class that returns a vector of all kernelBase* objects. + + std::vector Executor::lookUpKernelByName(std::string kernelOrGroupName) { + + // This variable will contain the set of kernels to run + std::vector kernelsByNameVect; + // kernelsPerGroup: first (key) is kernel, second (value), is group + auto checkLookUpGroupNameIterator = kernelsPerGroup.find(kernelOrGroupName); + auto checkLookUpKernelNameIterator = allKernels.find(kernelOrGroupName); + + if (checkLookUpGroupNameIterator != kernelsPerGroup.end()) { + // Gather the kernel groups that will be perf tested + auto groupSetForTests = checkLookUpGroupNameIterator->second; + // Capture the group name, and store in kernelsByNameVect + for (auto item: groupSetForTests) { + kernelsByNameVect.push_back(item); + } + // Check -- if kernel name not an empty element, i.e., it exists, + // capture the name of the kernel, and store in kernelsByNameVect + } else if (checkLookUpKernelNameIterator != allKernels.end()) { + + auto kernel = checkLookUpKernelNameIterator->second; + + kernelsByNameVect.push_back(kernel); + + } + + // kernelsByNameVect is an object of type std::vector that will be used by + return kernelsByNameVect; + + } + + // Kokkos Desgin: + // Take user-entered run parameters in by reference, and return + 
const RunParams &Executor::getRunParams() { + + + return run_params; + } + + // Function to register a new kernel group for an instance of an Executor + // object + void free_register_group(Executor *exec, std::string groupName) { + exec->registerGroup(groupName); + } + // Function to register a new kernel for an instance of an Executor + // object + void free_register_kernel(Executor *exec, std::string groupName, KernelBase *kernel) { + exec->registerKernel(groupName, kernel); + } + // Function to populate an instance of an Executor object with run parameters + + const RunParams& getRunParams(Executor* exec){ + return exec->getRunParams(); + + } } // closing brace for rajaperf namespace diff --git a/src/common/Executor.hpp b/src/common/Executor.hpp index 32e978f9a..e3c88a22b 100644 --- a/src/common/Executor.hpp +++ b/src/common/Executor.hpp @@ -12,10 +12,17 @@ #include "common/RAJAPerfSuite.hpp" #include "common/RunParams.hpp" +#include #include #include #include + /////////////////////////////////////////////////// + // Logic: + // Need the full set of kernels + // Associate group names (e.g., lcals, basic) with kernel sets + // Interface to add new kernels (e.g., DAXPY) and groups (basic) + // for Kokkos Performance Testing namespace rajaperf { class KernelBase; @@ -43,6 +50,23 @@ class Executor void outputRunData(); + // Interface for adding new Kokkos groups and kernels + using groupID = int; + using kernelSet = std::set; + using kernelMap = std::map; + using groupMap = std::map; + using kernelID = int; + + + groupID registerGroup(std::string groupName); + + kernelID registerKernel(std::string, KernelBase*); + + std::vector lookUpKernelByName(std::string kernelOrGroupName); + + const RunParams& getRunParams(); + + private: Executor() = delete; @@ -72,13 +96,52 @@ class Executor void writeFOMReport(const std::string& filename); void getFOMGroups(std::vector& fom_groups); + + // Kokkos Design: + // Kokkos add group and kernel ID inline functions + // The 
newGroupID and newKerneID, both type int, will be shared amongst invocations of these inline functions. + + inline groupID getNewGroupID() { + + static groupID newGroupID; + + return newGroupID++; + + } + + inline kernelID getNewKernelID() { + + static kernelID newKernelID; + return newKernelID++; + + } + + // Required data members: + // running parameters, specific kernels (e.g., DAXPY), variants (e.g., + // Kokkos, CUDA, Sequential, etc.) RunParams run_params; std::vector kernels; std::vector variant_ids; VariantID reference_vid; + + // "allKernels" is an instance of kernelMap, a std::map that takes a std::string name (key) and pointer to the associated KernelBase object (value). + kernelMap allKernels; + // "kernelsPerGroup" is an instance of the groupMap type, a std::map that takes a std::string name (key) and a kernelSet object, + // containing the set of unique kernels (in a kernel group, such as basic, + // lcals, etc.) to be run. + groupMap kernelsPerGroup; + + }; +// Kokkos design: +// Register a new kernel group (see: PerfsuiteKernelDefinitions.*): +void free_register_group(Executor*, std::string); +// Register a new kernel (that belongs to a particular kernel group): +void free_register_kernel(Executor*, std::string, KernelBase*); +// Take in run parameters by reference +const RunParams& getRunParams(Executor* exec); } // closing brace for rajaperf namespace diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index 69d195700..4034faa0c 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -51,6 +51,24 @@ KernelBase::KernelBase(KernelID kid, const RunParams& params) : } } + KernelBase::KernelBase(std::string name, const RunParams& params) + : run_params(params), + kernel_id(Basic_DAXPY), // TODO DZP: better + name(name), + default_prob_size(0), + default_reps(0), + running_variant(NumVariants) + { + for (size_t ivar = 0; ivar < NumVariants; ++ivar) { + checksum[ivar] = 0.0; + num_exec[ivar] = 0; + min_time[ivar] = 
std::numeric_limits::max(); + max_time[ivar] = -std::numeric_limits::max(); + tot_time[ivar] = 0.0; + has_variant_defined[ivar] = false; + } + } + KernelBase::~KernelBase() { @@ -91,10 +109,10 @@ void KernelBase::execute(VariantID vid) running_variant = vid; resetTimer(); - +#ifndef RAJAPERF_INFRASTRUCTURE_ONLY resetDataInitCount(); +#endif this->setUp(vid); - this->runKernel(vid); this->updateChecksum(vid); @@ -108,7 +126,7 @@ void KernelBase::recordExecTime() { num_exec[running_variant]++; - RAJA::Timer::ElapsedType exec_time = timer.elapsed(); + TimerType::ElapsedType exec_time = timer.elapsed(); min_time[running_variant] = std::min(min_time[running_variant], exec_time); max_time[running_variant] = std::max(max_time[running_variant], exec_time); tot_time[running_variant] += exec_time; @@ -176,6 +194,14 @@ void KernelBase::runKernel(VariantID vid) break; } +#if defined(RUN_KOKKOS) or defined (RAJAPERF_INFRASTRUCTURE_ONLY) + case Kokkos_Lambda : + case Kokkos_Functor : + { + runKokkosVariant(vid); + break; + } +#endif // RUN_KOKKOS default : { #if 0 std::cout << "\n " << getName() diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 8c2dfb799..48f16f3de 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -11,10 +11,15 @@ #include "common/RAJAPerfSuite.hpp" #include "common/RPTypes.hpp" -#include "common/DataUtils.hpp" #include "common/RunParams.hpp" +#ifndef RAJAPERF_INFRASTRUCTURE_ONLY #include "RAJA/util/Timer.hpp" +#include "common/DataUtils.hpp" +#else +#include "common/BuiltinTimer.hpp" +#endif + #if defined(RAJA_ENABLE_CUDA) #include "RAJA/policy/cuda/raja_cudaerrchk.hpp" #endif @@ -39,11 +44,19 @@ class KernelBase { public: KernelBase(KernelID kid, const RunParams& params); + KernelBase(std::string name, const RunParams& params); + +#ifndef RAJAPERF_INFRASTRUCTURE_ONLY + using TimerType = RAJA::Timer; +#else + using TimerType = rajaperf::ChronoTimer; +#endif virtual ~KernelBase(); KernelID getKernelID() const { 
return kernel_id; } const std::string& getName() const { return name; } + void setName(const std::string& new_name) { name = new_name; } // // Methods called in kernel subclass constructors to set kernel @@ -118,12 +131,18 @@ class KernelBase void startTimer() { synchronize(); + #ifdef RUN_KOKKOS + Kokkos::Tools::pushRegion(this->getName()); + #endif timer.start(); } void stopTimer() { synchronize(); + #ifdef RUN_KOKKOS + Kokkos::Tools::popRegion(); + #endif timer.stop(); recordExecTime(); } @@ -156,8 +175,12 @@ class KernelBase virtual void runOpenMPTargetVariant(VariantID vid) = 0; #endif +#if defined(RUN_KOKKOS) or defined(RAJAPERF_INFRASTRUCTURE_ONLY) + virtual void runKokkosVariant(VariantID vid) = 0; +#endif // RUN_KOKKOS + protected: - const RunParams& run_params; + const RunParams run_params; Checksum_type checksum[NumVariants]; Checksum_type checksum_scale_factor; @@ -194,11 +217,15 @@ class KernelBase int num_exec[NumVariants]; - RAJA::Timer timer; - RAJA::Timer::ElapsedType min_time[NumVariants]; - RAJA::Timer::ElapsedType max_time[NumVariants]; - RAJA::Timer::ElapsedType tot_time[NumVariants]; + TimerType timer; + + TimerType::ElapsedType min_time[NumVariants]; + TimerType::ElapsedType max_time[NumVariants]; + TimerType::ElapsedType tot_time[NumVariants]; + + + }; } // closing brace for rajaperf namespace diff --git a/src/common/PerfsuiteKernelDefinitions.cpp b/src/common/PerfsuiteKernelDefinitions.cpp new file mode 100644 index 000000000..1f97494d2 --- /dev/null +++ b/src/common/PerfsuiteKernelDefinitions.cpp @@ -0,0 +1,171 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +// Created by Poliakoff, David Zoeller on 4/26/21. 
+// + +// Basic kernels... +// +#include "basic/DAXPY.hpp" +#include "basic/IF_QUAD.hpp" +#include "basic/INIT3.hpp" +#include "basic/INIT_VIEW1D.hpp" +#include "basic/INIT_VIEW1D_OFFSET.hpp" +#include "basic/MAT_MAT_SHARED.hpp" +#include "basic/MULADDSUB.hpp" +#include "basic/NESTED_INIT.hpp" +#include "basic/PI_ATOMIC.hpp" +#include "basic/PI_REDUCE.hpp" +#include "basic/REDUCE3_INT.hpp" +#include "basic/TRAP_INT.hpp" + +// +// Lcals kernels... +// +#include "lcals/DIFF_PREDICT.hpp" +#include "lcals/EOS.hpp" +#include "lcals/FIRST_DIFF.hpp" +#include "lcals/FIRST_MIN.hpp" +#include "lcals/FIRST_SUM.hpp" +#include "lcals/GEN_LIN_RECUR.hpp" +#include "lcals/HYDRO_1D.hpp" +#include "lcals/HYDRO_2D.hpp" +#include "lcals/INT_PREDICT.hpp" +#include "lcals/PLANCKIAN.hpp" +#include "lcals/TRIDIAG_ELIM.hpp" + +// +// Polybench kernels... +// +#include "polybench/POLYBENCH_2MM.hpp" +#include "polybench/POLYBENCH_3MM.hpp" +#include "polybench/POLYBENCH_ADI.hpp" +#include "polybench/POLYBENCH_ATAX.hpp" +#include "polybench/POLYBENCH_FDTD_2D.hpp" +#include "polybench/POLYBENCH_FLOYD_WARSHALL.hpp" +#include "polybench/POLYBENCH_GEMM.hpp" +#include "polybench/POLYBENCH_GEMVER.hpp" +#include "polybench/POLYBENCH_GESUMMV.hpp" +#include "polybench/POLYBENCH_HEAT_3D.hpp" +#include "polybench/POLYBENCH_JACOBI_1D.hpp" +#include "polybench/POLYBENCH_JACOBI_2D.hpp" +#include "polybench/POLYBENCH_MVT.hpp" + +// +// Stream kernels... +// +#include "stream/COPY.hpp" +#include "stream/MUL.hpp" +#include "stream/ADD.hpp" +#include "stream/TRIAD.hpp" +#include "stream/DOT.hpp" + +// +// Apps kernels... 
+// +//#include "apps/WIP-COUPLE.hpp" +#include "apps/DEL_DOT_VEC_2D.hpp" +#include "apps/DIFFUSION3DPA.hpp" +#include "apps/ENERGY.hpp" +#include "apps/FIR.hpp" +#include "apps/HALOEXCHANGE.hpp" +#include "apps/HALOEXCHANGE_FUSED.hpp" +#include "apps/LTIMES.hpp" +#include "apps/LTIMES_NOVIEW.hpp" +#include "apps/MASS3DPA.hpp" +#include "apps/PRESSURE.hpp" +#include "apps/VOL3D.hpp" + +// +// Algorithm kernels... +// +#include "algorithm/SORT.hpp" +#include "algorithm/SORTPAIRS.hpp" + + +#include +namespace rajaperf { + +void make_perfsuite_executor(rajaperf::Executor *exec, int argc, char *argv[]) { + RunParams run_params(argc, argv); + + free_register_group(exec, std::string("Basic")); + free_register_group(exec, std::string("Lcals")); + free_register_group(exec, std::string("Polybench")); + free_register_group(exec, std::string("Stream")); + free_register_group(exec, std::string("Apps")); + free_register_group(exec, std::string("Algorithm")); + + // Basic + + free_register_kernel(exec, "Basic", new basic::PI_ATOMIC(run_params)); + free_register_kernel(exec, "Basic", new basic::DAXPY(run_params)); + free_register_kernel(exec, "Basic", new basic::IF_QUAD(run_params)); + free_register_kernel(exec, "Basic", new basic::INIT3(run_params)); + free_register_kernel(exec, "Basic", new basic::INIT_VIEW1D(run_params)); + free_register_kernel(exec, "Basic", new basic::INIT_VIEW1D_OFFSET(run_params)); + free_register_kernel(exec, "Basic", new basic::MULADDSUB(run_params)); + free_register_kernel(exec, "Basic", new basic::NESTED_INIT(run_params)); + free_register_kernel(exec, "Basic", new basic::REDUCE3_INT(run_params)); + free_register_kernel(exec, "Basic", new basic::TRAP_INT(run_params)); + + // Lcals + free_register_kernel(exec, "Lcals", new lcals::DIFF_PREDICT(run_params)); + free_register_kernel(exec, "Lcals", new lcals::EOS(run_params)); + free_register_kernel(exec, "Lcals", new lcals::FIRST_DIFF(run_params)); + free_register_kernel(exec, "Lcals", new 
lcals::FIRST_MIN(run_params)); + free_register_kernel(exec, "Lcals", new lcals::FIRST_SUM(run_params)); + free_register_kernel(exec, "Lcals", new lcals::GEN_LIN_RECUR(run_params)); + free_register_kernel(exec, "Lcals", new lcals::HYDRO_1D(run_params)); + free_register_kernel(exec, "Lcals", new lcals::HYDRO_2D(run_params)); + free_register_kernel(exec, "Lcals", new lcals::INT_PREDICT(run_params)); + free_register_kernel(exec, "Lcals", new lcals::PLANCKIAN(run_params)); + free_register_kernel(exec, "Lcals", new lcals::TRIDIAG_ELIM(run_params)); + + // Nota bene: No Kokkos translations of polybench yet, + // only stub implementations + // Polybench + + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_2MM(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_3MM(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_ADI(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_ATAX(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_FDTD_2D(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_FLOYD_WARSHALL(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_GEMM(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_GEMVER(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_GESUMMV(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_HEAT_3D(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_JACOBI_1D(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_JACOBI_2D(run_params)); + free_register_kernel(exec, "Polybench", new polybench::POLYBENCH_MVT(run_params)); + + // Stream + free_register_kernel(exec, "Stream", new stream::ADD(run_params)); + free_register_kernel(exec, "Stream", new stream::COPY(run_params)); + free_register_kernel(exec, "Stream", new 
stream::DOT(run_params)); + free_register_kernel(exec, "Stream", new stream::MUL(run_params)); + free_register_kernel(exec, "Stream", new stream::TRIAD(run_params)); + + // Apps + //free_register_kernel(exec, "Apps", new apps::COUPLE(run_params)); + free_register_kernel(exec, "Apps", new apps::DEL_DOT_VEC_2D(run_params)); + free_register_kernel(exec, "Apps", new apps::ENERGY(run_params)); + free_register_kernel(exec, "Apps", new apps::FIR(run_params)); + free_register_kernel(exec, "Apps", new apps::HALOEXCHANGE(run_params)); + free_register_kernel(exec, "Apps", new apps::LTIMES(run_params)); + free_register_kernel(exec, "Apps", new apps::LTIMES_NOVIEW(run_params)); + free_register_kernel(exec, "Apps", new apps::PRESSURE(run_params)); + free_register_kernel(exec, "Apps", new apps::VOL3D(run_params)); + + // Algorithm + free_register_kernel(exec, "Algorithm", new algorithm::SORT(run_params)); + free_register_kernel(exec, "Algorithm", new algorithm::SORTPAIRS(run_params)); + +} +} // Closing namespace rajaperf diff --git a/src/common/PerfsuiteKernelDefinitions.hpp b/src/common/PerfsuiteKernelDefinitions.hpp new file mode 100644 index 000000000..cbbd50c1a --- /dev/null +++ b/src/common/PerfsuiteKernelDefinitions.hpp @@ -0,0 +1,16 @@ +// +// Created by Poliakoff, David Zoeller on 4/26/21. 
+// + +#ifndef RAJAPERFSUITE_PERFSUITEKERNELDEFINITIONS_HPP +#define RAJAPERFSUITE_PERFSUITEKERNELDEFINITIONS_HPP + +namespace rajaperf{ + + class Executor; +} +void make_perfsuite_executor(rajaperf::Executor *exec, int argc, char *argv[]); + + + +#endif //RAJAPERFSUITE_PERFSUITEKERNELDEFINITIONS_HPP diff --git a/src/common/QuickKernelBase.hpp b/src/common/QuickKernelBase.hpp new file mode 100644 index 000000000..2cd7a60c6 --- /dev/null +++ b/src/common/QuickKernelBase.hpp @@ -0,0 +1,147 @@ +#ifndef RAJAPERFSUITE_QUICKKERNELBASE_HPP +#define RAJAPERFSUITE_QUICKKERNELBASE_HPP + +#include "KernelBase.hpp" +#include + +namespace rajaperf { + + struct SureBuddyOkay { + bool validate_checksum(double reference, double variant) { + return true; + } + }; + + template + class QuickKernelBase : public rajaperf::KernelBase { + SetUp m_setup; + Execute m_execute; + Checksum m_checksum; + struct empty { + }; + using runData_helper = decltype(m_setup(0, 0)); + using runData = typename std::conditional::value, empty, runData_helper>::type; + using is_empty = std::is_same; + runData *rd; + public: + QuickKernelBase(std::string &name, const RunParams ¶ms, SetUp se, Execute ex, Checksum ch) : KernelBase( + name, + params), + m_setup(se), + m_execute(ex), + m_checksum( + ch) { + + + +setVariantDefined(Kokkos_Lambda); + setDefaultProblemSize(100000); + setActualProblemSize(100000); + setDefaultReps(5000); + +} + + QuickKernelBase(std::string &name, const RunParams ¶ms, SetUp se, Execute ex) : KernelBase(name, + params), + m_setup(se), + m_execute(ex), + m_checksum( + SureBuddyOkay() +) { + +setVariantDefined(Kokkos_Lambda); + setDefaultProblemSize(100000); + setActualProblemSize(100000); + setDefaultReps(5000); + +} + ~QuickKernelBase(){ + free(rd); +} + Real_type m_y; + + void setUpHelper(std::true_type) { + } + + void setUpHelper(std::false_type) { + rd = new runData(m_setup(getItsPerRep(), getActualProblemSize())); + } + + void setUp(VariantID vid) override { + 
setUpHelper(is_empty()); + } + + void updateChecksum(VariantID vid) override { + checksum[vid] += m_y; + } + + void tearDown(VariantID vID) override {} + + void runSeqVariant(VariantID vID) override {} + +#if defined(RAJA_ENABLE_OPENMP) && defined(RUN_OPENMP) + void runOpenMPVariant(VariantID vid) override { + auto size = getActualProblemSize(); + for(int x =0; x< getRunReps(); ++x){ + m_execute(x, size); + } + } +#endif +#if defined(RAJA_ENABLE_CUDA) + void runCudaVariant(VariantID vid) override {} +#endif +#if defined(RAJA_ENABLE_HIP) + void runHipVariant(VariantID vid) override {} +#endif +#if defined(RAJA_ENABLE_TARGET_OPENMP) + void runOpenMPTargetVariant(VariantID vid) override {} +#endif + +#if defined(RUN_KOKKOS) or defined(RAJAPERF_INFRASTRUCTURE_ONLY) + + + template + void rkv_helper(std::index_sequence) { + auto size = getActualProblemSize(); + for (int x = 0; x < getRunReps(); ++x) { + m_execute(x, size, std::get(*rd)...); + } + } + + void rkv_helper(empty em) { + auto size = getActualProblemSize(); + for (int x = 0; x < getRunReps(); ++x) { + m_execute(x, size); + } + } + + void rkv_switch_on_empty(std::false_type) { + using index_seq = typename + std::make_index_sequence::value>; + rkv_helper(index_seq()); + } + + void rkv_switch_on_empty(std::true_type) { + rkv_helper(empty()); + + } + + void runKokkosVariant(VariantID vid) override { + Kokkos::fence(); + startTimer(); + rkv_switch_on_empty(is_empty()); + Kokkos::fence(); + stopTimer(); + + } + +#endif // RUN_KOKKOS + }; + + template + KernelBase *make_kernel_base(std::string name, const RunParams ¶ms, Lambdas... lambdas) { + return new QuickKernelBase(name, params, lambdas...); + } + +} // end namespace rajaperf +#endif //RAJAPERFSUITE_QUICKKERNELBASE_HPP diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 748fb1325..baed495b2 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -10,90 +10,13 @@ #include "RunParams.hpp" -// -// Basic kernels... 
-// -#include "basic/DAXPY.hpp" -#include "basic/IF_QUAD.hpp" -#include "basic/INIT3.hpp" -#include "basic/INIT_VIEW1D.hpp" -#include "basic/INIT_VIEW1D_OFFSET.hpp" -#include "basic/MAT_MAT_SHARED.hpp" -#include "basic/MULADDSUB.hpp" -#include "basic/NESTED_INIT.hpp" -#include "basic/PI_ATOMIC.hpp" -#include "basic/PI_REDUCE.hpp" -#include "basic/REDUCE3_INT.hpp" -#include "basic/TRAP_INT.hpp" - -// -// Lcals kernels... -// -#include "lcals/DIFF_PREDICT.hpp" -#include "lcals/EOS.hpp" -#include "lcals/FIRST_DIFF.hpp" -#include "lcals/FIRST_MIN.hpp" -#include "lcals/FIRST_SUM.hpp" -#include "lcals/GEN_LIN_RECUR.hpp" -#include "lcals/HYDRO_1D.hpp" -#include "lcals/HYDRO_2D.hpp" -#include "lcals/INT_PREDICT.hpp" -#include "lcals/PLANCKIAN.hpp" -#include "lcals/TRIDIAG_ELIM.hpp" - -// -// Polybench kernels... -// -#include "polybench/POLYBENCH_2MM.hpp" -#include "polybench/POLYBENCH_3MM.hpp" -#include "polybench/POLYBENCH_ADI.hpp" -#include "polybench/POLYBENCH_ATAX.hpp" -#include "polybench/POLYBENCH_FDTD_2D.hpp" -#include "polybench/POLYBENCH_FLOYD_WARSHALL.hpp" -#include "polybench/POLYBENCH_GEMM.hpp" -#include "polybench/POLYBENCH_GEMVER.hpp" -#include "polybench/POLYBENCH_GESUMMV.hpp" -#include "polybench/POLYBENCH_HEAT_3D.hpp" -#include "polybench/POLYBENCH_JACOBI_1D.hpp" -#include "polybench/POLYBENCH_JACOBI_2D.hpp" -#include "polybench/POLYBENCH_MVT.hpp" - -// -// Stream kernels... -// -#include "stream/COPY.hpp" -#include "stream/MUL.hpp" -#include "stream/ADD.hpp" -#include "stream/TRIAD.hpp" -#include "stream/DOT.hpp" - -// -// Apps kernels... -// -#include "apps/WIP-COUPLE.hpp" -#include "apps/DEL_DOT_VEC_2D.hpp" -#include "apps/DIFFUSION3DPA.hpp" -#include "apps/ENERGY.hpp" -#include "apps/FIR.hpp" -#include "apps/HALOEXCHANGE.hpp" -#include "apps/HALOEXCHANGE_FUSED.hpp" -#include "apps/LTIMES.hpp" -#include "apps/LTIMES_NOVIEW.hpp" -#include "apps/MASS3DPA.hpp" -#include "apps/PRESSURE.hpp" -#include "apps/VOL3D.hpp" - -// -// Algorithm kernels... 
-// -#include "algorithm/SORT.hpp" -#include "algorithm/SORTPAIRS.hpp" - +#ifndef RAJAPERF_INFRASTRUCTURE_ONLY +#include "PerfsuiteKernelDefinitions.hpp" +#endif #include -namespace rajaperf -{ +namespace rajaperf { /*! ******************************************************************************* @@ -107,18 +30,18 @@ namespace rajaperf * ******************************************************************************* */ -static const std::string GroupNames [] = -{ - std::string("Basic"), - std::string("Lcals"), - std::string("Polybench"), - std::string("Stream"), - std::string("Apps"), - std::string("Algorithm"), + static const std::string GroupNames[] = + { + std::string("Basic"), + std::string("Lcals"), + std::string("Polybench"), + std::string("Stream"), + std::string("Apps"), + std::string("Algorithm"), - std::string("Unknown Group") // Keep this at the end and DO NOT remove.... + std::string("Unknown Group") // Keep this at the end and DO NOT remove.... -}; // END GroupNames + }; // END GroupNames /*! @@ -133,10 +56,9 @@ static const std::string GroupNames [] = * ******************************************************************************* */ -static const std::string KernelNames [] = -{ + static const std::string KernelNames[] = + { -// // Basic kernels... // std::string("Basic_DAXPY"), @@ -154,7 +76,7 @@ static const std::string KernelNames [] = // // Lcals kernels... -// +//// std::string("Lcals_DIFF_PREDICT"), std::string("Lcals_EOS"), std::string("Lcals_FIRST_DIFF"), @@ -166,10 +88,9 @@ static const std::string KernelNames [] = std::string("Lcals_INT_PREDICT"), std::string("Lcals_PLANCKIAN"), std::string("Lcals_TRIDIAG_ELIM"), - -// -// Polybench kernels... // +//// Polybench kernels... 
+//// Nota bene: Kokkos variants have not yet been created std::string("Polybench_2MM"), std::string("Polybench_3MM"), std::string("Polybench_ADI"), @@ -183,20 +104,19 @@ static const std::string KernelNames [] = std::string("Polybench_JACOBI_1D"), std::string("Polybench_JACOBI_2D"), std::string("Polybench_MVT"), - -// -// Stream kernels... // +//// +//// Stream kernels... +//// std::string("Stream_ADD"), std::string("Stream_COPY"), std::string("Stream_DOT"), std::string("Stream_MUL"), std::string("Stream_TRIAD"), - // // Apps kernels... // - std::string("Apps_COUPLE"), + //std::string("Apps_COUPLE"), std::string("Apps_DEL_DOT_VEC_2D"), std::string("Apps_DIFFUSION3DPA"), std::string("Apps_ENERGY"), @@ -209,15 +129,14 @@ static const std::string KernelNames [] = std::string("Apps_PRESSURE"), std::string("Apps_VOL3D"), -// // Algorithm kernels... // std::string("Algorithm_SORT"), std::string("Algorithm_SORTPAIRS"), - std::string("Unknown Kernel") // Keep this at the end and DO NOT remove.... + std::string("Unknown Kernel") // Keep this at the end and DO NOT remove.... -}; // END KernelNames + }; // END KernelNames /*! 
@@ -232,19 +151,19 @@ static const std::string KernelNames [] = * ******************************************************************************* */ -static const std::string VariantNames [] = -{ + static const std::string VariantNames[] = + { - std::string("Base_Seq"), - std::string("Lambda_Seq"), - std::string("RAJA_Seq"), + std::string("Base_Seq"), + std::string("Lambda_Seq"), + std::string("RAJA_Seq"), - std::string("Base_OpenMP"), - std::string("Lambda_OpenMP"), - std::string("RAJA_OpenMP"), + std::string("Base_OpenMP"), + std::string("Lambda_OpenMP"), + std::string("RAJA_OpenMP"), - std::string("Base_OMPTarget"), - std::string("RAJA_OMPTarget"), + std::string("Base_OMPTarget"), + std::string("RAJA_OMPTarget"), std::string("Base_CUDA"), std::string("Lambda_CUDA"), @@ -254,9 +173,12 @@ static const std::string VariantNames [] = std::string("Lambda_HIP"), std::string("RAJA_HIP"), - std::string("Unknown Variant") // Keep this at the end and DO NOT remove.... + std::string("Kokkos_Lambda"), + std::string("Kokkos_Functor"), -}; // END VariantNames + std::string("Unknown Variant") // Keep this at the end and DO NOT remove.... + + }; // END VariantNames /*! 
@@ -312,12 +234,11 @@ const std::string& getGroupName(GroupID gid) * ******************************************************************************* */ -std::string getKernelName(KernelID kid) -{ - std::string::size_type pos = KernelNames[kid].find("_"); - std::string kname(KernelNames[kid].substr(pos+1, std::string::npos)); - return kname; -} + std::string getKernelName(KernelID kid) { + std::string::size_type pos = KernelNames[kid].find("_"); + std::string kname(KernelNames[kid].substr(pos + 1, std::string::npos)); + return kname; + } /* @@ -327,10 +248,9 @@ std::string getKernelName(KernelID kid) * ******************************************************************************* */ -const std::string& getFullKernelName(KernelID kid) -{ - return KernelNames[kid]; -} + const std::string &getFullKernelName(KernelID kid) { + return KernelNames[kid]; + } /* @@ -340,10 +260,9 @@ const std::string& getFullKernelName(KernelID kid) * ******************************************************************************* */ -const std::string& getVariantName(VariantID vid) -{ - return VariantNames[vid]; -} + const std::string &getVariantName(VariantID vid) { + return VariantNames[vid]; + } /*! 
******************************************************************************* @@ -353,33 +272,39 @@ const std::string& getVariantName(VariantID vid) * ******************************************************************************* */ -bool isVariantAvailable(VariantID vid) -{ - bool ret_val = false; + bool isVariantAvailable(VariantID vid) { + bool ret_val = false; - if ( vid == Base_Seq ) { - ret_val = true; - } + if (vid == Base_Seq) { + ret_val = true; + } #if defined(RUN_RAJA_SEQ) - if ( vid == Lambda_Seq || - vid == RAJA_Seq ) { - ret_val = true; - } + if (vid == Lambda_Seq || + vid == RAJA_Seq) { + ret_val = true; + } #endif +#if defined(RUN_KOKKOS) or defined(RAJAPERF_INFRASTRUCTURE_ONLY) + if (vid == Kokkos_Lambda || + vid == Kokkos_Functor) { + ret_val = true; + } +#endif // RUN_KOKKOS + #if defined(RAJA_ENABLE_OPENMP) && defined(RUN_OPENMP) - if ( vid == Base_OpenMP || - vid == Lambda_OpenMP || - vid == RAJA_OpenMP ) { - ret_val = true; - } + if ( vid == Base_OpenMP || + vid == Lambda_OpenMP || + vid == RAJA_OpenMP ) { + ret_val = true; + } #endif #if defined(RAJA_ENABLE_TARGET_OPENMP) - if ( vid == Base_OpenMPTarget || - vid == RAJA_OpenMPTarget ) { - ret_val = true; - } + if ( vid == Base_OpenMPTarget || + vid == RAJA_OpenMPTarget ) { + ret_val = true; + } #endif #if defined(RAJA_ENABLE_CUDA) @@ -398,8 +323,8 @@ bool isVariantAvailable(VariantID vid) } #endif - return ret_val; -} + return ret_val; + } /* ******************************************************************************* @@ -420,264 +345,4 @@ const std::string& getFeatureName(FeatureID fid) * ******************************************************************************* */ -KernelBase* getKernelObject(KernelID kid, - const RunParams& run_params) -{ - KernelBase* kernel = 0; - - switch ( kid ) { - - // - // Basic kernels... 
- // - case Basic_DAXPY : { - kernel = new basic::DAXPY(run_params); - break; - } - case Basic_IF_QUAD : { - kernel = new basic::IF_QUAD(run_params); - break; - } - case Basic_INIT3 : { - kernel = new basic::INIT3(run_params); - break; - } - case Basic_INIT_VIEW1D : { - kernel = new basic::INIT_VIEW1D(run_params); - break; - } - case Basic_INIT_VIEW1D_OFFSET : { - kernel = new basic::INIT_VIEW1D_OFFSET(run_params); - break; - } - case Basic_MAT_MAT_SHARED : { - kernel = new basic::MAT_MAT_SHARED(run_params); - break; - } - case Basic_MULADDSUB : { - kernel = new basic::MULADDSUB(run_params); - break; - } - case Basic_NESTED_INIT : { - kernel = new basic::NESTED_INIT(run_params); - break; - } - case Basic_PI_ATOMIC : { - kernel = new basic::PI_ATOMIC(run_params); - break; - } - case Basic_PI_REDUCE : { - kernel = new basic::PI_REDUCE(run_params); - break; - } - case Basic_REDUCE3_INT : { - kernel = new basic::REDUCE3_INT(run_params); - break; - } - case Basic_TRAP_INT : { - kernel = new basic::TRAP_INT(run_params); - break; - } - -// -// Lcals kernels... 
-// - case Lcals_DIFF_PREDICT : { - kernel = new lcals::DIFF_PREDICT(run_params); - break; - } - case Lcals_EOS : { - kernel = new lcals::EOS(run_params); - break; - } - case Lcals_FIRST_DIFF : { - kernel = new lcals::FIRST_DIFF(run_params); - break; - } - case Lcals_FIRST_MIN : { - kernel = new lcals::FIRST_MIN(run_params); - break; - } - case Lcals_FIRST_SUM : { - kernel = new lcals::FIRST_SUM(run_params); - break; - } - case Lcals_GEN_LIN_RECUR : { - kernel = new lcals::GEN_LIN_RECUR(run_params); - break; - } - case Lcals_HYDRO_1D : { - kernel = new lcals::HYDRO_1D(run_params); - break; - } - case Lcals_HYDRO_2D : { - kernel = new lcals::HYDRO_2D(run_params); - break; - } - case Lcals_INT_PREDICT : { - kernel = new lcals::INT_PREDICT(run_params); - break; - } - case Lcals_PLANCKIAN : { - kernel = new lcals::PLANCKIAN(run_params); - break; - } - case Lcals_TRIDIAG_ELIM : { - kernel = new lcals::TRIDIAG_ELIM(run_params); - break; - } - -// -// Polybench kernels... -// - case Polybench_2MM : { - kernel = new polybench::POLYBENCH_2MM(run_params); - break; - } - case Polybench_3MM : { - kernel = new polybench::POLYBENCH_3MM(run_params); - break; - } - case Polybench_ADI : { - kernel = new polybench::POLYBENCH_ADI(run_params); - break; - } - case Polybench_ATAX : { - kernel = new polybench::POLYBENCH_ATAX(run_params); - break; - } - case Polybench_FDTD_2D : { - kernel = new polybench::POLYBENCH_FDTD_2D(run_params); - break; - } - case Polybench_FLOYD_WARSHALL : { - kernel = new polybench::POLYBENCH_FLOYD_WARSHALL(run_params); - break; - } - case Polybench_GEMM : { - kernel = new polybench::POLYBENCH_GEMM(run_params); - break; - } - case Polybench_GEMVER : { - kernel = new polybench::POLYBENCH_GEMVER(run_params); - break; - } - case Polybench_GESUMMV : { - kernel = new polybench::POLYBENCH_GESUMMV(run_params); - break; - } - case Polybench_HEAT_3D : { - kernel = new polybench::POLYBENCH_HEAT_3D(run_params); - break; - } - case Polybench_JACOBI_1D : { - kernel = new 
polybench::POLYBENCH_JACOBI_1D(run_params); - break; - } - case Polybench_JACOBI_2D : { - kernel = new polybench::POLYBENCH_JACOBI_2D(run_params); - break; - } - case Polybench_MVT : { - kernel = new polybench::POLYBENCH_MVT(run_params); - break; - } - -// -// Stream kernels... -// - case Stream_ADD : { - kernel = new stream::ADD(run_params); - break; - } - case Stream_COPY : { - kernel = new stream::COPY(run_params); - break; - } - case Stream_DOT : { - kernel = new stream::DOT(run_params); - break; - } - case Stream_MUL : { - kernel = new stream::MUL(run_params); - break; - } - case Stream_TRIAD : { - kernel = new stream::TRIAD(run_params); - break; - } - -// -// Apps kernels... -// - case Apps_COUPLE : { - kernel = new apps::COUPLE(run_params); - break; - } - case Apps_DEL_DOT_VEC_2D : { - kernel = new apps::DEL_DOT_VEC_2D(run_params); - break; - } - case Apps_DIFFUSION3DPA : { - kernel = new apps::DIFFUSION3DPA(run_params); - break; - } - case Apps_ENERGY : { - kernel = new apps::ENERGY(run_params); - break; - } - case Apps_FIR : { - kernel = new apps::FIR(run_params); - break; - } - case Apps_HALOEXCHANGE : { - kernel = new apps::HALOEXCHANGE(run_params); - break; - } - case Apps_HALOEXCHANGE_FUSED : { - kernel = new apps::HALOEXCHANGE_FUSED(run_params); - break; - } - case Apps_LTIMES : { - kernel = new apps::LTIMES(run_params); - break; - } - case Apps_LTIMES_NOVIEW : { - kernel = new apps::LTIMES_NOVIEW(run_params); - break; - } - case Apps_MASS3DPA : { - kernel = new apps::MASS3DPA(run_params); - break; - } - case Apps_PRESSURE : { - kernel = new apps::PRESSURE(run_params); - break; - } - case Apps_VOL3D : { - kernel = new apps::VOL3D(run_params); - break; - } - -// -// Algorithm kernels... 
-// - case Algorithm_SORT: { - kernel = new algorithm::SORT(run_params); - break; - } - case Algorithm_SORTPAIRS: { - kernel = new algorithm::SORTPAIRS(run_params); - break; - } - - default: { - std::cout << "\n Unknown Kernel ID = " << kid << std::endl; - } - - } // end switch on kernel id - - return kernel; -} - } // closing brace for rajaperf namespace diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index ca4f10f1d..c0a47515f 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -6,19 +6,120 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -/// -/// Tyoes and methods for managing Suite kernels, variants, features, etc.. -/// +/// Declare types, methods and namespaces to enable RAJAPerf Suite to handle Kokkos kernels, variants, features, etc. #ifndef RAJAPerfSuite_HPP #define RAJAPerfSuite_HPP +#ifndef RAJAPERF_INFRASTRUCTURE_ONLY #include "RAJA/config.hpp" -#include +#if defined(RUN_KOKKOS) +#include "Kokkos_Core.hpp" +#endif // RUN_KOKKOS +#endif // RAJAPERF_INFRASTRUCTURE_ONLY +#include namespace rajaperf { +class RunParams; +class Executor; +class KernelBase; + +const RunParams& getRunParams(Executor*); +void free_register_group(Executor*, std::string); +void free_register_kernel(Executor*, std::string, KernelBase*); +void make_perfsuite_executor(Executor* exec, int argc, char* argv[]); + +#if defined(RUN_KOKKOS) +template +struct PointerOfNdimensions; + +template +struct PointerOfNdimensions { + using type = PointedAt; +}; + +template +struct PointerOfNdimensions { + using type = + typename PointerOfNdimensions::type *; +}; + +// This templated function is used to wrap pointers +// (declared and defined in RAJAPerf Suite kernels) in Kokkos Views +// +template +auto getViewFromPointer(PointedAt *kokkos_ptr, Boundaries... 
boundaries) + -> typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space> + +{ + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + + using mirror_view_type = typename device_view_type::HostMirror; + + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // The boundaries parameter pack contains the array dimenions; + // An allocation is implicitly made here + device_view_type device_data_copy("StringName", boundaries...); + + mirror_view_type cpu_to_gpu_mirror = + Kokkos::create_mirror_view(device_data_copy); + + + Kokkos::deep_copy(cpu_to_gpu_mirror, pointer_holder); + + Kokkos::deep_copy(device_data_copy, cpu_to_gpu_mirror); + + // Kokkos::View return type + + return device_data_copy; +} + +// This function will move data in a Kokkos::View back to host from device, +// and will be stored in the existing pointer(s) +template +void moveDataToHostFromKokkosView(PointedAt *kokkos_ptr, ExistingView my_view, + Boundaries... 
boundaries) +{ + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + using mirror_view_type = typename device_view_type::HostMirror; + + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // Layout is optimal for gpu, but data are actually located on CPU + mirror_view_type cpu_to_gpu_mirror = Kokkos::create_mirror_view(my_view); + + // Actual copying of the data from the gpu (my_view) back to the cpu + Kokkos::deep_copy(cpu_to_gpu_mirror, my_view); + + // This copies from the mirror on the host cpu back to the existing + // pointer(s) + Kokkos::deep_copy(pointer_holder, cpu_to_gpu_mirror); +} + +#endif // RUN_KOKKOS class KernelBase; class RunParams; @@ -113,7 +214,7 @@ enum KernelID { Polybench_JACOBI_2D, Polybench_MVT, -// + // Stream kernels... // Stream_ADD, @@ -124,8 +225,7 @@ enum KernelID { // // Apps kernels... -// - Apps_COUPLE, + //Apps_COUPLE, Apps_DEL_DOT_VEC_2D, Apps_DIFFUSION3DPA, Apps_ENERGY, @@ -182,6 +282,9 @@ enum VariantID { Lambda_HIP, RAJA_HIP, + Kokkos_Lambda, + Kokkos_Functor, + NumVariants // Keep this one last and NEVER comment out (!!) }; diff --git a/src/common/RPTypes.hpp b/src/common/RPTypes.hpp index e48ba53c1..7f0388b9e 100644 --- a/src/common/RPTypes.hpp +++ b/src/common/RPTypes.hpp @@ -6,16 +6,16 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -/// -/// Basic data types used in the Suite. +/// Basic data types used in RAJAPerf Suite. 
/// #ifndef RAJAPerf_RPTypes_HPP #define RAJAPerf_RPTypes_HPP - +// This macro, RAJAPERF_INFRASTRUCTURE_ONLY, is for Kokkos and Kokkos Kernels +#ifndef RAJAPERF_INFRASTRUCTURE_ONLY #include "RAJA/util/types.hpp" +#endif -// // Only one of the following (double or float) should be defined. // #define RP_USE_DOUBLE @@ -56,11 +56,13 @@ using RepIndex_type = volatile int; * ****************************************************************************** */ +#ifndef RAJAPERF_INFRASTRUCTURE_ONLY using Index_type = RAJA::Index_type; -/// +#else +using Index_type = int64_t; +#endif using Index_ptr = Index_type*; - /*! ****************************************************************************** * @@ -113,8 +115,6 @@ using Complex_ptr = Complex_type*; #endif - - } // closing brace for rajaperf namespace #endif // closing endif for header file include guard diff --git a/src/common/RunParams.cpp b/src/common/RunParams.cpp index e038863c1..fe41ffdc9 100644 --- a/src/common/RunParams.cpp +++ b/src/common/RunParams.cpp @@ -52,6 +52,7 @@ RunParams::RunParams(int argc, char** argv) outfile_prefix("RAJAPerf") { parseCommandLineOptions(argc, argv); + } @@ -589,12 +590,7 @@ void RunParams::printKernelNames(std::ostream& str) const { str << "\nAvailable kernels:"; str << "\n------------------\n"; - for (int kid = 0; kid < NumKernels; ++kid) { -/// RDH DISABLE COUPLE KERNEL - if (static_cast(kid) != Apps_COUPLE) { - str << getKernelName(static_cast(kid)) << std::endl; - } - } + str.flush(); } @@ -603,12 +599,7 @@ void RunParams::printFullKernelNames(std::ostream& str) const { str << "\nAvailable kernels (_):"; str << "\n-----------------------------------------\n"; - for (int kid = 0; kid < NumKernels; ++kid) { -/// RDH DISABLE COUPLE KERNEL - if (static_cast(kid) != Apps_COUPLE) { - str << getFullKernelName(static_cast(kid)) << std::endl; - } - } + str.flush(); } @@ -651,24 +642,16 @@ void RunParams::printFeatureKernels(std::ostream& str) const for (int fid = 0; fid < NumFeatures; 
++fid) { FeatureID tfid = static_cast(fid); str << getFeatureName(tfid) << std::endl; - for (int kid = 0; kid < NumKernels; ++kid) { - KernelID tkid = static_cast(kid); -/// RDH DISABLE COUPLE KERNEL - if (tkid != Apps_COUPLE) { - KernelBase* kern = getKernelObject(tkid, *this); - if ( kern->usesFeature(tfid) ) { - str << "\t" << getFullKernelName(tkid) << std::endl; - } - delete kern; - } - } // loop over kernels + str << std::endl; } // loop over features str.flush(); } - +// TODO for Kokkos Team: Commenting function body, because this infrastructure +// has not yet been integrated with Kokkos testing infrastructure void RunParams::printKernelFeatures(std::ostream& str) const { +/* str << "\nAvailable kernels and features each uses:"; str << "\n-----------------------------------------\n"; for (int kid = 0; kid < NumKernels; ++kid) { @@ -687,6 +670,8 @@ void RunParams::printKernelFeatures(std::ostream& str) const } } // loop over kernels str.flush(); + +*/ } } // closing brace for rajaperf namespace diff --git a/src/kokkos-mechanics/CMakeLists.txt b/src/kokkos-mechanics/CMakeLists.txt new file mode 100644 index 000000000..4cb068473 --- /dev/null +++ b/src/kokkos-mechanics/CMakeLists.txt @@ -0,0 +1,20 @@ +############################################################################### +# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +blt_add_library( + NAME kokkos-mechanics + SOURCES ViewAllocate.cpp + ViewAllocate-Stubs.cpp + ViewAllocate-KokkosSeq.cpp + ViewAllocate-KokkosCuda.cpp + ViewStreamAdd.cpp + ViewStreamAdd-Stubs.cpp + ViewStreamAdd-KokkosSeq.cpp + ViewStreamAdd-KokkosCuda.cpp + DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} + ) diff --git a/src/kokkos-mechanics/ViewAllocate-KokkosCuda.cpp b/src/kokkos-mechanics/ViewAllocate-KokkosCuda.cpp new file mode 100644 index 000000000..1575b8dc3 --- /dev/null +++ b/src/kokkos-mechanics/ViewAllocate-KokkosCuda.cpp @@ -0,0 +1,72 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ViewAllocate.hpp" + +#include "RAJA/RAJA.hpp" +#if defined (RAJA_ENABLE_CUDA) + +#include + +namespace rajaperf +{ +namespace kokkos_mechanics +{ + + +// Kokkos-ify here + +void ViewAllocate::runKokkosCudaVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type data_size = getRunSize(); + + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + // AJP added (following DAXPY example) -- + +//#if defined(RUN_KOKKOS) +//#if defined(RUN_OPENMP) + + + + case Kokkos_Lambda_CUDA : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Test Device case / GPU + Kokkos::View + kk_view("kk_view", data_size); + + } + stopTimer(); + + break; + } + + default : { + std::cout << "\n ViewAllocate : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + + + +} + +} // end namespace basic +} // end namespace rajaperf +#endif // 
RAJA_ENABLE_CUDA diff --git a/src/kokkos-mechanics/ViewAllocate-KokkosSeq.cpp b/src/kokkos-mechanics/ViewAllocate-KokkosSeq.cpp new file mode 100644 index 000000000..4795c58d1 --- /dev/null +++ b/src/kokkos-mechanics/ViewAllocate-KokkosSeq.cpp @@ -0,0 +1,76 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ViewAllocate.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace kokkos_mechanics +{ + + +// Kokkos-ify here +//void ViewAllocate::runSeqVariant(VariantID vid) + +void ViewAllocate::runKokkosSeqVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type data_size = getRunSize(); + + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + // AJP added (following DAXPY example) -- + +//#if defined(RUN_KOKKOS) +//#if defined(RUN_OPENMP) + + +#if defined(RUN_RAJA_SEQ) + + case Kokkos_Lambda_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +/* RAJA::forall( + RAJA::RangeSegment(ibegin, iend), ifquad_lam); +*/ + // Test host case / CPU + Kokkos::View + kk_view("kk_view", data_size); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n ViewAllocate : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + + + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/kokkos-mechanics/ViewAllocate-Stubs.cpp b/src/kokkos-mechanics/ViewAllocate-Stubs.cpp new file mode 100644 index 000000000..dd4c9325f --- /dev/null +++ b/src/kokkos-mechanics/ViewAllocate-Stubs.cpp @@ -0,0 +1,30 @@ 
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ViewAllocate.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace kokkos_mechanics +{ + +void ViewAllocate::runSeqVariant(VariantID vid) +{ +} + +void ViewAllocate::runOpenMPVariant(VariantID vid) {} +void ViewAllocate::runCudaVariant(VariantID vid) {} +void ViewAllocate::runHipVariant(VariantID vid) {} +void ViewAllocate::runOpenMPTargetVariant(VariantID vid){} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/kokkos-mechanics/ViewAllocate.cpp b/src/kokkos-mechanics/ViewAllocate.cpp new file mode 100644 index 000000000..b9013a882 --- /dev/null +++ b/src/kokkos-mechanics/ViewAllocate.cpp @@ -0,0 +1,52 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ViewAllocate.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/DataUtils.hpp" + +namespace rajaperf +{ +namespace kokkos_mechanics +{ + +// Syntax for C++ constructor +ViewAllocate::ViewAllocate(const RunParams& params) + : KernelBase(rajaperf::KokkosMechanics_ViewAllocate, params) +{ + setDefaultSize(100000); + setDefaultReps(5000); + + setVariantDefined( Kokkos_Lambda_Seq); + setVariantDefined( Kokkos_Lambda_OpenMP); + setVariantDefined( Kokkos_Lambda_OpenMPTarget); + setVariantDefined( Kokkos_Lambda_CUDA); +} +//Defining the destructor (for the struct) +ViewAllocate::~ViewAllocate() +{ +} + +void ViewAllocate::setUp(VariantID vid) +{ +} + +void ViewAllocate::updateChecksum(VariantID vid) +{ +// checksum[vid] += calcChecksum(m_y, getRunSize()); +} + +void ViewAllocate::tearDown(VariantID vid) +{ + (void) vid; +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/kokkos-mechanics/ViewAllocate.hpp b/src/kokkos-mechanics/ViewAllocate.hpp new file mode 100644 index 000000000..3e8d45ff7 --- /dev/null +++ b/src/kokkos-mechanics/ViewAllocate.hpp @@ -0,0 +1,56 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// ViewAllocate kernel reference implementation: +/// +/// for (Index_type i = ibegin; i < iend; ++i ) { +/// y[i] += a * x[i] ; +/// } +/// + +#ifndef RAJAPerf_Basic_ViewAllocate_HPP +#define RAJAPerf_Basic_ViewAllocate_HPP + +#include "common/KernelBase.hpp" + +namespace rajaperf +{ +class RunParams; + +namespace kokkos_mechanics +{ + +class ViewAllocate : public KernelBase +{ +public: + + ViewAllocate(const RunParams& params); + + ~ViewAllocate(); + + void setUp(VariantID vid); + void updateChecksum(VariantID vid); + void tearDown(VariantID vid); + + void runSeqVariant(VariantID vid); + void runOpenMPVariant(VariantID vid); + void runCudaVariant(VariantID vid); + void runHipVariant(VariantID vid); + void runOpenMPTargetVariant(VariantID vid); + + void runKokkosSeqVariant(VariantID vid); + void runKokkosOpenMPVariant(VariantID vid); + void runKokkosCudaVariant(VariantID vid); + void runKokkosOpenMPTargetVariant(VariantID vid); +}; + +} // end namespace basic +} // end namespace rajaperf + +#endif // closing endif for header file include guard diff --git a/src/kokkos-mechanics/ViewStreamAdd-KokkosCuda.cpp b/src/kokkos-mechanics/ViewStreamAdd-KokkosCuda.cpp new file mode 100644 index 000000000..d3ad7ea5d --- /dev/null +++ b/src/kokkos-mechanics/ViewStreamAdd-KokkosCuda.cpp @@ -0,0 +1,82 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ViewStreamAdd.hpp" + +#include "RAJA/RAJA.hpp" +#if defined (RAJA_ENABLE_CUDA) + +#include + +namespace rajaperf +{ +namespace kokkos_mechanics +{ + + +// Kokkos-ify here + +void ViewStreamAdd::runKokkosCudaVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type data_size = getRunSize(); + + + +#if defined(RUN_KOKKOS) + + + Kokkos::View d_a("device_a",getRunSize()); + Kokkos::View d_b("device_b",getRunSize()); + Kokkos::View d_c("device_c",getRunSize()); + + Kokkos::deep_copy(d_a,h_a); + Kokkos::deep_copy(d_b,h_b); + Kokkos::deep_copy(d_c,h_c); + + switch ( vid ) { + + // AJP added (following DAXPY example) -- + +//#if defined(RUN_KOKKOS) +//#if defined(RUN_OPENMP) + + + + case Kokkos_Lambda_CUDA : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Test Device case / GPU + Kokkos::parallel_for("perfsuite.kokkos_mechanics.view_stream_add.cuda.lambda",Kokkos::RangePolicy(0,data_size), [=] __device__ (int i) { + d_c[i] = d_a[i] + d_b[i]; + }); + + } + stopTimer(); + + break; + } + + default : { + std::cout << "\n ViewStreamAdd : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + + + +} + +} // end namespace basic +} // end namespace rajaperf +#endif // RAJA_ENABLE_CUDA diff --git a/src/kokkos-mechanics/ViewStreamAdd-KokkosSeq.cpp b/src/kokkos-mechanics/ViewStreamAdd-KokkosSeq.cpp new file mode 100644 index 000000000..029486f73 --- /dev/null +++ b/src/kokkos-mechanics/ViewStreamAdd-KokkosSeq.cpp @@ -0,0 +1,77 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ViewStreamAdd.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace kokkos_mechanics +{ + + +// Kokkos-ify here +//void ViewStreamAdd::runSeqVariant(VariantID vid) + +void ViewStreamAdd::runKokkosSeqVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type data_size = getRunSize(); + + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + // AJP added (following DAXPY example) -- + +//#if defined(RUN_KOKKOS) +//#if defined(RUN_OPENMP) + + +#if defined(RUN_RAJA_SEQ) + + case Kokkos_Lambda_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +/* RAJA::forall( + RAJA::RangeSegment(ibegin, iend), ifquad_lam); +*/ + // Test host case / CPU + Kokkos::parallel_for("perfsuite.kokkos_mechanics.view_stream_add.seq.lambda",Kokkos::RangePolicy(0,data_size), [=](int i) { + h_c[i] = h_a[i] + h_b[i]; + }); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n ViewStreamAdd : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + + + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/kokkos-mechanics/ViewStreamAdd-Stubs.cpp b/src/kokkos-mechanics/ViewStreamAdd-Stubs.cpp new file mode 100644 index 000000000..c43d14ad4 --- /dev/null +++ b/src/kokkos-mechanics/ViewStreamAdd-Stubs.cpp @@ -0,0 +1,30 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ViewStreamAdd.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace kokkos_mechanics +{ + +void ViewStreamAdd::runSeqVariant(VariantID vid) +{ +} + +void ViewStreamAdd::runOpenMPVariant(VariantID vid) {} +void ViewStreamAdd::runCudaVariant(VariantID vid) {} +void ViewStreamAdd::runHipVariant(VariantID vid) {} +void ViewStreamAdd::runOpenMPTargetVariant(VariantID vid){} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/kokkos-mechanics/ViewStreamAdd.cpp b/src/kokkos-mechanics/ViewStreamAdd.cpp new file mode 100644 index 000000000..c48a1c4fc --- /dev/null +++ b/src/kokkos-mechanics/ViewStreamAdd.cpp @@ -0,0 +1,57 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "ViewStreamAdd.hpp" + +#include "RAJA/RAJA.hpp" + +#include "common/DataUtils.hpp" + +namespace rajaperf +{ +namespace kokkos_mechanics +{ + +// Syntax for C++ constructor +ViewStreamAdd::ViewStreamAdd(const RunParams& params) + : KernelBase(rajaperf::KokkosMechanics_ViewStreamAdd, params) +{ + setDefaultSize(100000); + setDefaultReps(5000); + + setVariantDefined( Kokkos_Lambda_Seq); + setVariantDefined( Kokkos_Lambda_OpenMP); + setVariantDefined( Kokkos_Lambda_OpenMPTarget); + setVariantDefined( Kokkos_Lambda_CUDA); +} +//Defining the destructor (for the struct) +ViewStreamAdd::~ViewStreamAdd() +{ +} + +void ViewStreamAdd::setUp(VariantID vid) +{ + h_a = VT("host_a",getRunSize()); + h_b = VT("host_b",getRunSize()); + h_c = VT("host_c",getRunSize()); + Kokkos::deep_copy(h_a,1.0f); + Kokkos::deep_copy(h_b,2.0f); +} + +void ViewStreamAdd::updateChecksum(VariantID vid) +{ +// checksum[vid] += calcChecksum(m_y, getRunSize()); +} + +void ViewStreamAdd::tearDown(VariantID vid) +{ + (void) vid; +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/kokkos-mechanics/ViewStreamAdd.hpp b/src/kokkos-mechanics/ViewStreamAdd.hpp new file mode 100644 index 000000000..62861a8f4 --- /dev/null +++ b/src/kokkos-mechanics/ViewStreamAdd.hpp @@ -0,0 +1,75 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// ViewStreamAdd kernel reference implementation: +/// +/// for (Index_type i = ibegin; i < iend; ++i ) { +/// y[i] += a * x[i] ; +/// } +/// + +#ifndef RAJAPerf_Basic_ViewStreamAdd_HPP +#define RAJAPerf_Basic_ViewStreamAdd_HPP + +#define ViewStreamAdd_DATA_SETUP \ + Real_ptr x = m_x; \ + Real_ptr y = m_y; \ + Real_type a = m_a; + +#define ViewStreamAdd_FUNCTOR_CONSTRUCT \ + x(m_x),\ + y(m_y), \ + a(m_a) + +#define ViewStreamAdd_BODY \ + y[i] += a * x[i] ; + + +#include "common/KernelBase.hpp" + +namespace rajaperf +{ +class RunParams; + +namespace kokkos_mechanics +{ + +class ViewStreamAdd : public KernelBase +{ +public: + + ViewStreamAdd(const RunParams& params); + + ~ViewStreamAdd(); + + void setUp(VariantID vid); + void updateChecksum(VariantID vid); + void tearDown(VariantID vid); + + void runSeqVariant(VariantID vid); + void runOpenMPVariant(VariantID vid); + void runCudaVariant(VariantID vid); + void runHipVariant(VariantID vid); + void runOpenMPTargetVariant(VariantID vid); + + void runKokkosSeqVariant(VariantID vid); + void runKokkosOpenMPVariant(VariantID vid); + void runKokkosCudaVariant(VariantID vid); + void runKokkosOpenMPTargetVariant(VariantID vid); +private: + using VT=Kokkos::View; + VT h_a; + VT h_b; + VT h_c; +}; + +} // end namespace basic +} // end namespace rajaperf + +#endif // closing endif for header file include guard diff --git a/src/lcals-kokkos/CMakeLists.txt b/src/lcals-kokkos/CMakeLists.txt new file mode 100644 index 000000000..b032d67e9 --- /dev/null +++ b/src/lcals-kokkos/CMakeLists.txt @@ -0,0 +1,26 @@ +############################################################################### +# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + + +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../lcals) + +blt_add_library( + NAME lcals-kokkos + SOURCES DIFF_PREDICT-Kokkos.cpp + EOS-Kokkos.cpp + FIRST_DIFF-Kokkos.cpp + FIRST_MIN-Kokkos.cpp + FIRST_SUM-Kokkos.cpp + GEN_LIN_RECUR-Kokkos.cpp + HYDRO_1D-Kokkos.cpp + HYDRO_2D-Kokkos.cpp + INT_PREDICT-Kokkos.cpp + PLANCKIAN-Kokkos.cpp + TRIDIAG_ELIM-Kokkos.cpp + DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} + ) diff --git a/src/lcals-kokkos/DIFF_PREDICT-Kokkos.cpp b/src/lcals-kokkos/DIFF_PREDICT-Kokkos.cpp new file mode 100644 index 000000000..76753574e --- /dev/null +++ b/src/lcals-kokkos/DIFF_PREDICT-Kokkos.cpp @@ -0,0 +1,104 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DIFF_PREDICT.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace lcals +{ + +template +void diff_predict_helper(Index_type run_reps, + Index_type ibegin, + Index_type iend, + Index_type offset, + // a Kokkos View + px_type& px, + // a Kokkos View + cx_type& cx){ + + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for("DIFF_PREDICT_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + DIFF_PREDICT_BODY + }); + + } +} + + + +void DIFF_PREDICT::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + + DIFF_PREDICT_DATA_SETUP; + + + // Instiating KokkosViews using getViewFromPointer; + // Wrapping pointers in KokkosViews + + // You need to know the actual array size here to catch errors; + + auto px_view = getViewFromPointer(px, iend*14); + auto cx_view = getViewFromPointer(cx, iend*14); + + + auto diffpredict_lam = [=](Index_type i) { + DIFF_PREDICT_BODY; + }; + + #if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Kokkos_Lambda : { + Kokkos::fence(); + startTimer(); + + diff_predict_helper(run_reps, + ibegin, + iend, + offset, + px_view, + cx_view); + + + Kokkos::fence(); + stopTimer(); + break; + + } + + + default : { + std::cout << "\n DIFF_PREDICT : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(px, px_view, iend*14); + moveDataToHostFromKokkosView(cx, cx_view, iend*14); + +} + +} // end namespace lcals +} // end namespace rajaperf diff --git a/src/lcals-kokkos/EOS-Kokkos.cpp b/src/lcals-kokkos/EOS-Kokkos.cpp new file mode 100644 index 000000000..0bc5cd1bc --- /dev/null +++ b/src/lcals-kokkos/EOS-Kokkos.cpp @@ -0,0 +1,95 @@ 
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "EOS.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of EOS: times run_reps launches of the kernel over
+// Views obtained from getViewFromPointer (presumably wrapping x, y, z, u).
+void EOS::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  EOS_DATA_SETUP;
+
+  auto x_view = getViewFromPointer(x, iend + 7);
+  auto y_view = getViewFromPointer(y, iend + 7);
+  auto z_view = getViewFromPointer(z, iend + 7);
+  auto u_view = getViewFromPointer(u, iend + 7);
+
+
+  auto eos_lam = [=](Index_type i) {
+                   EOS_BODY;
+                 };
+
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        Kokkos::parallel_for("EOS_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            /*
+            #define EOS_BODY \
+              x[i] = u[i] + r*( z[i] + r*y[i] ) + \
+                     t*( u[i+3] + r*( u[i+2] + r*u[i+1] ) + \
+                        t*( u[i+6] + q*( u[i+5] + q*u[i+4] ) ) );
+            */
+            // View-based transcription of EOS_BODY (quoted above).
+            // q, r and t are not redeclared here; they are captured by
+            // value from the enclosing scope (presumably declared by
+            // EOS_DATA_SETUP - confirm against EOS.hpp).
+            // Highest index read is u_view[i+6], i.e. at most iend + 5.
+
+            x_view[i] = u_view[i] + r*( z_view[i] + r*y_view[i] ) +
+                        t*( u_view[i+3] + r*( u_view[i+2] + r*u_view[i+1] ) +
+                           t*( u_view[i+6] + q*( u_view[i+5] + q*u_view[i+4] ) ) );
+          });
+
+      }
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+
+
+    default : {
+      std::cout << "\n EOS : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  moveDataToHostFromKokkosView(x, x_view, iend + 7);
+  moveDataToHostFromKokkosView(y, y_view, iend + 7);
+  moveDataToHostFromKokkosView(z, z_view, iend + 7);
+  moveDataToHostFromKokkosView(u, u_view, iend + 7);
+
+
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals-kokkos/FIRST_DIFF-Kokkos.cpp b/src/lcals-kokkos/FIRST_DIFF-Kokkos.cpp
new file mode 100644
index 000000000..fcd641f16
--- /dev/null
+++ b/src/lcals-kokkos/FIRST_DIFF-Kokkos.cpp
@@ -0,0 +1,90 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_DIFF.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of FIRST_DIFF: x[i] = y[i+1] - y[i] over [0, iend),
+// timed for run_reps launches; Views come from getViewFromPointer.
+void FIRST_DIFF::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_DIFF_DATA_SETUP;
+
+// From FIRST_DIFF.hpp
+/*
+#define FIRST_DIFF_DATA_SETUP \
+  Real_ptr x = m_x; \
+  Real_ptr y = m_y;
+
+*/
+  // lcals = livermore compiler analysis loops suite
+  // Instantiating Kokkos Views using getViewFromPointer;
+  // Wrapping pointers in Kokkos Views
+
+// attn: look at the definition in setup in FIRST_DIFF.cpp:
+  auto x_view = getViewFromPointer(x, iend + 1);
+  auto y_view = getViewFromPointer(y, iend + 1);
+
+  auto firstdiff_lam = [=](Index_type i) {
+                         FIRST_DIFF_BODY;
+                       };
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        Kokkos::parallel_for("FIRST_DIFF_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            /* #define FIRST_DIFF_BODY \
+                 x[i] = y[i+1] - y[i];
+            */
+            x_view[i] = y_view[i + 1] - y_view[i];
+          });
+
+      }
+
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n FIRST_DIFF : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  // ATTN: View dimensions must match array dimensions!
+  moveDataToHostFromKokkosView(x, x_view, iend + 1);
+  moveDataToHostFromKokkosView(y, y_view, iend + 1);
+
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals-kokkos/FIRST_MIN-Kokkos.cpp b/src/lcals-kokkos/FIRST_MIN-Kokkos.cpp
new file mode 100644
index 000000000..04d1f18b5
--- /dev/null
+++ b/src/lcals-kokkos/FIRST_MIN-Kokkos.cpp
@@ -0,0 +1,100 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_MIN.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of FIRST_MIN: MinLoc reduction over x on [0, iend),
+// repeated run_reps times; the winning index is stored in m_minloc.
+void FIRST_MIN::runKokkosVariant(VariantID vid)
+
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_MIN_DATA_SETUP;
+
+// #define FIRST_MIN_DATA_SETUP
+//   Real_ptr x = m_x;
+
+  auto x_view = getViewFromPointer(x, iend);
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+// https://github.com/kokkos/kokkos/wiki/Kokkos::MinLoc
+// MinLoc<T,Index,Space>::value_type result;
+// parallel_reduce(N,Functor,MinLoc<T,Index,Space>(result));
+
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // The 3rd template argument is the memory space in which the
+        // result will be stored; the result will be in the place the
+        // kernel is called from , i.e., the Host
+        using reducer_type = Kokkos::MinLoc<Real_type, Index_type, Kokkos::HostSpace>;
+        // must hold the value and the location;
+        // Create a variable to hold the result from parallel_reduce
+        reducer_type::value_type min_result_obj;
+
+        Kokkos::parallel_reduce("FIRST_MIN_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i, reducer_type::value_type& mymin) {
+
+            // #define FIRST_MIN_BODY
+            //   if ( x[i] < mymin.val ) {
+            //     mymin.val = x[i];
+            //     mymin.loc = i;
+            //   }
+
+            if (x_view[i] < mymin.val) {
+              mymin.val = x_view[i];
+              mymin.loc = i;
+            }
+
+            // NOTE(review): on ties MinLoc may not report the first index - confirm
+          }, reducer_type(min_result_obj));
+
+
+        // Kokkos translation of line below
+        // m_minloc = RAJA_MAX(m_minloc, loc.getLoc());
+        m_minloc = min_result_obj.loc;
+
+      }
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n FIRST_MIN : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  moveDataToHostFromKokkosView(x, x_view, iend);
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals-kokkos/FIRST_SUM-Kokkos.cpp b/src/lcals-kokkos/FIRST_SUM-Kokkos.cpp
new file mode 100644
index 000000000..77ebeefcb
--- /dev/null
+++ b/src/lcals-kokkos/FIRST_SUM-Kokkos.cpp
@@ -0,0 +1,79 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "FIRST_SUM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of FIRST_SUM: x[i] = y[i-1] + y[i] over [1, iend),
+// timed for run_reps launches; Views come from getViewFromPointer.
+void FIRST_SUM::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize();
+
+  FIRST_SUM_DATA_SETUP;
+
+  // wrap pointers in Kokkos Views
+  auto x_view = getViewFromPointer(x, iend);
+  auto y_view = getViewFromPointer(y, iend);
+
+  auto firstsum_lam = [=](Index_type i) {
+                        FIRST_SUM_BODY;
+                      };
+
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Kokkos::parallel_for("FIRST_SUM_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            //#define FIRST_SUM_BODY
+            //x[i] = y[i-1] + y[i];
+            x_view[i] = y_view[i - 1] + y_view[i];
+          });
+
+      }
+
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+
+    default : {
+      std::cout << "\n FIRST_SUM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  moveDataToHostFromKokkosView(x, x_view, iend);
+  moveDataToHostFromKokkosView(y, y_view, iend);
+
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals-kokkos/GEN_LIN_RECUR-Kokkos.cpp b/src/lcals-kokkos/GEN_LIN_RECUR-Kokkos.cpp
new file mode 100644
index 000000000..ed4806972
--- /dev/null
+++ b/src/lcals-kokkos/GEN_LIN_RECUR-Kokkos.cpp
@@ -0,0 +1,118 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "GEN_LIN_RECUR.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of GEN_LIN_RECUR: per repetition runs BODY1 over
+// [0, N) and then BODY2 over [1, N+1) (which maps i to k = N - i).
+void GEN_LIN_RECUR::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize();
+
+  GEN_LIN_RECUR_DATA_SETUP;
+
+// wrap pointers in Kokkos Views
+
+  auto b5_view = getViewFromPointer(b5, iend);
+  auto sa_view = getViewFromPointer(sa, iend);
+  auto sb_view = getViewFromPointer(sb, iend);
+  auto stb5_view = getViewFromPointer(stb5, iend);
+
+// RAJAPerf Suite Lambdas
+
+  auto genlinrecur_lam1 = [=](Index_type k) {
+                            GEN_LIN_RECUR_BODY1;
+                          };
+  auto genlinrecur_lam2 = [=](Index_type i) {
+                            GEN_LIN_RECUR_BODY2;
+                          };
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTA BENE:
+        // Index_type kb5i = m_kb5i;
+        // Index_type N = m_N;
+
+        Kokkos::parallel_for("GEN_LIN_RECUR_Kokkos Kokkos Lambda -- BODY1",
+          // Here, RAJAPerf Suite (RPS) indices are (0, N) for BODY1
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0, N),
+          KOKKOS_LAMBDA(Index_type k) {
+            /*
+             * #define GEN_LIN_RECUR_BODY1
+             * b5[k+kb5i] = sa[k] + stb5[k]*sb[k];
+             * stb5[k] = b5[k+kb5i] - stb5[k];
+             * */
+            b5_view[k+kb5i] = sa_view[k] + stb5_view[k]*sb_view[k];
+            stb5_view[k] = b5_view[k+kb5i] - stb5_view[k];
+          });
+
+
+
+        Kokkos::parallel_for("GEN_LIN_RECUR_Kokkos Kokkos Lambda -- BODY2",
+          // ATTN: you must adjust indices to align with
+          // RPS design intent here;
+          // RPS indices are (1, N+1) for BODY2
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(1, N+1),
+          KOKKOS_LAMBDA(Index_type i) {
+            /*
+            #define GEN_LIN_RECUR_BODY2 \
+              Index_type k = N - i ; \
+              b5[k+kb5i] = sa[k] + stb5[k]*sb[k]; \
+              stb5[k] = b5[k+kb5i] - stb5[k];
+            */
+            Index_type k = N - i ;
+
+            b5_view[k+kb5i] = sa_view[k] + stb5_view[k]*sb_view[k];
+            stb5_view[k] = b5_view[k+kb5i] - stb5_view[k];
+
+          });
+
+      }
+
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n GEN_LIN_RECUR : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  moveDataToHostFromKokkosView(b5, b5_view, iend);
+  moveDataToHostFromKokkosView(sa, sa_view, iend);
+  moveDataToHostFromKokkosView(sb, sb_view, iend);
+  moveDataToHostFromKokkosView(stb5, stb5_view, iend);
+
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals-kokkos/HYDRO_1D-Kokkos.cpp b/src/lcals-kokkos/HYDRO_1D-Kokkos.cpp
new file mode 100644
index 000000000..27a8d4f12
--- /dev/null
+++ b/src/lcals-kokkos/HYDRO_1D-Kokkos.cpp
@@ -0,0 +1,94 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_1D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of HYDRO_1D: x[i] = q + y[i]*(r*z[i+10] + t*z[i+11])
+// over [0, iend), timed for run_reps launches.
+void HYDRO_1D::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  HYDRO_1D_DATA_SETUP;
+
+  // Wrap pointers in Kokkos Views
+  /*
+   * #define HYDRO_1D_DATA_SETUP \
+     Real_ptr x = m_x; \
+     Real_ptr y = m_y; \
+     Real_ptr z = m_z;
+  */
+
+  auto x_view = getViewFromPointer(x, iend + 12);
+  auto y_view = getViewFromPointer(y, iend + 12);
+  auto z_view = getViewFromPointer(z, iend + 12);
+
+
+  auto hydro1d_lam = [=](Index_type i) {
+                       HYDRO_1D_BODY;
+                     };
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Kokkos::parallel_for("HYDRO_1D_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          // #define HYDRO_1D_BODY
+          // x[i] = q + y[i]*( r*z[i+10] + t*z[i+11] );
+          KOKKOS_LAMBDA(Index_type i) {
+            x_view[i] = q + y_view[i]*( r*z_view[i+10] + t*z_view[i+11] );
+          });
+
+      }
+
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+
+    default : {
+      std::cout << "\n HYDRO_1D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  // ATTN: Adjust array dimensions to be congruent with the setup
+  // in the .cpp file:
+  // m_array_length = getActualProblemSize() + 12;
+
+
+  moveDataToHostFromKokkosView(x, x_view, iend + 12);
+  moveDataToHostFromKokkosView(y, y_view, iend + 12);
+  moveDataToHostFromKokkosView(z, z_view, iend + 12);
+
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals-kokkos/HYDRO_2D-Kokkos.cpp b/src/lcals-kokkos/HYDRO_2D-Kokkos.cpp
new file mode 100644
index 000000000..7f7aada10
--- /dev/null
+++
b/src/lcals-kokkos/HYDRO_2D-Kokkos.cpp
@@ -0,0 +1,196 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "HYDRO_2D.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of HYDRO_2D: three MDRange launches (BODY1..BODY3)
+// per repetition over the interior [kbeg, kend) x [jbeg, jend).
+void HYDRO_2D::runKokkosVariant(VariantID vid)
+{
+
+  const Index_type run_reps = getRunReps();
+  const Index_type kbeg = 1;
+  const Index_type kend = m_kn - 1;
+  const Index_type jbeg = 1;
+  const Index_type jend = m_jn - 1;
+
+  HYDRO_2D_DATA_SETUP;
+
+  // Wrap input pointers in Kokkos::Views
+/*
+#define HYDRO_2D_DATA_SETUP \
+  Real_ptr zadat = m_za; \
+  Real_ptr zbdat = m_zb; \
+  Real_ptr zmdat = m_zm; \
+  Real_ptr zpdat = m_zp; \
+  Real_ptr zqdat = m_zq; \
+  Real_ptr zrdat = m_zr; \
+  Real_ptr zudat = m_zu; \
+  Real_ptr zvdat = m_zv; \
+  Real_ptr zzdat = m_zz; \
+\
+  Real_ptr zroutdat = m_zrout; \
+  Real_ptr zzoutdat = m_zzout; \
+\
+
+*/
+// ATTN: THESE INPUTS ARE 2D Views:
+//
+  auto zadat_view = getViewFromPointer(zadat, kn, jn );
+  auto zbdat_view = getViewFromPointer(zbdat, kn, jn );
+  auto zmdat_view = getViewFromPointer(zmdat, kn, jn );
+  auto zpdat_view = getViewFromPointer(zpdat, kn, jn );
+  auto zqdat_view = getViewFromPointer(zqdat, kn, jn );
+  auto zrdat_view = getViewFromPointer(zrdat, kn, jn );
+  auto zudat_view = getViewFromPointer(zudat, kn, jn );
+  auto zvdat_view = getViewFromPointer(zvdat, kn, jn );
+  auto zzdat_view = getViewFromPointer(zzdat, kn, jn );
+
+  // Wrap output pointers into Kokkos::Views
+
+  auto zroutdat_view = getViewFromPointer(zroutdat, kn, jn );
+  auto zzoutdat_view = getViewFromPointer(zzoutdat, kn, jn );
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+
+        // Use MDRangePolicy for multidimensional arrays
+        // https://github.com/kokkos/kokkos/wiki/Kokkos::MDRangePolicy
+
+        Kokkos::parallel_for("HYDRO_2D_Kokkos Kokkos_Lambda--BODY1",
+          Kokkos::MDRangePolicy<Kokkos::Rank<2>>({kbeg,jbeg}, {kend,jend}),
+          KOKKOS_LAMBDA(int64_t k, int64_t j) {
+            /*
+            #define HYDRO_2D_BODY1_RAJA \
+              za(k,j) = ( zp(k+1,j-1) + zq(k+1,j-1) - zp(k,j-1) - zq(k,j-1) ) * \
+                        ( zr(k,j) + zr(k,j-1) ) / ( zm(k,j-1) + zm(k+1,j-1) ); \
+              zb(k,j) = ( zp(k,j-1) + zq(k,j-1) - zp(k,j) - zq(k,j) ) * \
+                        ( zr(k,j) + zr(k-1,j) ) / ( zm(k,j) + zm(k,j-1));
+            */
+            zadat_view(k,j) = ( zpdat_view(k+1,j-1) + zqdat_view(k+1,j-1) - zpdat_view(k,j-1) - zqdat_view(k,j-1) ) *
+                              ( zrdat_view(k,j) + zrdat_view(k,j-1) ) / ( zmdat_view(k,j-1) + zmdat_view(k+1,j-1) );
+
+            zbdat_view(k,j) = ( zpdat_view(k,j-1) + zqdat_view(k,j-1) - zpdat_view(k,j) - zqdat_view(k,j) ) *
+                              ( zrdat_view(k,j) + zrdat_view(k-1,j) ) / ( zmdat_view(k,j) + zmdat_view(k,j-1));
+          });
+
+
+        Kokkos::parallel_for("HYDRO_2D_Kokkos Kokkos_Lambda--BODY2",
+          Kokkos::MDRangePolicy<Kokkos::Rank<2>>({kbeg,jbeg}, {kend,jend}),
+          KOKKOS_LAMBDA(int64_t k, int64_t j) {
+
+            /*
+            #define HYDRO_2D_BODY2_RAJA \
+              zu(k,j) += s*( za(k,j) * ( zz(k,j) - zz(k,j+1) ) - \
+                             za(k,j-1) * ( zz(k,j) - zz(k,j-1) ) - \
+                             zb(k,j) * ( zz(k,j) - zz(k-1,j) ) + \
+                             zb(k+1,j) * ( zz(k,j) - zz(k+1,j) ) ); \
+              zv(k,j) += s*( za(k,j) * ( zr(k,j) - zr(k,j+1) ) - \
+                             za(k,j-1) * ( zr(k,j) - zr(k,j-1) ) - \
+                             zb(k,j) * ( zr(k,j) - zr(k-1,j) ) + \
+                             zb(k+1,j) * ( zr(k,j) - zr(k+1,j) ) );
+            */
+
+            zudat_view(k,j) += s*( zadat_view(k,j) * ( zzdat_view(k,j) - zzdat_view(k,j+1) ) -
+                                   zadat_view(k,j-1) * (zzdat_view(k,j) - zzdat_view(k,j-1) ) -
+                                   zbdat_view(k,j) * ( zzdat_view(k,j) - zzdat_view(k-1,j) ) +
+                                   zbdat_view(k+1,j) * ( zzdat_view(k,j) - zzdat_view(k+1,j) ) );
+            zvdat_view(k,j) += s*( zadat_view(k,j) * ( zrdat_view(k,j) - zrdat_view(k,j+1) ) -
+                                   zadat_view(k,j-1) * ( zrdat_view(k,j) - zrdat_view(k,j-1) ) -
+                                   zbdat_view(k,j) * ( zrdat_view(k,j) - zrdat_view(k-1,j) ) +
+                                   zbdat_view(k+1,j) * ( zrdat_view(k,j) - zrdat_view(k+1,j) ) );
+
+          });
+
+
+        Kokkos::parallel_for("HYDRO_2D_Kokkos Kokkos_Lambda--BODY3",
+          Kokkos::MDRangePolicy<Kokkos::Rank<2>>({kbeg,jbeg}, {kend,jend}),
+          KOKKOS_LAMBDA(int64_t k, int64_t j) {
+            /*
+            #define HYDRO_2D_BODY3_RAJA \
+              zrout(k,j) = zr(k,j) + t*zu(k,j); \
+              zzout(k,j) = zz(k,j) + t*zv(k,j);
+            */
+
+            zroutdat_view(k,j) = zrdat_view(k,j) + t*zudat_view(k,j);
+            zzoutdat_view(k,j) = zzdat_view(k,j) + t*zvdat_view(k,j);
+          });
+
+      }
+
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+
+    default : {
+      std::cout << "\n HYDRO_2D : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+
+
+  // Hand every View back to the pointer declared by HYDRO_2D_DATA_SETUP:
+/*
+#define HYDRO_2D_DATA_SETUP \
+  Real_ptr zadat = m_za; \
+  Real_ptr zbdat = m_zb; \
+  Real_ptr zmdat = m_zm; \
+  Real_ptr zpdat = m_zp; \
+  Real_ptr zqdat = m_zq; \
+  Real_ptr zrdat = m_zr; \
+  Real_ptr zudat = m_zu; \
+  Real_ptr zvdat = m_zv; \
+  Real_ptr zzdat = m_zz; \
+\
+  Real_ptr zroutdat = m_zrout; \
+  Real_ptr zzoutdat = m_zzout; \
+\
+
+*/
+
+  // There are 9 input views:
+  moveDataToHostFromKokkosView(zadat, zadat_view, kn, jn);
+  moveDataToHostFromKokkosView(zbdat, zbdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zmdat, zmdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zpdat, zpdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zqdat, zqdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zrdat, zrdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zudat, zudat_view, kn, jn);
+  moveDataToHostFromKokkosView(zvdat, zvdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zzdat, zzdat_view, kn, jn);
+
+  // There are 2 output views:
+  moveDataToHostFromKokkosView(zroutdat, zroutdat_view, kn, jn);
+  moveDataToHostFromKokkosView(zzoutdat, zzoutdat_view, kn, jn);
+
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git
a/src/lcals-kokkos/INT_PREDICT-Kokkos.cpp b/src/lcals-kokkos/INT_PREDICT-Kokkos.cpp
new file mode 100644
index 000000000..e21d0f1ad
--- /dev/null
+++ b/src/lcals-kokkos/INT_PREDICT-Kokkos.cpp
@@ -0,0 +1,113 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INT_PREDICT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of INT_PREDICT: updates px[i] over [0, iend) from
+// strided px entries (i + offset*k), timed for run_reps launches.
+void INT_PREDICT::runKokkosVariant(VariantID vid)
+{
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INT_PREDICT_DATA_SETUP;
+
+/*
+ *#define INT_PREDICT_DATA_SETUP \
+  Real_ptr px = m_px; \
+  Real_type dm22 = m_dm22; \
+  Real_type dm23 = m_dm23; \
+  Real_type dm24 = m_dm24; \
+  Real_type dm25 = m_dm25; \
+  Real_type dm26 = m_dm26; \
+  Real_type dm27 = m_dm27; \
+  Real_type dm28 = m_dm28; \
+  Real_type c0 = m_c0; \
+
+*/
+
+  // Wrap pointer in Kokkos View, and adjust indices
+  auto px_view = getViewFromPointer(px, iend*13);
+
+
+  auto intpredict_lam = [=](Index_type i) {
+                          INT_PREDICT_BODY;
+                        };
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): these shadow the dm22..dm28 that INT_PREDICT_DATA_SETUP presumably declares - confirm, then drop
+        Real_type dm22 = m_dm22;
+        Real_type dm23 = m_dm23;
+        Real_type dm24 = m_dm24;
+        Real_type dm25 = m_dm25;
+        Real_type dm26 = m_dm26;
+        Real_type dm27 = m_dm27;
+        Real_type dm28 = m_dm28;
+
+        /*
+        #define INT_PREDICT_BODY \
+          px[i] = dm28*px[i + offset * 12] + dm27*px[i + offset * 11] + \
+                  dm26*px[i + offset * 10] + dm25*px[i + offset * 9] + \
+                  dm24*px[i + offset * 8] + dm23*px[i + offset * 7] + \
+                  dm22*px[i + offset * 6] + \
+                  c0*( px[i + offset * 4] + px[i + offset * 5] ) + \
+                  px[i + offset * 2];
+        */
+        Kokkos::parallel_for("INT_PREDICT_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i){
+            // #define INT_PREDICT_BODY
+            px_view[i] = dm28*px_view[i + offset * 12] + dm27*px_view[i + offset * 11] +
+                         dm26*px_view[i + offset * 10] + dm25*px_view[i + offset * 9] +
+                         dm24*px_view[i + offset * 8] + dm23*px_view[i + offset * 7] +
+                         dm22*px_view[i + offset * 6] +
+                         c0*( px_view[i + offset * 4] + px_view[i + offset * 5] ) +
+                         px_view[i + offset * 2];
+          });
+
+      }
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+
+    default : {
+      std::cout << "\n INT_PREDICT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  moveDataToHostFromKokkosView(px, px_view, iend*13);
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals-kokkos/PLANCKIAN-Kokkos.cpp b/src/lcals-kokkos/PLANCKIAN-Kokkos.cpp
new file mode 100644
index 000000000..e0081b3b8
--- /dev/null
+++ b/src/lcals-kokkos/PLANCKIAN-Kokkos.cpp
@@ -0,0 +1,96 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PLANCKIAN.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+#include <cmath>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of PLANCKIAN: y = u/v, w = x/(exp(y) - 1) over
+// [0, iend), timed for run_reps launches.
+void PLANCKIAN::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  PLANCKIAN_DATA_SETUP;
+
+  /*
+#define PLANCKIAN_DATA_SETUP \
+  Real_ptr x = m_x; \
+  Real_ptr y = m_y; \
+  Real_ptr u = m_u; \
+  Real_ptr v = m_v; \
+  Real_ptr w = m_w;
+*/
+
+  auto x_view = getViewFromPointer(x, iend);
+  auto y_view = getViewFromPointer(y, iend);
+  auto u_view = getViewFromPointer(u, iend);
+  auto v_view = getViewFromPointer(v, iend);
+  auto w_view = getViewFromPointer(w, iend);
+
+
+  auto planckian_lam = [=](Index_type i) {
+                         PLANCKIAN_BODY;
+                       };
+
+# if defined (RUN_KOKKOS)
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Kokkos::parallel_for("PLANCKIAN_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i){
+            /* #define PLANCKIAN_BODY \
+             * y[i] = u[i] / v[i]; \
+             * w[i] = x[i] / ( exp( y[i] ) - 1.0 );
+             */
+            y_view[i] = u_view[i] / v_view[i];
+            w_view[i] = x_view[i] / ( exp( y_view[i] ) - 1.0 );
+          });
+      }
+
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n PLANCKIAN : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  moveDataToHostFromKokkosView(x, x_view, iend);
+  moveDataToHostFromKokkosView(y, y_view, iend);
+  moveDataToHostFromKokkosView(u, u_view, iend);
+  moveDataToHostFromKokkosView(v, v_view, iend);
+  moveDataToHostFromKokkosView(w, w_view, iend);
+
+
+
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals-kokkos/TRIDIAG_ELIM-Kokkos.cpp b/src/lcals-kokkos/TRIDIAG_ELIM-Kokkos.cpp
new
file mode 100644
index 000000000..fffcf40fb
--- /dev/null
+++ b/src/lcals-kokkos/TRIDIAG_ELIM-Kokkos.cpp
@@ -0,0 +1,88 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "TRIDIAG_ELIM.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace lcals
+{
+// Kokkos_Lambda variant of TRIDIAG_ELIM: xout[i] = z[i]*(y[i] - xin[i-1])
+// over [1, m_N), timed for run_reps launches.
+void TRIDIAG_ELIM::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = m_N;
+
+  TRIDIAG_ELIM_DATA_SETUP;
+
+/*
+#define TRIDIAG_ELIM_DATA_SETUP \
+  Real_ptr xout = m_xout; \
+  Real_ptr xin = m_xin; \
+  Real_ptr y = m_y; \
+  Real_ptr z = m_z;
+*/
+
+
+  auto xout_view = getViewFromPointer(xout, iend);
+  auto xin_view = getViewFromPointer(xin, iend);
+  auto y_view = getViewFromPointer(y, iend);
+  auto z_view = getViewFromPointer(z, iend);
+
+
+  auto tridiag_elim_lam = [=](Index_type i) {
+                            TRIDIAG_ELIM_BODY;
+                          };
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      Kokkos::fence();
+      startTimer();
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        Kokkos::parallel_for("TRIDIAG_ELIM_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i){
+            // #define TRIDIAG_ELIM_BODY
+            // xout[i] = z[i] * ( y[i] - xin[i-1] );
+            xout_view[i] = z_view[i] * ( y_view[i] - xin_view[i-1] );
+          });
+      }
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n TRIDIAG_ELIM : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  moveDataToHostFromKokkosView(xout, xout_view, iend);
+  moveDataToHostFromKokkosView(xin, xin_view, iend);
+  moveDataToHostFromKokkosView(y, y_view, iend);
+  moveDataToHostFromKokkosView(z, z_view, iend);
+
+}
+
+} // end namespace lcals
+} // end namespace rajaperf
diff --git a/src/lcals/DIFF_PREDICT.cpp b/src/lcals/DIFF_PREDICT.cpp
index d1a96a101..8b851132f 100644
--- a/src/lcals/DIFF_PREDICT.cpp
+++ b/src/lcals/DIFF_PREDICT.cpp
@@ -49,6 +49,9 @@ DIFF_PREDICT::DIFF_PREDICT(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined(Kokkos_Lambda);
+
 }
 
 DIFF_PREDICT::~DIFF_PREDICT()
diff --git a/src/lcals/DIFF_PREDICT.hpp b/src/lcals/DIFF_PREDICT.hpp
index 504dd8bd7..4c0a330c5 100644
--- a/src/lcals/DIFF_PREDICT.hpp
+++ b/src/lcals/DIFF_PREDICT.hpp
@@ -88,6 +88,7 @@ class DIFF_PREDICT : public KernelBase
   void updateChecksum(VariantID vid);
   void tearDown(VariantID vid);
 
+  void runKokkosVariant(VariantID vid);
   void runSeqVariant(VariantID vid);
   void runOpenMPVariant(VariantID vid);
   void runCudaVariant(VariantID vid);
diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp
index 4a8671172..b3b2de5ba 100644
--- a/src/lcals/EOS.cpp
+++ b/src/lcals/EOS.cpp
@@ -57,6 +57,8 @@ EOS::EOS(const RunParams& params)
 
   setVariantDefined( Base_HIP );
   setVariantDefined( RAJA_HIP );
+
+  setVariantDefined(Kokkos_Lambda);
 }
 
 EOS::~EOS()
diff --git a/src/lcals/EOS.hpp b/src/lcals/EOS.hpp
index 82a779ac2..7d2da09ba 100644
--- a/src/lcals/EOS.hpp
+++ b/src/lcals/EOS.hpp
@@ -57,6 +57,7 @@ class EOS : public KernelBase
   void updateChecksum(VariantID vid);
   void tearDown(VariantID vid);
 
+  void runKokkosVariant(VariantID vid);
   void runSeqVariant(VariantID vid);
   void runOpenMPVariant(VariantID vid);
   void runCudaVariant(VariantID vid);
diff --git a/src/lcals/FIRST_DIFF.cpp b/src/lcals/FIRST_DIFF.cpp
index c37c41aac..ccd7f1c9a 100644
--- a/src/lcals/FIRST_DIFF.cpp
+++
b/src/lcals/FIRST_DIFF.cpp @@ -53,6 +53,8 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } FIRST_DIFF::~FIRST_DIFF() diff --git a/src/lcals/FIRST_DIFF.hpp b/src/lcals/FIRST_DIFF.hpp index 21c279b89..7fc590c7c 100644 --- a/src/lcals/FIRST_DIFF.hpp +++ b/src/lcals/FIRST_DIFF.hpp @@ -47,6 +47,8 @@ class FIRST_DIFF : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp index c6138e46a..76e44acf9 100644 --- a/src/lcals/FIRST_MIN.cpp +++ b/src/lcals/FIRST_MIN.cpp @@ -57,6 +57,9 @@ FIRST_MIN::FIRST_MIN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } FIRST_MIN::~FIRST_MIN() diff --git a/src/lcals/FIRST_MIN.hpp b/src/lcals/FIRST_MIN.hpp index a9b48c1b3..c8dbb30c5 100644 --- a/src/lcals/FIRST_MIN.hpp +++ b/src/lcals/FIRST_MIN.hpp @@ -76,6 +76,8 @@ class FIRST_MIN : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp index ceaa9bc8b..fcdc4db97 100644 --- a/src/lcals/FIRST_SUM.cpp +++ b/src/lcals/FIRST_SUM.cpp @@ -51,7 +51,9 @@ FIRST_SUM::FIRST_SUM(const RunParams& params) setVariantDefined( RAJA_CUDA ); setVariantDefined( Base_HIP ); - setVariantDefined( RAJA_HIP ); + setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } FIRST_SUM::~FIRST_SUM() diff --git a/src/lcals/FIRST_SUM.hpp b/src/lcals/FIRST_SUM.hpp index d828ac896..bb93f8b90 100644 --- a/src/lcals/FIRST_SUM.hpp +++ 
b/src/lcals/FIRST_SUM.hpp @@ -50,6 +50,8 @@ class FIRST_SUM : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp index 6534633da..08e8e55fc 100644 --- a/src/lcals/GEN_LIN_RECUR.cpp +++ b/src/lcals/GEN_LIN_RECUR.cpp @@ -57,6 +57,10 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + } GEN_LIN_RECUR::~GEN_LIN_RECUR() diff --git a/src/lcals/GEN_LIN_RECUR.hpp b/src/lcals/GEN_LIN_RECUR.hpp index 3fa49e69f..25f039035 100644 --- a/src/lcals/GEN_LIN_RECUR.hpp +++ b/src/lcals/GEN_LIN_RECUR.hpp @@ -71,6 +71,8 @@ class GEN_LIN_RECUR : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/lcals/HYDRO_1D.cpp b/src/lcals/HYDRO_1D.cpp index 08198ca0f..249ef27ac 100644 --- a/src/lcals/HYDRO_1D.cpp +++ b/src/lcals/HYDRO_1D.cpp @@ -56,6 +56,8 @@ HYDRO_1D::HYDRO_1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } HYDRO_1D::~HYDRO_1D() diff --git a/src/lcals/HYDRO_1D.hpp b/src/lcals/HYDRO_1D.hpp index 029065be8..d33424fe8 100644 --- a/src/lcals/HYDRO_1D.hpp +++ b/src/lcals/HYDRO_1D.hpp @@ -52,6 +52,8 @@ class HYDRO_1D : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp index e51237f82..c9c13be79 100644 --- 
a/src/lcals/HYDRO_2D.cpp +++ b/src/lcals/HYDRO_2D.cpp @@ -71,6 +71,8 @@ HYDRO_2D::HYDRO_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } HYDRO_2D::~HYDRO_2D() diff --git a/src/lcals/HYDRO_2D.hpp b/src/lcals/HYDRO_2D.hpp index 2525c8c89..4fa42ac4e 100644 --- a/src/lcals/HYDRO_2D.hpp +++ b/src/lcals/HYDRO_2D.hpp @@ -148,6 +148,8 @@ class HYDRO_2D : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/lcals/INT_PREDICT.cpp b/src/lcals/INT_PREDICT.cpp index 096a074ac..df304a47e 100644 --- a/src/lcals/INT_PREDICT.cpp +++ b/src/lcals/INT_PREDICT.cpp @@ -49,6 +49,9 @@ INT_PREDICT::INT_PREDICT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } INT_PREDICT::~INT_PREDICT() diff --git a/src/lcals/INT_PREDICT.hpp b/src/lcals/INT_PREDICT.hpp index 1253e1a6e..451f1c2b9 100644 --- a/src/lcals/INT_PREDICT.hpp +++ b/src/lcals/INT_PREDICT.hpp @@ -67,6 +67,8 @@ class INT_PREDICT : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/lcals/PLANCKIAN.cpp b/src/lcals/PLANCKIAN.cpp index 564a71a7e..0db4a0f65 100644 --- a/src/lcals/PLANCKIAN.cpp +++ b/src/lcals/PLANCKIAN.cpp @@ -49,6 +49,8 @@ PLANCKIAN::PLANCKIAN(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } PLANCKIAN::~PLANCKIAN() diff --git a/src/lcals/PLANCKIAN.hpp b/src/lcals/PLANCKIAN.hpp index 1e5b744db..8536c28cb 100644 --- a/src/lcals/PLANCKIAN.hpp +++ b/src/lcals/PLANCKIAN.hpp @@ -52,6 +52,8 @@ 
class PLANCKIAN : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp index d35c08a51..7881bccce 100644 --- a/src/lcals/TRIDIAG_ELIM.cpp +++ b/src/lcals/TRIDIAG_ELIM.cpp @@ -51,6 +51,8 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } TRIDIAG_ELIM::~TRIDIAG_ELIM() diff --git a/src/lcals/TRIDIAG_ELIM.hpp b/src/lcals/TRIDIAG_ELIM.hpp index 73ffeb341..581dbc90b 100644 --- a/src/lcals/TRIDIAG_ELIM.hpp +++ b/src/lcals/TRIDIAG_ELIM.hpp @@ -52,6 +52,8 @@ class TRIDIAG_ELIM : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/polybench-kokkos/CMakeLists.txt b/src/polybench-kokkos/CMakeLists.txt new file mode 100644 index 000000000..68c53dde2 --- /dev/null +++ b/src/polybench-kokkos/CMakeLists.txt @@ -0,0 +1,27 @@ +############################################################################### +# Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/LICENSE file for details. 
+# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../polybench) + +blt_add_library( + NAME polybench-kokkos + SOURCES POLYBENCH_2MM-Kokkos.cpp + POLYBENCH_3MM-Kokkos.cpp + POLYBENCH_ADI-Kokkos.cpp + POLYBENCH_ATAX-Kokkos.cpp + POLYBENCH_FDTD_2D-Kokkos.cpp + POLYBENCH_FLOYD_WARSHALL-Kokkos.cpp + POLYBENCH_GEMM-Kokkos.cpp + POLYBENCH_GEMVER-Kokkos.cpp + POLYBENCH_GESUMMV-Kokkos.cpp + POLYBENCH_HEAT_3D-Kokkos.cpp + POLYBENCH_JACOBI_1D-Kokkos.cpp + POLYBENCH_JACOBI_2D-Kokkos.cpp + POLYBENCH_MVT-Kokkos.cpp + DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} + ) diff --git a/src/polybench-kokkos/POLYBENCH_2MM-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_2MM-Kokkos.cpp new file mode 100644 index 000000000..ad97430b8 --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_2MM-Kokkos.cpp @@ -0,0 +1,196 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_2MM.hpp" + +#include "RAJA/RAJA.hpp" + +#include + + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_2MM::runKokkosVariant(VariantID vid) +{ + + // Kokkos stub + return; + + + const Index_type run_reps= getRunReps(); + + POLYBENCH_2MM_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < ni; i++ ) { + for (Index_type j = 0; j < nj; j++) { + POLYBENCH_2MM_BODY1; + for (Index_type k = 0; k < nk; k++) { + POLYBENCH_2MM_BODY2; + } + POLYBENCH_2MM_BODY3; + } + } + + for (Index_type i = 0; i < ni; i++) { + for (Index_type l = 0; l < nl; l++) { + POLYBENCH_2MM_BODY4; + for (Index_type j = 0; j < nj; j++) { + POLYBENCH_2MM_BODY5; + } + POLYBENCH_2MM_BODY6; + } + } + + } + stopTimer(); + + break; + } + + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_2mm_base_lam2 = [=](Index_type i, Index_type j, + Index_type k, Real_type &dot) { + POLYBENCH_2MM_BODY2; + }; + auto poly_2mm_base_lam3 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_2MM_BODY3; + }; + auto poly_2mm_base_lam5 = [=](Index_type i, Index_type l, + Index_type j, Real_type &dot) { + POLYBENCH_2MM_BODY5; + }; + auto poly_2mm_base_lam6 = [=](Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_2MM_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < ni; i++ ) { + for(Index_type j = 0; j < nj; j++) { + POLYBENCH_2MM_BODY1; + for (Index_type k = 0; k < nk; k++) { + poly_2mm_base_lam2(i, j, k, dot); + } + poly_2mm_base_lam3(i, j, dot); + } + } + + for(Index_type i = 0; i < ni; i++) { + for(Index_type l = 0; l < nl; l++) { + POLYBENCH_2MM_BODY4; + for (Index_type j = 0; j < nj; j++) { + poly_2mm_base_lam5(i, l, j, dot); + } + 
poly_2mm_base_lam6(i, l, dot); + } + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_2MM_VIEWS_RAJA; + + auto poly_2mm_lam1 = [=](Real_type &dot) { + POLYBENCH_2MM_BODY1_RAJA; + }; + auto poly_2mm_lam2 = [=](Index_type i, Index_type j, Index_type k, + Real_type &dot) { + POLYBENCH_2MM_BODY2_RAJA; + }; + auto poly_2mm_lam3 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_2MM_BODY3_RAJA; + }; + auto poly_2mm_lam4 = [=](Real_type &dot) { + POLYBENCH_2MM_BODY4_RAJA; + }; + auto poly_2mm_lam5 = [=](Index_type i, Index_type l, Index_type j, + Real_type &dot) { + POLYBENCH_2MM_BODY5_RAJA; + }; + auto poly_2mm_lam6 = [=](Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_2MM_BODY6_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nk}), + RAJA::tuple{0.0}, + + poly_2mm_lam1, + poly_2mm_lam2, + poly_2mm_lam3 + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nl}, + RAJA::RangeSegment{0, nj}), + RAJA::tuple{0.0}, + + poly_2mm_lam4, + poly_2mm_lam5, + poly_2mm_lam6 + ); + + } + stopTimer(); + break; + } + +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_2MM : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_3MM-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_3MM-Kokkos.cpp new file mode 100644 index 000000000..170e442fc --- /dev/null 
+++ b/src/polybench-kokkos/POLYBENCH_3MM-Kokkos.cpp @@ -0,0 +1,249 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_3MM.hpp" + +#include "RAJA/RAJA.hpp" + +#include <iostream> +#include <cstring> + + +namespace rajaperf +{ +namespace polybench +{ + + +void POLYBENCH_3MM::runKokkosVariant(VariantID vid) +{ + + // Kokkos stub: not implemented yet; the Seq-derived code after this return is unreachable. + return; + + const Index_type run_reps = getRunReps(); + + POLYBENCH_3MM_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < ni; i++ ) { + for (Index_type j = 0; j < nj; j++) { + POLYBENCH_3MM_BODY1; + for (Index_type k = 0; k < nk; k++) { + POLYBENCH_3MM_BODY2; + } + POLYBENCH_3MM_BODY3; + } + } + + for (Index_type j = 0; j < nj; j++) { + for (Index_type l = 0; l < nl; l++) { + POLYBENCH_3MM_BODY4; + for (Index_type m = 0; m < nm; m++) { + POLYBENCH_3MM_BODY5; + } + POLYBENCH_3MM_BODY6; + } + } + + for (Index_type i = 0; i < ni; i++) { + for (Index_type l = 0; l < nl; l++) { + POLYBENCH_3MM_BODY7; + for (Index_type j = 0; j < nj; j++) { + POLYBENCH_3MM_BODY8; + } + POLYBENCH_3MM_BODY9; + } + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_3mm_base_lam2 = [=] (Index_type i, Index_type j, Index_type k, + Real_type &dot) { + POLYBENCH_3MM_BODY2; + }; + auto poly_3mm_base_lam3 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY3; + }; + auto poly_3mm_base_lam5 = [=] (Index_type j, Index_type l, Index_type m, + Real_type &dot) { + POLYBENCH_3MM_BODY5; + }; + auto poly_3mm_base_lam6 = [=] (Index_type j, Index_type l, + Real_type &dot)
{ + POLYBENCH_3MM_BODY6; + }; + auto poly_3mm_base_lam8 = [=] (Index_type i, Index_type l, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY8; + }; + auto poly_3mm_base_lam9 = [=] (Index_type i, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY9; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < ni; i++ ) { + for (Index_type j = 0; j < nj; j++) { + POLYBENCH_3MM_BODY1; + for (Index_type k = 0; k < nk; k++) { + poly_3mm_base_lam2(i, j, k, dot); + } + poly_3mm_base_lam3(i, j, dot); + } + } + + for (Index_type j = 0; j < nj; j++) { + for (Index_type l = 0; l < nl; l++) { + POLYBENCH_3MM_BODY4; + for (Index_type m = 0; m < nm; m++) { + poly_3mm_base_lam5(j, l, m, dot); + } + poly_3mm_base_lam6(j, l, dot); + } + } + + for (Index_type i = 0; i < ni; i++) { + for (Index_type l = 0; l < nl; l++) { + POLYBENCH_3MM_BODY7; + for (Index_type j = 0; j < nj; j++) { + poly_3mm_base_lam8(i, l, j, dot); + } + poly_3mm_base_lam9(i, l, dot); + } + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_3MM_VIEWS_RAJA; + + auto poly_3mm_lam1 = [=] (Real_type &dot) { + POLYBENCH_3MM_BODY1_RAJA; + }; + auto poly_3mm_lam2 = [=] (Index_type i, Index_type j, Index_type k, + Real_type &dot) { + POLYBENCH_3MM_BODY2_RAJA; + }; + auto poly_3mm_lam3 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY3_RAJA; + }; + auto poly_3mm_lam4 = [=] (Real_type &dot) { + POLYBENCH_3MM_BODY4_RAJA; + }; + auto poly_3mm_lam5 = [=] (Index_type j, Index_type l, Index_type m, + Real_type &dot) { + POLYBENCH_3MM_BODY5_RAJA; + }; + auto poly_3mm_lam6 = [=] (Index_type j, Index_type l, + Real_type &dot) { + POLYBENCH_3MM_BODY6_RAJA; + }; + auto poly_3mm_lam7 = [=] (Real_type &dot) { + POLYBENCH_3MM_BODY7_RAJA; + }; + auto poly_3mm_lam8 = [=] (Index_type i, Index_type l, Index_type j, + Real_type &dot) { + POLYBENCH_3MM_BODY8_RAJA; + }; + auto poly_3mm_lam9 = [=] (Index_type i, Index_type l, + Real_type
&dot) { + POLYBENCH_3MM_BODY9_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1,2>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0,1>, RAJA::Params<0>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param<EXEC_POL>( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nk}), + RAJA::tuple<Real_type>{0.0}, + + poly_3mm_lam1, + poly_3mm_lam2, + poly_3mm_lam3 + + ); + + RAJA::kernel_param<EXEC_POL>( + RAJA::make_tuple(RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nl}, + RAJA::RangeSegment{0, nm}), + RAJA::tuple<Real_type>{0.0}, + + poly_3mm_lam4, + poly_3mm_lam5, + poly_3mm_lam6 + + ); + + RAJA::kernel_param<EXEC_POL>( + RAJA::make_tuple(RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nl}, + RAJA::RangeSegment{0, nj}), + RAJA::tuple<Real_type>{0.0}, + + poly_3mm_lam7, + poly_3mm_lam8, + poly_3mm_lam9 + + ); + + } // end run_reps + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_3MM : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_ADI-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_ADI-Kokkos.cpp new file mode 100644 index 000000000..7056724ed --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_ADI-Kokkos.cpp @@ -0,0 +1,222 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details.
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_ADI.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_ADI::runKokkosVariant(VariantID vid) +{ + // Kokkos stub + return; + + const Index_type run_reps = getRunReps(); + + POLYBENCH_ADI_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + for (Index_type i = 1; i < n-1; ++i) { + POLYBENCH_ADI_BODY2; + for (Index_type j = 1; j < n-1; ++j) { + POLYBENCH_ADI_BODY3; + } + POLYBENCH_ADI_BODY4; + for (Index_type k = n-2; k >= 1; --k) { + POLYBENCH_ADI_BODY5; + } + } + + for (Index_type i = 1; i < n-1; ++i) { + POLYBENCH_ADI_BODY6; + for (Index_type j = 1; j < n-1; ++j) { + POLYBENCH_ADI_BODY7; + } + POLYBENCH_ADI_BODY8; + for (Index_type k = n-2; k >= 1; --k) { + POLYBENCH_ADI_BODY9; + } + } + + } // tstep loop + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_adi_base_lam2 = [=](Index_type i) { + POLYBENCH_ADI_BODY2; + }; + auto poly_adi_base_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY3; + }; + auto poly_adi_base_lam4 = [=](Index_type i) { + POLYBENCH_ADI_BODY4; + }; + auto poly_adi_base_lam5 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY5; + }; + auto poly_adi_base_lam6 = [=](Index_type i) { + POLYBENCH_ADI_BODY6; + }; + auto poly_adi_base_lam7 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY7; + }; + auto poly_adi_base_lam8 = [=](Index_type i) { + POLYBENCH_ADI_BODY8; + }; + auto poly_adi_base_lam9 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY9; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + for (Index_type i = 1; i < n-1; ++i) { + 
poly_adi_base_lam2(i); + for (Index_type j = 1; j < n-1; ++j) { + poly_adi_base_lam3(i, j); + } + poly_adi_base_lam4(i); + for (Index_type k = n-2; k >= 1; --k) { + poly_adi_base_lam5(i, k); + } + } + + for (Index_type i = 1; i < n-1; ++i) { + poly_adi_base_lam6(i); + for (Index_type j = 1; j < n-1; ++j) { + poly_adi_base_lam7(i, j); + } + poly_adi_base_lam8(i); + for (Index_type k = n-2; k >= 1; --k) { + poly_adi_base_lam9(i, k); + } + } + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_ADI_VIEWS_RAJA; + + auto poly_adi_lam2 = [=](Index_type i) { + POLYBENCH_ADI_BODY2_RAJA; + }; + auto poly_adi_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY3_RAJA; + }; + auto poly_adi_lam4 = [=](Index_type i) { + POLYBENCH_ADI_BODY4_RAJA; + }; + auto poly_adi_lam5 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY5_RAJA; + }; + auto poly_adi_lam6 = [=](Index_type i) { + POLYBENCH_ADI_BODY6_RAJA; + }; + auto poly_adi_lam7 = [=](Index_type i, Index_type j) { + POLYBENCH_ADI_BODY7_RAJA; + }; + auto poly_adi_lam8 = [=](Index_type i) { + POLYBENCH_ADI_BODY8_RAJA; + }; + auto poly_adi_lam9 = [=](Index_type i, Index_type k) { + POLYBENCH_ADI_BODY9_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<3, RAJA::Segs<0,2>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 1; t <= tsteps; ++t) { + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{1, n-1}, + RAJA::RangeSegment{1, n-1}, + RAJA::RangeStrideSegment{n-2, 0, -1}), + + poly_adi_lam2, + poly_adi_lam3, + poly_adi_lam4, + poly_adi_lam5 + + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{1, 
n-1}, + RAJA::RangeSegment{1, n-1}, + RAJA::RangeStrideSegment{n-2, 0, -1}), + + poly_adi_lam6, + poly_adi_lam7, + poly_adi_lam8, + poly_adi_lam9 + + ); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\nPOLYBENCH_ADI Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_ATAX-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_ATAX-Kokkos.cpp new file mode 100644 index 000000000..d2a99c3ad --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_ATAX-Kokkos.cpp @@ -0,0 +1,193 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_ATAX.hpp" + +#include "RAJA/RAJA.hpp" + +#include + + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_ATAX::runKokkosVariant(VariantID vid) +{ + + // Kokkos stub + return; + + const Index_type run_reps= getRunReps(); + + POLYBENCH_ATAX_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < N; ++i ) { + POLYBENCH_ATAX_BODY1; + for (Index_type j = 0; j < N; ++j ) { + POLYBENCH_ATAX_BODY2; + } + POLYBENCH_ATAX_BODY3; + } + + for (Index_type j = 0; j < N; ++j ) { + POLYBENCH_ATAX_BODY4; + for (Index_type i = 0; i < N; ++i ) { + POLYBENCH_ATAX_BODY5; + } + POLYBENCH_ATAX_BODY6; + } + + } + stopTimer(); + + break; + } + + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_atax_base_lam2 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_ATAX_BODY2; + }; + auto poly_atax_base_lam3 = [=] 
(Index_type i, + Real_type &dot) { + POLYBENCH_ATAX_BODY3; + }; + auto poly_atax_base_lam5 = [=] (Index_type i, Index_type j , + Real_type &dot) { + POLYBENCH_ATAX_BODY5; + }; + auto poly_atax_base_lam6 = [=] (Index_type j, + Real_type &dot) { + POLYBENCH_ATAX_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < N; ++i ) { + POLYBENCH_ATAX_BODY1; + for (Index_type j = 0; j < N; ++j ) { + poly_atax_base_lam2(i, j, dot); + } + poly_atax_base_lam3(i, dot); + } + + for (Index_type j = 0; j < N; ++j ) { + POLYBENCH_ATAX_BODY4; + for (Index_type i = 0; i < N; ++i ) { + poly_atax_base_lam5(i, j, dot); + } + poly_atax_base_lam6(j, dot); + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_ATAX_VIEWS_RAJA; + + auto poly_atax_lam1 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_ATAX_BODY1_RAJA; + }; + auto poly_atax_lam2 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY2_RAJA; + }; + auto poly_atax_lam3 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_ATAX_BODY3_RAJA; + }; + auto poly_atax_lam4 = [=] (Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY4_RAJA; + }; + auto poly_atax_lam5 = [=] (Index_type i, Index_type j , Real_type &dot) { + POLYBENCH_ATAX_BODY5_RAJA; + }; + auto poly_atax_lam6 = [=] (Index_type j, Real_type &dot) { + POLYBENCH_ATAX_BODY6_RAJA; + }; + + using EXEC_POL1 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + + using EXEC_POL2 = + RAJA::KernelPolicy< + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<1>, RAJA::Params<0>>, + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + 
RAJA::statement::Lambda<2, RAJA::Segs<1>, RAJA::Params<0>> + > + >; + + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + RAJA::tuple{0.0}, + + poly_atax_lam1, + poly_atax_lam2, + poly_atax_lam3 + + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + RAJA::tuple{0.0}, + + poly_atax_lam4, + poly_atax_lam5, + poly_atax_lam6 + + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_ATAX : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_FDTD_2D-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_FDTD_2D-Kokkos.cpp new file mode 100644 index 000000000..e296985cf --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_FDTD_2D-Kokkos.cpp @@ -0,0 +1,199 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_FDTD_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ + + +void POLYBENCH_FDTD_2D::runKokkosVariant(VariantID vid) +{ + // Kokkos stub + return; + + + const Index_type run_reps = getRunReps(); + + POLYBENCH_FDTD_2D_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + for (Index_type j = 0; j < ny; j++) { + POLYBENCH_FDTD_2D_BODY1; + } + for (Index_type i = 1; i < nx; i++) { + for (Index_type j = 0; j < ny; j++) { + POLYBENCH_FDTD_2D_BODY2; + } + } + for (Index_type i = 0; i < nx; i++) { + for (Index_type j = 1; j < ny; j++) { + POLYBENCH_FDTD_2D_BODY3; + } + } + for (Index_type i = 0; i < nx - 1; i++) { + for (Index_type j = 0; j < ny - 1; j++) { + POLYBENCH_FDTD_2D_BODY4; + } + } + + } // tstep loop + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + // + // Note: first lambda must use capture by reference so that the + // scalar variable 't' used in it is updated for each + // t-loop iteration. 
+ // + auto poly_fdtd2d_base_lam1 = [&](Index_type j) { + POLYBENCH_FDTD_2D_BODY1; + }; + auto poly_fdtd2d_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY2; + }; + auto poly_fdtd2d_base_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY3; + }; + auto poly_fdtd2d_base_lam4 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY4; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + for (Index_type j = 0; j < ny; j++) { + poly_fdtd2d_base_lam1(j); + } + for (Index_type i = 1; i < nx; i++) { + for (Index_type j = 0; j < ny; j++) { + poly_fdtd2d_base_lam2(i, j); + } + } + for (Index_type i = 0; i < nx; i++) { + for (Index_type j = 1; j < ny; j++) { + poly_fdtd2d_base_lam3(i, j); + } + } + for (Index_type i = 0; i < nx - 1; i++) { + for (Index_type j = 0; j < ny - 1; j++) { + poly_fdtd2d_base_lam4(i, j); + } + } + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_FDTD_2D_VIEWS_RAJA; + + // + // Note: first lambda must use capture by reference so that the + // scalar variable 't' used in it is updated for each + // t-loop iteration. 
+ // + auto poly_fdtd2d_lam1 = [&](Index_type j) { + POLYBENCH_FDTD_2D_BODY1_RAJA; + }; + auto poly_fdtd2d_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY2_RAJA; + }; + auto poly_fdtd2d_lam3 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY3_RAJA; + }; + auto poly_fdtd2d_lam4 = [=](Index_type i, Index_type j) { + POLYBENCH_FDTD_2D_BODY4_RAJA; + }; + + using EXEC_POL1 = RAJA::loop_exec; + + using EXEC_POL234 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (t = 0; t < tsteps; ++t) { + + RAJA::forall( RAJA::RangeSegment(0, ny), + poly_fdtd2d_lam1 + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{1, nx}, + RAJA::RangeSegment{0, ny}), + poly_fdtd2d_lam2 + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{0, nx}, + RAJA::RangeSegment{1, ny}), + poly_fdtd2d_lam3 + ); + + RAJA::kernel( + RAJA::make_tuple(RAJA::RangeSegment{0, nx-1}, + RAJA::RangeSegment{0, ny-1}), + poly_fdtd2d_lam4 + ); + + } // tstep loop + + } // run_reps + stopTimer(); + + break; + } + +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\nPOLYBENCH_FDTD_2D Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_FLOYD_WARSHALL-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_FLOYD_WARSHALL-Kokkos.cpp new file mode 100644 index 000000000..29450fb66 --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_FLOYD_WARSHALL-Kokkos.cpp @@ -0,0 +1,123 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_FLOYD_WARSHALL.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace polybench +{ + + +void POLYBENCH_FLOYD_WARSHALL::runKokkosVariant(VariantID vid) +{ + + // Kokkos stub + return; + + const Index_type run_reps= getRunReps(); + + POLYBENCH_FLOYD_WARSHALL_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type k = 0; k < N; ++k) { + for (Index_type i = 0; i < N; ++i) { + for (Index_type j = 0; j < N; ++j) { + POLYBENCH_FLOYD_WARSHALL_BODY; + } + } + } + + } + stopTimer(); + + break; + } + + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_floydwarshall_base_lam = [=](Index_type k, Index_type i, + Index_type j) { + POLYBENCH_FLOYD_WARSHALL_BODY; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type k = 0; k < N; ++k) { + for (Index_type i = 0; i < N; ++i) { + for (Index_type j = 0; j < N; ++j) { + poly_floydwarshall_base_lam(k, i, j); + } + } + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_FLOYD_WARSHALL_VIEWS_RAJA; + + auto poly_floydwarshall_lam = [=](Index_type k, Index_type i, + Index_type j) { + POLYBENCH_FLOYD_WARSHALL_BODY_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + poly_floydwarshall_lam + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_FLOYD_WARSHALL : Unknown variant 
id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_GEMM-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_GEMM-Kokkos.cpp new file mode 100644 index 000000000..182f31448 --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_GEMM-Kokkos.cpp @@ -0,0 +1,157 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_GEMM.hpp" + +#include "RAJA/RAJA.hpp" + +#include + + +namespace rajaperf +{ +namespace polybench +{ + + +void POLYBENCH_GEMM::runKokkosVariant(VariantID vid) +{ + + // Kokkos stub + return; + + const Index_type run_reps= getRunReps(); + + POLYBENCH_GEMM_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < ni; ++i ) { + for (Index_type j = 0; j < nj; ++j ) { + POLYBENCH_GEMM_BODY1; + POLYBENCH_GEMM_BODY2; + for (Index_type k = 0; k < nk; ++k ) { + POLYBENCH_GEMM_BODY3; + } + POLYBENCH_GEMM_BODY4; + } + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_gemm_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_GEMM_BODY2; + }; + auto poly_gemm_base_lam3 = [=](Index_type i, Index_type j, Index_type k, + Real_type& dot) { + POLYBENCH_GEMM_BODY3; + }; + auto poly_gemm_base_lam4 = [=](Index_type i, Index_type j, + Real_type& dot) { + POLYBENCH_GEMM_BODY4; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < ni; ++i ) { + for (Index_type j = 0; j < nj; ++j ) { + POLYBENCH_GEMM_BODY1; + poly_gemm_base_lam2(i, j); + 
for (Index_type k = 0; k < nk; ++k ) { + poly_gemm_base_lam3(i, j, k, dot); + } + poly_gemm_base_lam4(i, j, dot); + } + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_GEMM_VIEWS_RAJA; + + auto poly_gemm_lam1 = [=](Real_type& dot) { + POLYBENCH_GEMM_BODY1_RAJA; + }; + auto poly_gemm_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_GEMM_BODY2_RAJA; + }; + auto poly_gemm_lam3 = [=](Index_type i, Index_type j, Index_type k, + Real_type& dot) { + POLYBENCH_GEMM_BODY3_RAJA; + }; + auto poly_gemm_lam4 = [=](Index_type i, Index_type j, + Real_type& dot) { + POLYBENCH_GEMM_BODY4_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>>, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<2, RAJA::Segs<0,1,2>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<3, RAJA::Segs<0,1>, RAJA::Params<0>> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + + RAJA::make_tuple( RAJA::RangeSegment{0, ni}, + RAJA::RangeSegment{0, nj}, + RAJA::RangeSegment{0, nk} ), + RAJA::tuple{0.0}, // variable for dot + + poly_gemm_lam1, + poly_gemm_lam2, + poly_gemm_lam3, + poly_gemm_lam4 + + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_GEMM : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_GEMVER-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_GEMVER-Kokkos.cpp new file mode 100644 index 000000000..f32d7651b --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_GEMVER-Kokkos.cpp @@ -0,0 +1,232 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and 
RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_GEMVER.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + + +namespace rajaperf +{ +namespace polybench +{ + + +void POLYBENCH_GEMVER::runKokkosVariant(VariantID vid) +{ + + // Kokkos stub + return; + + + const Index_type run_reps = getRunReps(); + + POLYBENCH_GEMVER_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < n; i++ ) { + for (Index_type j = 0; j < n; j++) { + POLYBENCH_GEMVER_BODY1; + } + } + + for (Index_type i = 0; i < n; i++ ) { + POLYBENCH_GEMVER_BODY2; + for (Index_type j = 0; j < n; j++) { + POLYBENCH_GEMVER_BODY3; + } + POLYBENCH_GEMVER_BODY4; + } + + for (Index_type i = 0; i < n; i++ ) { + POLYBENCH_GEMVER_BODY5; + } + + for (Index_type i = 0; i < n; i++ ) { + POLYBENCH_GEMVER_BODY6; + for (Index_type j = 0; j < n; j++) { + POLYBENCH_GEMVER_BODY7; + } + POLYBENCH_GEMVER_BODY8; + } + + } + stopTimer(); + + break; + } + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_gemver_base_lam1 = [=](Index_type i, Index_type j) { + POLYBENCH_GEMVER_BODY1; + }; + auto poly_gemver_base_lam3 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_GEMVER_BODY3; + }; + auto poly_gemver_base_lam4 = [=](Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY4; + }; + auto poly_gemver_base_lam5 = [=](Index_type i) { + POLYBENCH_GEMVER_BODY5; + }; + auto poly_gemver_base_lam7 = [=](Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_GEMVER_BODY7; + }; + auto poly_gemver_base_lam8 = [=](Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY8; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < n; i++ ) { + for (Index_type j 
= 0; j < n; j++) { + poly_gemver_base_lam1(i, j); + } + } + + for (Index_type i = 0; i < n; i++ ) { + POLYBENCH_GEMVER_BODY2; + for (Index_type j = 0; j < n; j++) { + poly_gemver_base_lam3(i, j, dot); + } + poly_gemver_base_lam4(i, dot); + } + + for (Index_type i = 0; i < n; i++ ) { + poly_gemver_base_lam5(i); + } + + for (Index_type i = 0; i < n; i++ ) { + POLYBENCH_GEMVER_BODY6; + for (Index_type j = 0; j < n; j++) { + poly_gemver_base_lam7(i, j, dot); + } + poly_gemver_base_lam8(i, dot); + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_GEMVER_VIEWS_RAJA; + + auto poly_gemver_lam1 = [=] (Index_type i, Index_type j) { + POLYBENCH_GEMVER_BODY1_RAJA; + }; + auto poly_gemver_lam2 = [=] (Index_type /* i */, Real_type &dot) { + POLYBENCH_GEMVER_BODY2_RAJA; + }; + auto poly_gemver_lam3 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_GEMVER_BODY3_RAJA; + }; + auto poly_gemver_lam4 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY4_RAJA; + }; + auto poly_gemver_lam5 = [=] (Index_type i) { + POLYBENCH_GEMVER_BODY5_RAJA; + }; + auto poly_gemver_lam6 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY6_RAJA; + }; + auto poly_gemver_lam7 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_GEMVER_BODY7_RAJA; + }; + auto poly_gemver_lam8 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_GEMVER_BODY8_RAJA; + }; + + using EXEC_POL1 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0,1>> + > + > + >; + + using EXEC_POL24 = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::Lambda<0, RAJA::Segs<0>, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + + using EXEC_POL3 = RAJA::loop_exec; + + startTimer(); + for (RepIndex_type 
irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{0, n}, + RAJA::RangeSegment{0, n}), + poly_gemver_lam1 + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, n}, + RAJA::RangeSegment{0, n}), + RAJA::tuple{0.0}, + + poly_gemver_lam2, + poly_gemver_lam3, + poly_gemver_lam4 + ); + + RAJA::forall (RAJA::RangeSegment{0, n}, + poly_gemver_lam5 + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, n}, + RAJA::RangeSegment{0, n}), + RAJA::tuple{0.0}, + + poly_gemver_lam6, + poly_gemver_lam7, + poly_gemver_lam8 + + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_GEMVER : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_GESUMMV-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_GESUMMV-Kokkos.cpp new file mode 100644 index 000000000..77d9e0ce0 --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_GESUMMV-Kokkos.cpp @@ -0,0 +1,138 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_GESUMMV.hpp" + +#include "RAJA/RAJA.hpp" + +#include + + +namespace rajaperf +{ +namespace polybench +{ + +void POLYBENCH_GESUMMV::runKokkosVariant(VariantID vid) +{ + // Kokkos stub + return; + + const Index_type run_reps= getRunReps(); + + POLYBENCH_GESUMMV_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < N; ++i ) { + POLYBENCH_GESUMMV_BODY1; + for (Index_type j = 0; j < N; ++j ) { + POLYBENCH_GESUMMV_BODY2; + } + POLYBENCH_GESUMMV_BODY3; + } + + } + stopTimer(); + + break; + } + + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_gesummv_base_lam2 = [=](Index_type i, Index_type j, + Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY2; + }; + auto poly_gesummv_base_lam3 = [=](Index_type i, + Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY3; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < N; ++i ) { + POLYBENCH_GESUMMV_BODY1; + for (Index_type j = 0; j < N; ++j ) { + poly_gesummv_base_lam2(i, j, tmpdot, ydot); + } + poly_gesummv_base_lam3(i, tmpdot, ydot); + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_GESUMMV_VIEWS_RAJA; + + auto poly_gesummv_lam1 = [=](Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY1_RAJA; + }; + auto poly_gesummv_lam2 = [=](Index_type i, Index_type j, + Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY2_RAJA; + }; + auto poly_gesummv_lam3 = [=](Index_type i, + Real_type& tmpdot, Real_type& ydot) { + POLYBENCH_GESUMMV_BODY3_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, // i + RAJA::statement::Lambda<0, RAJA::Params<0,1>>, + RAJA::statement::For<1, RAJA::loop_exec, // j + 
RAJA::statement::Lambda<1, RAJA::Segs<0, 1>, RAJA::Params<0,1>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0,1>> + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::kernel_param( + RAJA::make_tuple( RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N} ), + RAJA::make_tuple(static_cast(0.0), + static_cast(0.0)), + + poly_gesummv_lam1, + poly_gesummv_lam2, + poly_gesummv_lam3 + ); + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_GESUMMV : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_HEAT_3D-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_HEAT_3D-Kokkos.cpp new file mode 100644 index 000000000..df91ae76e --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_HEAT_3D-Kokkos.cpp @@ -0,0 +1,170 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_HEAT_3D.hpp" + +#include "RAJA/RAJA.hpp" + +#include + + +namespace rajaperf +{ +namespace polybench +{ + + +void POLYBENCH_HEAT_3D::runKokkosVariant(VariantID vid) +{ + + // Kokkos stub + return; + + const Index_type run_reps= getRunReps(); + + POLYBENCH_HEAT_3D_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + for (Index_type i = 1; i < N-1; ++i ) { + for (Index_type j = 1; j < N-1; ++j ) { + for (Index_type k = 1; k < N-1; ++k ) { + POLYBENCH_HEAT_3D_BODY1; + } + } + } + + for (Index_type i = 1; i < N-1; ++i ) { + for (Index_type j = 1; j < N-1; ++j ) { + for (Index_type k = 1; k < N-1; ++k ) { + POLYBENCH_HEAT_3D_BODY2; + } + } + } + + } + + } + stopTimer(); + + POLYBENCH_HEAT_3D_DATA_RESET; + + break; + } + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_heat3d_base_lam1 = [=](Index_type i, Index_type j, + Index_type k) { + POLYBENCH_HEAT_3D_BODY1; + }; + auto poly_heat3d_base_lam2 = [=](Index_type i, Index_type j, + Index_type k) { + POLYBENCH_HEAT_3D_BODY2; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + for (Index_type i = 1; i < N-1; ++i ) { + for (Index_type j = 1; j < N-1; ++j ) { + for (Index_type k = 1; k < N-1; ++k ) { + poly_heat3d_base_lam1(i, j, k); + } + } + } + + for (Index_type i = 1; i < N-1; ++i ) { + for (Index_type j = 1; j < N-1; ++j ) { + for (Index_type k = 1; k < N-1; ++k ) { + poly_heat3d_base_lam2(i, j, k); + } + } + } + + } + + } + stopTimer(); + + POLYBENCH_HEAT_3D_DATA_RESET; + + break; + } + + case RAJA_Seq : { + + POLYBENCH_HEAT_3D_VIEWS_RAJA; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, 
RAJA::loop_exec, + RAJA::statement::For<2, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, + RAJA::RangeSegment{1, N-1}, + RAJA::RangeSegment{1, N-1}), + + [=](Index_type i, Index_type j, Index_type k) { + POLYBENCH_HEAT_3D_BODY1_RAJA; + } + + ); + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, + RAJA::RangeSegment{1, N-1}, + RAJA::RangeSegment{1, N-1}), + + [=](Index_type i, Index_type j, Index_type k) { + POLYBENCH_HEAT_3D_BODY2_RAJA; + } + + ); + + } + + } + stopTimer(); + + POLYBENCH_HEAT_3D_DATA_RESET; + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_HEAT_3D : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_JACOBI_1D-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_JACOBI_1D-Kokkos.cpp new file mode 100644 index 000000000..258d677e6 --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_JACOBI_1D-Kokkos.cpp @@ -0,0 +1,126 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_JACOBI_1D.hpp" + +#include "RAJA/RAJA.hpp" + +#include + + +namespace rajaperf +{ +namespace polybench +{ + + +void POLYBENCH_JACOBI_1D::runKokkosVariant(VariantID vid) +{ + + // Kokkos stub + return; + + const Index_type run_reps= getRunReps(); + + POLYBENCH_JACOBI_1D_DATA_SETUP; + + auto poly_jacobi1d_lam1 = [=] (Index_type i) { + POLYBENCH_JACOBI_1D_BODY1; + }; + auto poly_jacobi1d_lam2 = [=] (Index_type i) { + POLYBENCH_JACOBI_1D_BODY2; + }; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + for (Index_type i = 1; i < N-1; ++i ) { + POLYBENCH_JACOBI_1D_BODY1; + } + for (Index_type i = 1; i < N-1; ++i ) { + POLYBENCH_JACOBI_1D_BODY2; + } + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_1D_DATA_RESET; + + break; + } + + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + for (Index_type i = 1; i < N-1; ++i ) { + poly_jacobi1d_lam1(i); + } + for (Index_type i = 1; i < N-1; ++i ) { + poly_jacobi1d_lam2(i); + } + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_1D_DATA_RESET; + + break; + } + + case RAJA_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + RAJA::forall ( RAJA::RangeSegment{1, N-1}, + poly_jacobi1d_lam1 + ); + + RAJA::forall ( RAJA::RangeSegment{1, N-1}, + poly_jacobi1d_lam2 + ); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_1D_DATA_RESET; + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_JACOBI_1D : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git 
a/src/polybench-kokkos/POLYBENCH_JACOBI_2D-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_JACOBI_2D-Kokkos.cpp new file mode 100644 index 000000000..9886fa2fc --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_JACOBI_2D-Kokkos.cpp @@ -0,0 +1,157 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_JACOBI_2D.hpp" + +#include "RAJA/RAJA.hpp" + +#include + + +namespace rajaperf +{ +namespace polybench +{ + + +void POLYBENCH_JACOBI_2D::runKokkosVariant(VariantID vid) +{ + + // Kokkos stub + return; + + const Index_type run_reps= getRunReps(); + + POLYBENCH_JACOBI_2D_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + for (Index_type i = 1; i < N-1; ++i ) { + for (Index_type j = 1; j < N-1; ++j ) { + POLYBENCH_JACOBI_2D_BODY1; + } + } + for (Index_type i = 1; i < N-1; ++i ) { + for (Index_type j = 1; j < N-1; ++j ) { + POLYBENCH_JACOBI_2D_BODY2; + } + } + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_2D_DATA_RESET; + + break; + } + + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_jacobi2d_base_lam1 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY1; + }; + auto poly_jacobi2d_base_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY2; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + for (Index_type i = 1; i < N-1; ++i ) { + for (Index_type j = 1; j < N-1; ++j ) { + poly_jacobi2d_base_lam1(i, j); + } + } + + for (Index_type i = 1; i < N-1; ++i ) { + for (Index_type j = 1; j < 
N-1; ++j ) { + poly_jacobi2d_base_lam2(i, j); + } + } + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_2D_DATA_RESET; + + break; + } + + case RAJA_Seq : { + + POLYBENCH_JACOBI_2D_VIEWS_RAJA; + + auto poly_jacobi2d_lam1 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY1_RAJA; + }; + auto poly_jacobi2d_lam2 = [=](Index_type i, Index_type j) { + POLYBENCH_JACOBI_2D_BODY2_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<0> + > + >, + RAJA::statement::For<0, RAJA::loop_exec, + RAJA::statement::For<1, RAJA::loop_exec, + RAJA::statement::Lambda<1> + > + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type t = 0; t < tsteps; ++t) { + + RAJA::kernel( RAJA::make_tuple(RAJA::RangeSegment{1, N-1}, + RAJA::RangeSegment{1, N-1}), + + poly_jacobi2d_lam1, + poly_jacobi2d_lam2 + ); + + } + + } + stopTimer(); + + POLYBENCH_JACOBI_2D_DATA_RESET; + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_JACOBI_2D : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench-kokkos/POLYBENCH_MVT-Kokkos.cpp b/src/polybench-kokkos/POLYBENCH_MVT-Kokkos.cpp new file mode 100644 index 000000000..36d30946c --- /dev/null +++ b/src/polybench-kokkos/POLYBENCH_MVT-Kokkos.cpp @@ -0,0 +1,186 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "POLYBENCH_MVT.hpp" + +#include "RAJA/RAJA.hpp" + +#include + + +namespace rajaperf +{ +namespace polybench +{ + + +void POLYBENCH_MVT::runKokkosVariant(VariantID vid) +{ + // Kokkos stub + return; + + + const Index_type run_reps= getRunReps(); + + POLYBENCH_MVT_DATA_SETUP; + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < N; ++i ) { + POLYBENCH_MVT_BODY1; + for (Index_type j = 0; j < N; ++j ) { + POLYBENCH_MVT_BODY2; + } + POLYBENCH_MVT_BODY3; + } + + for (Index_type i = 0; i < N; ++i ) { + POLYBENCH_MVT_BODY4; + for (Index_type j = 0; j < N; ++j ) { + POLYBENCH_MVT_BODY5; + } + POLYBENCH_MVT_BODY6; + } + + } + stopTimer(); + + break; + } + + +#if defined(RUN_RAJA_SEQ) + case Lambda_Seq : { + + auto poly_mvt_base_lam2 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_MVT_BODY2; + }; + auto poly_mvt_base_lam3 = [=] (Index_type i, + Real_type &dot) { + POLYBENCH_MVT_BODY3; + }; + auto poly_mvt_base_lam5 = [=] (Index_type i, Index_type j, + Real_type &dot) { + POLYBENCH_MVT_BODY5; + }; + auto poly_mvt_base_lam6 = [=] (Index_type i, + Real_type &dot) { + POLYBENCH_MVT_BODY6; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + for (Index_type i = 0; i < N; ++i ) { + POLYBENCH_MVT_BODY1; + for (Index_type j = 0; j < N; ++j ) { + poly_mvt_base_lam2(i, j, dot); + } + poly_mvt_base_lam3(i, dot); + } + + for (Index_type i = 0; i < N; ++i ) { + POLYBENCH_MVT_BODY4; + for (Index_type j = 0; j < N; ++j ) { + poly_mvt_base_lam5(i, j, dot); + } + poly_mvt_base_lam6(i, dot); + } + + } + stopTimer(); + + break; + } + + case RAJA_Seq : { + + POLYBENCH_MVT_VIEWS_RAJA; + + auto poly_mvt_lam1 = [=] (Real_type &dot) { + POLYBENCH_MVT_BODY1_RAJA; + }; + auto poly_mvt_lam2 = [=] (Index_type i, 
Index_type j, Real_type &dot) { + POLYBENCH_MVT_BODY2_RAJA; + }; + auto poly_mvt_lam3 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_MVT_BODY3_RAJA; + }; + auto poly_mvt_lam4 = [=] (Real_type &dot) { + POLYBENCH_MVT_BODY4_RAJA; + }; + auto poly_mvt_lam5 = [=] (Index_type i, Index_type j, Real_type &dot) { + POLYBENCH_MVT_BODY5_RAJA; + }; + auto poly_mvt_lam6 = [=] (Index_type i, Real_type &dot) { + POLYBENCH_MVT_BODY6_RAJA; + }; + + using EXEC_POL = + RAJA::KernelPolicy< + RAJA::statement::For<0, RAJA::loop_exec, // i + RAJA::statement::Lambda<0, RAJA::Params<0>>, + RAJA::statement::For<1, RAJA::loop_exec, // j + RAJA::statement::Lambda<1, RAJA::Segs<0,1>, RAJA::Params<0>> + >, + RAJA::statement::Lambda<2, RAJA::Segs<0>, RAJA::Params<0>> + > + >; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + RAJA::region( [=]() { + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + RAJA::tuple{0.0}, + + poly_mvt_lam1, + poly_mvt_lam2, + poly_mvt_lam3 + + ); + + RAJA::kernel_param( + RAJA::make_tuple(RAJA::RangeSegment{0, N}, + RAJA::RangeSegment{0, N}), + RAJA::tuple{0.0}, + + poly_mvt_lam4, + poly_mvt_lam5, + poly_mvt_lam6 + + ); + + }); // end sequential region (for single-source code) + + } + stopTimer(); + + break; + } +#endif // RUN_RAJA_SEQ + + default : { + std::cout << "\n POLYBENCH_MVT : Unknown variant id = " << vid << std::endl; + } + + } + +} + +} // end namespace polybench +} // end namespace rajaperf diff --git a/src/polybench/POLYBENCH_2MM.cpp b/src/polybench/POLYBENCH_2MM.cpp index 7e2083c50..dcf955903 100644 --- a/src/polybench/POLYBENCH_2MM.cpp +++ b/src/polybench/POLYBENCH_2MM.cpp @@ -78,6 +78,9 @@ POLYBENCH_2MM::POLYBENCH_2MM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined(Kokkos_Lambda); + } POLYBENCH_2MM::~POLYBENCH_2MM() diff --git a/src/polybench/POLYBENCH_2MM.hpp 
b/src/polybench/POLYBENCH_2MM.hpp index 897eb13a3..d5bbea602 100644 --- a/src/polybench/POLYBENCH_2MM.hpp +++ b/src/polybench/POLYBENCH_2MM.hpp @@ -128,6 +128,10 @@ class POLYBENCH_2MM : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + + + private: Index_type m_ni; Index_type m_nj; diff --git a/src/polybench/POLYBENCH_3MM.cpp b/src/polybench/POLYBENCH_3MM.cpp index 2c06a72ac..bf0eaf916 100644 --- a/src/polybench/POLYBENCH_3MM.cpp +++ b/src/polybench/POLYBENCH_3MM.cpp @@ -86,6 +86,9 @@ POLYBENCH_3MM::POLYBENCH_3MM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_3MM::~POLYBENCH_3MM() diff --git a/src/polybench/POLYBENCH_3MM.hpp b/src/polybench/POLYBENCH_3MM.hpp index 80d0a2fe5..7a7ed08d5 100644 --- a/src/polybench/POLYBENCH_3MM.hpp +++ b/src/polybench/POLYBENCH_3MM.hpp @@ -154,6 +154,8 @@ class POLYBENCH_3MM : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_ni; Index_type m_nj; diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp index c36b41050..3e0fdf815 100644 --- a/src/polybench/POLYBENCH_ADI.cpp +++ b/src/polybench/POLYBENCH_ADI.cpp @@ -63,6 +63,9 @@ POLYBENCH_ADI::POLYBENCH_ADI(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_ADI::~POLYBENCH_ADI() diff --git a/src/polybench/POLYBENCH_ADI.hpp b/src/polybench/POLYBENCH_ADI.hpp index bec422925..f33a477d0 100644 --- a/src/polybench/POLYBENCH_ADI.hpp +++ b/src/polybench/POLYBENCH_ADI.hpp @@ -195,6 +195,7 @@ class POLYBENCH_ADI : public KernelBase void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void 
runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); private: Index_type m_n; diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp index e06917239..5a54b9585 100644 --- a/src/polybench/POLYBENCH_ATAX.cpp +++ b/src/polybench/POLYBENCH_ATAX.cpp @@ -65,6 +65,9 @@ POLYBENCH_ATAX::POLYBENCH_ATAX(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_ATAX::~POLYBENCH_ATAX() diff --git a/src/polybench/POLYBENCH_ATAX.hpp b/src/polybench/POLYBENCH_ATAX.hpp index d2c5ec63e..07dd31ec4 100644 --- a/src/polybench/POLYBENCH_ATAX.hpp +++ b/src/polybench/POLYBENCH_ATAX.hpp @@ -116,6 +116,8 @@ class POLYBENCH_ATAX : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_N; Real_ptr m_tmp; diff --git a/src/polybench/POLYBENCH_FDTD_2D.cpp b/src/polybench/POLYBENCH_FDTD_2D.cpp index 59e03721c..a0ac53e93 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D.cpp @@ -84,6 +84,9 @@ POLYBENCH_FDTD_2D::POLYBENCH_FDTD_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_FDTD_2D::~POLYBENCH_FDTD_2D() diff --git a/src/polybench/POLYBENCH_FDTD_2D.hpp b/src/polybench/POLYBENCH_FDTD_2D.hpp index a1ead28b2..3cb4b6c2c 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.hpp +++ b/src/polybench/POLYBENCH_FDTD_2D.hpp @@ -114,6 +114,8 @@ class POLYBENCH_FDTD_2D : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_nx; Index_type m_ny; diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp index b3306a992..f0a77b1ce 100644 --- 
a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp @@ -60,6 +60,9 @@ POLYBENCH_FLOYD_WARSHALL::POLYBENCH_FLOYD_WARSHALL(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_FLOYD_WARSHALL::~POLYBENCH_FLOYD_WARSHALL() diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp index ec2bcab9f..5d1c5fdfe 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.hpp @@ -77,6 +77,8 @@ class POLYBENCH_FLOYD_WARSHALL : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_N; diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp index a50ac09da..aef2b30ec 100644 --- a/src/polybench/POLYBENCH_GEMM.cpp +++ b/src/polybench/POLYBENCH_GEMM.cpp @@ -70,6 +70,9 @@ POLYBENCH_GEMM::POLYBENCH_GEMM(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_GEMM::~POLYBENCH_GEMM() diff --git a/src/polybench/POLYBENCH_GEMM.hpp b/src/polybench/POLYBENCH_GEMM.hpp index dd9e4a5a7..04b01adfe 100644 --- a/src/polybench/POLYBENCH_GEMM.hpp +++ b/src/polybench/POLYBENCH_GEMM.hpp @@ -100,6 +100,8 @@ class POLYBENCH_GEMM : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_ni; Index_type m_nj; diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp index fce83907a..01f3a8b69 100644 --- a/src/polybench/POLYBENCH_GEMVER.cpp +++ b/src/polybench/POLYBENCH_GEMVER.cpp @@ -79,6 +79,9 @@ POLYBENCH_GEMVER::POLYBENCH_GEMVER(const RunParams& params) setVariantDefined( Base_HIP ); 
setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_GEMVER::~POLYBENCH_GEMVER() diff --git a/src/polybench/POLYBENCH_GEMVER.hpp b/src/polybench/POLYBENCH_GEMVER.hpp index 919f18e5c..0dd0c04ed 100644 --- a/src/polybench/POLYBENCH_GEMVER.hpp +++ b/src/polybench/POLYBENCH_GEMVER.hpp @@ -153,6 +153,8 @@ class POLYBENCH_GEMVER : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_n; Real_type m_alpha; diff --git a/src/polybench/POLYBENCH_GESUMMV.cpp b/src/polybench/POLYBENCH_GESUMMV.cpp index 39cb94510..63ba0e7d9 100644 --- a/src/polybench/POLYBENCH_GESUMMV.cpp +++ b/src/polybench/POLYBENCH_GESUMMV.cpp @@ -59,6 +59,9 @@ POLYBENCH_GESUMMV::POLYBENCH_GESUMMV(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_GESUMMV::~POLYBENCH_GESUMMV() diff --git a/src/polybench/POLYBENCH_GESUMMV.hpp b/src/polybench/POLYBENCH_GESUMMV.hpp index c8cc9e191..35c024852 100644 --- a/src/polybench/POLYBENCH_GESUMMV.hpp +++ b/src/polybench/POLYBENCH_GESUMMV.hpp @@ -99,6 +99,8 @@ class POLYBENCH_GESUMMV : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_N; diff --git a/src/polybench/POLYBENCH_HEAT_3D.cpp b/src/polybench/POLYBENCH_HEAT_3D.cpp index 85fd0ce38..3723fc3b5 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D.cpp @@ -70,6 +70,9 @@ POLYBENCH_HEAT_3D::POLYBENCH_HEAT_3D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_HEAT_3D::~POLYBENCH_HEAT_3D() diff --git a/src/polybench/POLYBENCH_HEAT_3D.hpp b/src/polybench/POLYBENCH_HEAT_3D.hpp index b21b56576..f93a08ffd 
100644 --- a/src/polybench/POLYBENCH_HEAT_3D.hpp +++ b/src/polybench/POLYBENCH_HEAT_3D.hpp @@ -125,6 +125,8 @@ class POLYBENCH_HEAT_3D : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_N; Index_type m_tsteps; diff --git a/src/polybench/POLYBENCH_JACOBI_1D.cpp b/src/polybench/POLYBENCH_JACOBI_1D.cpp index 48c064780..0dd246434 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.cpp @@ -67,6 +67,9 @@ POLYBENCH_JACOBI_1D::POLYBENCH_JACOBI_1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_JACOBI_1D::~POLYBENCH_JACOBI_1D() diff --git a/src/polybench/POLYBENCH_JACOBI_1D.hpp b/src/polybench/POLYBENCH_JACOBI_1D.hpp index 290e26ce0..6990d489b 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.hpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.hpp @@ -71,6 +71,8 @@ class POLYBENCH_JACOBI_1D : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_N; Index_type m_tsteps; diff --git a/src/polybench/POLYBENCH_JACOBI_2D.cpp b/src/polybench/POLYBENCH_JACOBI_2D.cpp index 9e204bdab..b78ee2134 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.cpp @@ -69,6 +69,9 @@ POLYBENCH_JACOBI_2D::POLYBENCH_JACOBI_2D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_JACOBI_2D::~POLYBENCH_JACOBI_2D() diff --git a/src/polybench/POLYBENCH_JACOBI_2D.hpp b/src/polybench/POLYBENCH_JACOBI_2D.hpp index 9a57325a1..0b0b104a3 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.hpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.hpp @@ -91,6 +91,8 @@ class POLYBENCH_JACOBI_2D : public KernelBase void runHipVariant(VariantID 
vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_N; Index_type m_tsteps; diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp index ae2749ce5..6af5b5e45 100644 --- a/src/polybench/POLYBENCH_MVT.cpp +++ b/src/polybench/POLYBENCH_MVT.cpp @@ -62,6 +62,9 @@ POLYBENCH_MVT::POLYBENCH_MVT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + } POLYBENCH_MVT::~POLYBENCH_MVT() diff --git a/src/polybench/POLYBENCH_MVT.hpp b/src/polybench/POLYBENCH_MVT.hpp index cb72784ed..37c953c53 100644 --- a/src/polybench/POLYBENCH_MVT.hpp +++ b/src/polybench/POLYBENCH_MVT.hpp @@ -113,6 +113,8 @@ class POLYBENCH_MVT : public KernelBase void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + void runKokkosVariant(VariantID vid); + private: Index_type m_N; Real_ptr m_x1; diff --git a/src/stream-kokkos/ADD-Kokkos.cpp b/src/stream-kokkos/ADD-Kokkos.cpp new file mode 100644 index 000000000..58aa1ad16 --- /dev/null +++ b/src/stream-kokkos/ADD-Kokkos.cpp @@ -0,0 +1,85 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. 
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "ADD.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+// Kokkos variant of the stream ADD kernel.
+//
+// Wraps the a, b and c arrays (expected to come from ADD_DATA_SETUP) in
+// views of length iend and finally passes each array/view pair to
+// moveDataToHostFromKokkosView. In between, and only when RUN_KOKKOS is
+// defined, vid == Kokkos_Lambda times run_reps launches of
+// c[i] = a[i] + b[i]; any other vid prints a diagnostic.
+void ADD::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  ADD_DATA_SETUP;
+
+  // Instantiating views using getViewFromPointer
+  auto a_view = getViewFromPointer(a, iend);
+  auto b_view = getViewFromPointer(b, iend);
+  auto c_view = getViewFromPointer(c, iend);
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      // Fence before starting the timer so pending work is not measured
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        Kokkos::parallel_for("ADD_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            // ADD_BODY definition in header:
+            //   c[i] = a[i] + b[i];
+            c_view[i] = a_view[i] + b_view[i];
+          });
+
+      }
+
+      // Fence again so the timer covers completion of every launch
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n ADD : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  // NOTE(review): the view helpers are declared outside this file and are
+  // called even when RUN_KOKKOS is undefined -- confirm that is intended.
+  moveDataToHostFromKokkosView(a, a_view, iend);
+  moveDataToHostFromKokkosView(b, b_view, iend);
+  moveDataToHostFromKokkosView(c, c_view, iend);
+
+}
+
+} // end namespace stream
+} // end namespace rajaperf
diff --git a/src/stream-kokkos/CMakeLists.txt b/src/stream-kokkos/CMakeLists.txt
new file mode 100644
index 000000000..ffe93cce2
--- /dev/null
+++ b/src/stream-kokkos/CMakeLists.txt
@@ -0,0 +1,19 @@
+###############################################################################
+# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+# and RAJA Performance Suite project contributors.
+# See the RAJAPerf/COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+###############################################################################
+# Kernel headers (ADD.hpp, COPY.hpp, ...) are in the sibling stream directory.
+include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../stream)
+
+blt_add_library(
+  NAME stream-kokkos
+  SOURCES ADD-Kokkos.cpp
+          COPY-Kokkos.cpp
+          DOT-Kokkos.cpp
+          MUL-Kokkos.cpp
+          TRIAD-Kokkos.cpp
+  DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
+  )
diff --git a/src/stream-kokkos/COPY-Kokkos.cpp b/src/stream-kokkos/COPY-Kokkos.cpp
new file mode 100644
index 000000000..59dd499f6
--- /dev/null
+++ b/src/stream-kokkos/COPY-Kokkos.cpp
@@ -0,0 +1,81 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "COPY.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+// Kokkos variant of the stream COPY kernel.
+//
+// Wraps the a and c arrays (expected to come from COPY_DATA_SETUP) in views
+// of length iend and finally passes each array/view pair to
+// moveDataToHostFromKokkosView. In between, and only when RUN_KOKKOS is
+// defined, vid == Kokkos_Lambda times run_reps launches of c[i] = a[i];
+// any other vid prints a diagnostic.
+void COPY::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  COPY_DATA_SETUP;
+
+  auto a_view = getViewFromPointer(a, iend);
+  auto c_view = getViewFromPointer(c, iend);
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      // Fence before starting the timer so pending work is not measured
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        Kokkos::parallel_for("COPY_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            // COPY_BODY definition in header:
+            //   c[i] = a[i] ;
+            c_view[i] = a_view[i];
+          });
+      }
+
+      // Fence again so the timer covers completion of every launch
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n COPY : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  // NOTE(review): the view helpers are declared outside this file and are
+  // called even when RUN_KOKKOS is undefined -- confirm that is intended.
+  moveDataToHostFromKokkosView(a, a_view, iend);
+  moveDataToHostFromKokkosView(c, c_view, iend);
+
+}
+
+} // end namespace stream
+} // end namespace rajaperf
diff --git a/src/stream-kokkos/DOT-Kokkos.cpp b/src/stream-kokkos/DOT-Kokkos.cpp
new file mode 100644
index 000000000..19a1c00eb
--- /dev/null
+++ b/src/stream-kokkos/DOT-Kokkos.cpp
@@ -0,0 +1,90 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DOT.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+// Kokkos variant of the stream DOT kernel.
+//
+// Wraps the a and b arrays (expected to come from DOT_DATA_SETUP) in views
+// of length iend and finally passes each array/view pair to
+// moveDataToHostFromKokkosView. In between, and only when RUN_KOKKOS is
+// defined, vid == Kokkos_Lambda times run_reps reductions of a[i] * b[i]
+// and adds each result to m_dot; any other vid prints a diagnostic.
+void DOT::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DOT_DATA_SETUP;
+
+  // Instantiation of pointer-wrapped Kokkos views:
+  auto a_view = getViewFromPointer(a, iend);
+  auto b_view = getViewFromPointer(b, iend);
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      // Fence before starting the timer so pending work is not measured
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+        // NOTE(review): Kokkos presumably overwrites dot with the reduction
+        // result, which would discard the m_dot_init starting value; that is
+        // harmless only if m_dot_init is zero -- confirm.
+        Real_type dot = m_dot_init;
+
+        Kokkos::parallel_reduce("DOT-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(const int64_t i, Real_type& dot_res) {
+            // DOT_BODY definition from header:
+            //   dot += a[i] * b[i] ;
+            dot_res += a_view[i] * b_view[i];
+          }, dot);
+
+        m_dot += static_cast<Real_type>(dot);
+      }
+
+      // Fence again so the timer covers completion of every launch
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n DOT : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  // NOTE(review): the view helpers are declared outside this file and are
+  // called even when RUN_KOKKOS is undefined -- confirm that is intended.
+  moveDataToHostFromKokkosView(a, a_view, iend);
+  moveDataToHostFromKokkosView(b, b_view, iend);
+
+}
+
+} // end namespace stream
+} // end
namespace rajaperf
diff --git a/src/stream-kokkos/MUL-Kokkos.cpp b/src/stream-kokkos/MUL-Kokkos.cpp
new file mode 100644
index 000000000..b68ba6291
--- /dev/null
+++ b/src/stream-kokkos/MUL-Kokkos.cpp
@@ -0,0 +1,87 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MUL.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+// Kokkos variant of the stream MUL kernel.
+//
+// Wraps the b and c arrays declared by MUL_DATA_SETUP in views of length
+// iend and finally passes each array/view pair to
+// moveDataToHostFromKokkosView. In between, and only when RUN_KOKKOS is
+// defined, vid == Kokkos_Lambda times run_reps launches of
+// b[i] = alpha * c[i]; any other vid prints a diagnostic.
+void MUL::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MUL_DATA_SETUP;
+
+  /* From MUL.hpp:
+     #define MUL_DATA_SETUP \
+       Real_ptr b = m_b; \
+       Real_ptr c = m_c; \
+       Real_type alpha = m_alpha
+  */
+  auto b_view = getViewFromPointer(b, iend);
+  auto c_view = getViewFromPointer(c, iend);
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      // Fence before starting the timer so pending work is not measured
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        Kokkos::parallel_for("MUL_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            // MUL_BODY definition:
+            //   b[i] = alpha * c[i] ;
+            b_view[i] = alpha * c_view[i];
+          });
+      }
+
+      // Fence again so the timer covers completion of every launch
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n MUL : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  // NOTE(review): the view helpers are declared outside this file and are
+  // called even when RUN_KOKKOS is undefined -- confirm that is intended.
+  moveDataToHostFromKokkosView(b, b_view, iend);
+  moveDataToHostFromKokkosView(c, c_view, iend);
+
+}
+
+} // end namespace stream
+} // end namespace rajaperf
diff --git a/src/stream-kokkos/TRIAD-Kokkos.cpp b/src/stream-kokkos/TRIAD-Kokkos.cpp
new file
mode 100644
index 000000000..b7b491181
--- /dev/null
+++ b/src/stream-kokkos/TRIAD-Kokkos.cpp
@@ -0,0 +1,90 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "TRIAD.hpp"
+
+#include "RAJA/RAJA.hpp"
+
+#include <iostream>
+
+namespace rajaperf
+{
+namespace stream
+{
+
+// Kokkos variant of the stream TRIAD kernel.
+//
+// Wraps the a, b and c arrays declared by TRIAD_DATA_SETUP in views of
+// length iend and finally passes each array/view pair to
+// moveDataToHostFromKokkosView. In between, and only when RUN_KOKKOS is
+// defined, vid == Kokkos_Lambda times run_reps launches of
+// a[i] = b[i] + alpha * c[i]; any other vid prints a diagnostic.
+void TRIAD::runKokkosVariant(VariantID vid)
+{
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  TRIAD_DATA_SETUP;
+
+  /*
+     #define TRIAD_DATA_SETUP \
+       Real_ptr a = m_a; \
+       Real_ptr b = m_b; \
+       Real_ptr c = m_c; \
+       Real_type alpha = m_alpha;
+  */
+  auto a_view = getViewFromPointer(a, iend);
+  auto b_view = getViewFromPointer(b, iend);
+  auto c_view = getViewFromPointer(c, iend);
+
+#if defined(RUN_KOKKOS)
+
+  switch ( vid ) {
+
+    case Kokkos_Lambda : {
+
+      // Fence before starting the timer so pending work is not measured
+      Kokkos::fence();
+      startTimer();
+
+      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+        Kokkos::parallel_for("TRIAD_Kokkos, Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            // TRIAD_BODY definition in TRIAD.hpp:
+            //   a[i] = b[i] + alpha * c[i] ;
+            a_view[i] = b_view[i] + alpha * c_view[i];
+          });
+      }
+
+      // Fence again so the timer covers completion of every launch
+      Kokkos::fence();
+      stopTimer();
+
+      break;
+    }
+
+    default : {
+      std::cout << "\n TRIAD : Unknown variant id = " << vid << std::endl;
+    }
+
+  }
+
+#endif // RUN_KOKKOS
+
+  // NOTE(review): the view helpers are declared outside this file and are
+  // called even when RUN_KOKKOS is undefined -- confirm that is intended.
+  moveDataToHostFromKokkosView(a, a_view, iend);
+  moveDataToHostFromKokkosView(b, b_view, iend);
+  moveDataToHostFromKokkosView(c, c_view, iend);
+
+}
+
+} // end namespace stream
+} // end namespace rajaperf
diff --git a/src/stream/ADD.cpp b/src/stream/ADD.cpp
index 200172e60..e8a82d649 100644 --- a/src/stream/ADD.cpp +++ b/src/stream/ADD.cpp @@ -52,6 +52,8 @@ ADD::ADD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined(Kokkos_Lambda); } ADD::~ADD() diff --git a/src/stream/ADD.hpp b/src/stream/ADD.hpp index 0bf45b810..cce5496e6 100644 --- a/src/stream/ADD.hpp +++ b/src/stream/ADD.hpp @@ -47,6 +47,7 @@ class ADD : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/stream/COPY.cpp b/src/stream/COPY.cpp index d8c7ec1d6..b1edec409 100644 --- a/src/stream/COPY.cpp +++ b/src/stream/COPY.cpp @@ -52,6 +52,8 @@ COPY::COPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } COPY::~COPY() diff --git a/src/stream/COPY.hpp b/src/stream/COPY.hpp index 010a391c8..55a6fce4a 100644 --- a/src/stream/COPY.hpp +++ b/src/stream/COPY.hpp @@ -46,6 +46,8 @@ class COPY : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); + void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/stream/DOT.cpp b/src/stream/DOT.cpp index cca4aae4a..75a7afcb0 100644 --- a/src/stream/DOT.cpp +++ b/src/stream/DOT.cpp @@ -52,6 +52,8 @@ DOT::DOT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + // Kokkos info + setVariantDefined(Kokkos_Lambda); } DOT::~DOT() diff --git a/src/stream/DOT.hpp b/src/stream/DOT.hpp index adb9309c4..cb581ef60 100644 --- a/src/stream/DOT.hpp +++ b/src/stream/DOT.hpp @@ -46,11 +46,15 @@ class DOT : public KernelBase void updateChecksum(VariantID vid); void 
tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); void runHipVariant(VariantID vid); void runOpenMPTargetVariant(VariantID vid); + + // Kokkos additions + private: Real_ptr m_a; diff --git a/src/stream/MUL.cpp b/src/stream/MUL.cpp index 6b167de04..7a925e05e 100644 --- a/src/stream/MUL.cpp +++ b/src/stream/MUL.cpp @@ -52,6 +52,8 @@ MUL::MUL(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined(Kokkos_Lambda); } MUL::~MUL() diff --git a/src/stream/MUL.hpp b/src/stream/MUL.hpp index f8fcefbcb..9270b3ad7 100644 --- a/src/stream/MUL.hpp +++ b/src/stream/MUL.hpp @@ -47,6 +47,7 @@ class MUL : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + void runKokkosVariant(VariantID vid); void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/src/stream/TRIAD.cpp b/src/stream/TRIAD.cpp index dfa04eda0..3976ff21d 100644 --- a/src/stream/TRIAD.cpp +++ b/src/stream/TRIAD.cpp @@ -56,6 +56,9 @@ TRIAD::TRIAD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined(Kokkos_Lambda); + } TRIAD::~TRIAD() diff --git a/src/stream/TRIAD.hpp b/src/stream/TRIAD.hpp index 8d2f01236..94f0a653e 100644 --- a/src/stream/TRIAD.hpp +++ b/src/stream/TRIAD.hpp @@ -48,6 +48,8 @@ class TRIAD : public KernelBase void updateChecksum(VariantID vid); void tearDown(VariantID vid); + + void runKokkosVariant(VariantID vid); void runSeqVariant(VariantID vid); void runOpenMPVariant(VariantID vid); void runCudaVariant(VariantID vid); diff --git a/tpl/RAJA b/tpl/RAJA index 0506cea3a..3d0ba43b0 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 0506cea3aaad168de79df59a8df9fc6f27799aa3 +Subproject commit 
3d0ba43b0d7ee97a75b5630313e61921dd22f46e diff --git a/tpl/kokkos b/tpl/kokkos new file mode 160000 index 000000000..4af934941 --- /dev/null +++ b/tpl/kokkos @@ -0,0 +1 @@ +Subproject commit 4af9349419d10d14fa42f0eb1bbcf8d8054f29ff