diff --git a/.gitmodules b/.gitmodules index 13f05ecd3..0993afb86 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "tpl/RAJA"] path = tpl/RAJA url = https://github.com/LLNL/RAJA.git +[submodule "tpl/kokkos"] + path = tpl/kokkos + url = https://github.com/kokkos/kokkos diff --git a/CMakeLists.txt b/CMakeLists.txt index 813d1e9b0..83a574af2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ cmake_minimum_required(VERSION 3.14.5) option(ENABLE_RAJA_SEQUENTIAL "Run sequential variants of RAJA kernels. Disable this, and all other variants, to run _only_ raw C loops." On) +option(ENABLE_KOKKOS "Include Kokkos implementations of the kernels in the RAJA Perfsuite" Off) # # Note: the BLT build system is inheritted by RAJA and is initialized by RAJA @@ -22,8 +23,13 @@ if (PERFSUITE_ENABLE_WARNINGS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") endif() -set(CMAKE_CXX_STANDARD 14) -set(BLT_CXX_STD c++14) +if(ENABLE_KOKKOS) + set(CMAKE_CXX_STANDARD 17) + set(BLT_CXX_STD c++17) +else() + set(CMAKE_CXX_STANDARD 14) + set(BLT_CXX_STD c++14) +endif() include(blt/SetupBLT.cmake) @@ -100,7 +106,12 @@ endif() if (ENABLE_CUDA) list(APPEND RAJA_PERFSUITE_DEPENDS cuda) endif() -if (ENABLE_HIP) + +# Kokkos requires hipcc as the CMAKE_CXX_COMPILER for HIP AMD/VEGA GPU +# platforms, whereas RAJAPerf Suite uses blt/CMake FindHIP to set HIP compiler. +# Separate RAJAPerf Suite and Kokkos handling of HIP compilers + +if ((ENABLE_HIP) AND (NOT ENABLE_KOKKOS)) message(STATUS "HIP version: ${hip_VERSION}") if("${hip_VERSION}" VERSION_LESS "3.5") message(FATAL_ERROR "Trying to use HIP/ROCm version ${hip_VERSION}. RAJA Perf Suite requires HIP/ROCm version 3.5 or newer. 
") @@ -113,8 +124,13 @@ set(RAJAPERF_BUILD_SYSTYPE $ENV{SYS_TYPE}) set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME}) if (ENABLE_CUDA) - set(CMAKE_CUDA_STANDARD 14) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr") + if (ENABLE_KOKKOS) + set(CMAKE_CUDA_STANDARD 17) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict --extended-lambda --expt-relaxed-constexpr") + else() + set(CMAKE_CUDA_STANDARD 14) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr") + endif() set(RAJAPERF_COMPILER "${CUDA_NVCC_EXECUTABLE}") list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER}) @@ -135,13 +151,46 @@ configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in include_directories($) -# Make sure RAJA flag propagate (we need to do some house cleaning to +# Make sure RAJA flags propagate (we need to do some tidying to # remove project-specific CMake variables that are no longer needed) set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS}) # # Each directory in the perf suite has its own CMakeLists.txt file. -#
+
+# ENABLE_KOKKOS is a RAJAPerf Suite option; it maps the suite's ENABLE_* backend switches onto Kokkos_ENABLE_*.
+if(ENABLE_KOKKOS)
+  add_definitions(-DRUN_KOKKOS)  # guards the Kokkos kernel variants and Kokkos::initialize/finalize in the driver
+  if(ENABLE_HIP)
+    set(Kokkos_ENABLE_HIP ON CACHE BOOL
+        "Enable the Kokkos HIP backend (turned on because ENABLE_HIP is set)")
+  endif()
+
+  if(ENABLE_TARGET_OPENMP)
+    set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL "Enable the Kokkos OpenMPTarget backend")
+    if(NOT CMAKE_BUILD_TYPE MATCHES Debug)
+      if(NOT EXPERIMENTAL_BUILD)
+        message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON")
+      endif()
+    endif()
+  endif()
+
+  # ENABLE_CUDA is a RAJAPerf Suite option
+  if(ENABLE_CUDA)
+    set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Enable the Kokkos CUDA backend")
+    set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Enable lambda support in the Kokkos CUDA backend")
+    enable_language(CUDA)
+  endif()
+  if(ENABLE_OPENMP)
+    set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Enable the Kokkos OpenMP backend")
+  endif()
+  # Build the bundled Kokkos, then expose its include directories and its target to the suite libraries.
+  add_subdirectory(tpl/kokkos)
+  get_property(KOKKOS_INCLUDE_DIRS DIRECTORY tpl/kokkos PROPERTY INCLUDE_DIRECTORIES)
+  include_directories(${KOKKOS_INCLUDE_DIRS})
+  list(APPEND RAJA_PERFSUITE_DEPENDS kokkos)
+endif()
+
 add_subdirectory(src) if (RAJA_PERFSUITE_ENABLE_TESTS) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 598131623..dbc4c6969 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,8 +9,9 @@ include_directories(.) add_subdirectory(common) -add_subdirectory(apps) add_subdirectory(basic) +add_subdirectory(basic-kokkos) +add_subdirectory(apps) add_subdirectory(lcals) add_subdirectory(polybench) add_subdirectory(stream) @@ -20,6 +21,7 @@ set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS common apps basic + basic-kokkos lcals polybench stream diff --git a/src/RAJAPerfSuiteDriver.cpp b/src/RAJAPerfSuiteDriver.cpp index d423dcff9..17e64f0f0 100644 --- a/src/RAJAPerfSuiteDriver.cpp +++ b/src/RAJAPerfSuiteDriver.cpp @@ -6,6 +6,10 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifdef RUN_KOKKOS +#include +#endif + #include "common/Executor.hpp" #include @@ -24,6 +28,9 @@ int main( int argc, char** argv ) MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." << std::endl; #endif +#ifdef RUN_KOKKOS + Kokkos::initialize(argc, argv); +#endif // STEP 1: Create suite executor object rajaperf::Executor executor(argc, argv); @@ -43,6 +50,9 @@ int main( int argc, char** argv ) rajaperf::getCout() << "\n\nDONE!!!...."
<< std::endl; +#ifdef RUN_KOKKOS + Kokkos::finalize(); +#endif #ifdef RAJA_PERFSUITE_ENABLE_MPI MPI_Finalize(); #endif
diff --git a/src/basic-kokkos/CMakeLists.txt b/src/basic-kokkos/CMakeLists.txt
new file mode 100644
index 000000000..bccb29e1d
--- /dev/null
+++ b/src/basic-kokkos/CMakeLists.txt
@@ -0,0 +1,27 @@
+###############################################################################
+# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+# and RAJA Performance Suite project contributors.
+# See the RAJAPerf/COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+###############################################################################
+
+# Kokkos variants of the "basic" kernels. The kernel headers (e.g. DAXPY.hpp)
+# live in ../basic, which is put on the include path through INCLUDES below.
+blt_add_library(
+  NAME basic-kokkos
+  SOURCES
+    PI_ATOMIC-Kokkos.cpp
+    DAXPY-Kokkos.cpp
+    IF_QUAD-Kokkos.cpp
+    INIT3-Kokkos.cpp
+    INIT_VIEW1D-Kokkos.cpp
+    INIT_VIEW1D_OFFSET-Kokkos.cpp
+    MULADDSUB-Kokkos.cpp
+    NESTED_INIT-Kokkos.cpp
+    REDUCE3_INT-Kokkos.cpp
+    TRAP_INT-Kokkos.cpp
+    DAXPY_ATOMIC-Kokkos.cpp
+  INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../basic
+  DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
+  )
diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp
new file mode 100644
index 000000000..6c3ad5e6e
--- /dev/null
+++ b/src/basic-kokkos/DAXPY-Kokkos.cpp
@@ -0,0 +1,69 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DAXPY.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Functor form of the DAXPY body; the Kokkos_Lambda variant below does not use it.
+struct DaxpyFunctor {
+  Real_ptr x;
+  Real_ptr y;
+  Real_type a;
+  DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a)
+      : x(m_x), y(m_y), a(m_a) {}
+  void operator()(Index_type i) const { DAXPY_BODY; }
+};
+// Kokkos variant of DAXPY: y[i] += a * x[i] over [ibegin, iend), repeated run_reps times under the timer.
+void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DAXPY_DATA_SETUP;
+  // Wrap the raw x and y pointers in Kokkos Views so the kernel can index them.
+  auto x_view = getViewFromPointer(x, iend);
+  auto y_view = getViewFromPointer(y, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+      Kokkos::parallel_for(
+          "DAXPY-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) { y_view[i] += a * x_view[i]; });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+  default: {
+    getCout() << "\n  DAXPY : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  // Move data (i.e., pointer, KokkosView-wrapped pointer) back to the host from
+  // the device
+
+  moveDataToHostFromKokkosView(x, x_view, iend);
+  moveDataToHostFromKokkosView(y, y_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp
new file mode 100644
index 000000000..0f90fa86c
--- /dev/null
+++ b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp
@@ -0,0 +1,70 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/LICENSE file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "DAXPY_ATOMIC.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+
+//
+// Kokkos variant of DAXPY_ATOMIC: performs y[i] += a * x[i] over
+// [ibegin, iend), applying every update through Kokkos::atomic_add.
+//
+void DAXPY_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
+{
+
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  DAXPY_ATOMIC_DATA_SETUP;
+  //
+  // Kokkos Views to wrap pointers declared in DAXPY_ATOMIC.hpp
+  //
+
+  auto x_view = getViewFromPointer(x, iend);
+  auto y_view = getViewFromPointer(y, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "DAXPY_ATOMIC_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            Kokkos::atomic_add(&y_view[i], a * x_view[i]);
+          });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(x, x_view, iend);
+  moveDataToHostFromKokkosView(y, y_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp
new file mode 100644
index 000000000..e1b8cc601
--- /dev/null
+++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp
@@ -0,0 +1,77 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "IF_QUAD.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of IF_QUAD: per index, the real roots of a*x^2 + b*x + c, or 0.0 for both when s < 0.
+void IF_QUAD::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  IF_QUAD_DATA_SETUP;
+
+  // Wrap the coefficient (a, b, c) and root (x1, x2) pointers in Kokkos Views
+
+  auto a_view = getViewFromPointer(a, iend);
+  auto b_view = getViewFromPointer(b, iend);
+  auto c_view = getViewFromPointer(c, iend);
+  auto x1_view = getViewFromPointer(x1, iend);
+  auto x2_view = getViewFromPointer(x2, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "IF_QUAD_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            Real_type s = b_view[i] * b_view[i] - 4.0 * a_view[i] * c_view[i];
+            if (s >= 0) {
+              s = sqrt(s);
+              x2_view[i] = (-b_view[i] + s) / (2.0 * a_view[i]);
+              x1_view[i] = (-b_view[i] - s) / (2.0 * a_view[i]);
+            } else {
+              x2_view[i] = 0.0;
+              x1_view[i] = 0.0;
+            }
+          });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  IF_QUAD : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(a, a_view, iend);
+  moveDataToHostFromKokkosView(b, b_view, iend);
+  moveDataToHostFromKokkosView(c, c_view, iend);
+  moveDataToHostFromKokkosView(x1, x1_view, iend);
+  moveDataToHostFromKokkosView(x2, x2_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp
new file mode 100644
index 000000000..eef8ffbaa
--- /dev/null
+++
b/src/basic-kokkos/INIT3-Kokkos.cpp
@@ -0,0 +1,73 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT3.hpp"
+#if defined (RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of INIT3: out1[i] = out2[i] = out3[i] = -in1[i] - in2[i] over [ibegin, iend).
+void INIT3::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  // INIT3_DATA_SETUP brings the kernel's data pointers into scope
+  // (see INIT3.hpp)
+
+  INIT3_DATA_SETUP;
+
+  // Wrap each pointer in a Kokkos View: the pointer is the first argument and
+  // the number of elements, iend, is the second
+  //
+  auto out1_view = getViewFromPointer(out1, iend);
+  auto out2_view = getViewFromPointer(out2, iend);
+  auto out3_view = getViewFromPointer(out3, iend);
+  auto in1_view = getViewFromPointer(in1, iend);
+  auto in2_view = getViewFromPointer(in2, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "INIT3-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            out1_view[i] = out2_view[i] = out3_view[i] =
+                -in1_view[i] - in2_view[i];
+          });
+    }
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  INIT3 : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(out1, out1_view, iend);
+  moveDataToHostFromKokkosView(out2, out2_view, iend);
+  moveDataToHostFromKokkosView(out3, out3_view, iend);
+  moveDataToHostFromKokkosView(in1, in1_view, iend);
+  moveDataToHostFromKokkosView(in2, in2_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp
new file mode 100644
index 000000000..8d59409d1
--- /dev/null
+++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp
@@ -0,0 +1,58 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT_VIEW1D.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of INIT_VIEW1D: a[i] = (i + 1) * v over [ibegin, iend).
+void INIT_VIEW1D::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  INIT_VIEW1D_DATA_SETUP;
+
+  auto a_view = getViewFromPointer(a, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "INIT_VIEW1D_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            a_view[i] = (i + 1) * v;
+          });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  INIT_VIEW1D : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(a, a_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp
new file mode 100644
index 000000000..ae03fe752
--- /dev/null
+++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp
@@ -0,0 +1,60 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "INIT_VIEW1D_OFFSET.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of INIT_VIEW1D_OFFSET: a[i - ibegin] = i * v for i in [ibegin, iend), with ibegin = 1.
+void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 1;
+  const Index_type iend = getActualProblemSize() + 1;
+
+  INIT_VIEW1D_OFFSET_DATA_SETUP;
+  // The kernel only touches a[0 .. iend - ibegin), so wrap (and later copy back) exactly that many elements, not iend.
+  auto a_view = getViewFromPointer(a, iend - ibegin);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Kokkos::parallel_for(
+          "INIT_VIEW1D_OFFSET_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) { a_view[i - ibegin] = i * v; });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  INIT_VIEW1D_OFFSET : Unknown variant id = " << vid
+              << std::endl;
+  }
+  }
+
+  // Move data from Kokkos View (on Device) back to Host
+  moveDataToHostFromKokkosView(a, a_view, iend - ibegin);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp
new file mode 100644
index 000000000..e81cd17d5
--- /dev/null
+++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp
@@ -0,0 +1,72 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "MULADDSUB.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of MULADDSUB: out1 = in1 * in2, out2 = in1 + in2, out3 = in1 - in2, element-wise.
+void MULADDSUB::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  MULADDSUB_DATA_SETUP;
+
+  // Define Kokkos Views that will wrap pointers defined in MULADDSUB.hpp
+  auto out1_view = getViewFromPointer(out1, iend);
+  auto out2_view = getViewFromPointer(out2, iend);
+  auto out3_view = getViewFromPointer(out3, iend);
+  auto in1_view = getViewFromPointer(in1, iend);
+  auto in2_view = getViewFromPointer(in2, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      // If SIMD really matters, consider using Kokkos SIMD
+      Kokkos::parallel_for(
+          "MULADDSUB_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            out1_view[i] = in1_view[i] * in2_view[i];
+            out2_view[i] = in1_view[i] + in2_view[i];
+            out3_view[i] = in1_view[i] - in2_view[i];
+          });
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  MULADDSUB : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(out1, out1_view, iend);
+  moveDataToHostFromKokkosView(out2, out2_view, iend);
+  moveDataToHostFromKokkosView(out3, out3_view, iend);
+  moveDataToHostFromKokkosView(in1, in1_view, iend);
+  moveDataToHostFromKokkosView(in2, in2_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp
new file mode 100644
index
000000000..f69020e57
--- /dev/null
+++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp
@@ -0,0 +1,78 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "NESTED_INIT.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of NESTED_INIT: array(k, j, i) = 0.00000001 * i * j * k over an nk x nj x ni index space.
+void NESTED_INIT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+
+  NESTED_INIT_DATA_SETUP;
+
+  // Wrap the nested init array pointer in a Kokkos View
+  // In a Kokkos View, array arguments for array boundaries go from outermost
+  // to innermost dimension sizes
+  // See the basic NESTED_INIT.hpp file for definition of NESTED_INIT
+
+  auto array_kokkos_view = getViewFromPointer(array, nk, nj, ni);
+  //
+  // NOTE(review): originally marked "Do not remove", but nothing below
+  // references this lambda -- confirm whether it is still needed.
+  auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) {
+    NESTED_INIT_BODY;
+  };
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      // MDRange can be optimized
+      Kokkos::parallel_for(
+          "NESTED_INIT KokkosSeq",
+          // Range policy to define amount of work to be done
+          Kokkos::MDRangePolicy<Kokkos::Rank<3>,
+                                // Execution space
+                                Kokkos::DefaultExecutionSpace>({0, 0, 0},
+                                                               {nk, nj, ni}),
+          // Loop body
+          KOKKOS_LAMBDA(Index_type k, Index_type j, Index_type i) {
+            array_kokkos_view(k, j, i) = 0.00000001 * i * j * k;
+          });
+    }
+
+    Kokkos::fence();
+
+    stopTimer();
+    // Moves mirror data from GPU to CPU (void, i.e., no return type). In
+    // this moving of data back to Host, the layout is changed back to Layout
+    // Right, vs. the LayoutLeft of the GPU
+    moveDataToHostFromKokkosView(array, array_kokkos_view, nk, nj, ni);
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  NESTED_INIT : Unknown variant id = " << vid << std::endl;
+  }
+  }
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp
new file mode 100644
index 000000000..066aca7aa
--- /dev/null
+++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp
@@ -0,0 +1,69 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "PI_ATOMIC.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of PI_ATOMIC: atomically accumulates dx / (1 + x*x) into one value, then scales it by 4.
+void PI_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  PI_ATOMIC_DATA_SETUP;
+
+  // Declare Kokkos View that will wrap the pointer defined in PI_ATOMIC.hpp
+  auto pi_view = getViewFromPointer(pi, 1);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      // Initializing a value, pi, on the host
+      *pi = m_pi_init;
+      // Re-wrap the pointer so the view starts this repetition from the reset value
+      pi_view = getViewFromPointer(pi, 1);
+
+      Kokkos::parallel_for(
+          "PI_ATOMIC-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(Index_type i) {
+            double x = (double(i) + 0.5) * dx;
+            // Make a reference to the 0th element of a 1D view with one
+            // element
+            Kokkos::atomic_add(&pi_view(0), dx / (1.0 + x * x));
+          });
+      // Moving the data on the device (held in the KokkosView) BACK to the
+      // pointer, pi.
+      moveDataToHostFromKokkosView(pi, pi_view, 1);
+      *pi *= 4.0;
+    }
+
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  PI_ATOMIC : Unknown variant id = " << vid << std::endl;
+  }
+  }
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp
new file mode 100644
index 000000000..b1566d619
--- /dev/null
+++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp
@@ -0,0 +1,78 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "REDUCE3_INT.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <iostream>
+#include <limits>
+
+namespace rajaperf {
+namespace basic {
+// Kokkos variant of REDUCE3_INT: one parallel_reduce computing the max, min and sum of vec.
+void REDUCE3_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  REDUCE3_INT_DATA_SETUP;
+
+  // Declare KokkosView that will wrap the pointer to a vector
+
+  auto vec_view = getViewFromPointer(vec, iend);
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Int_type max_value = m_vmax_init;
+      Int_type min_value = m_vmin_init;
+      Int_type sum = m_vsum_init;
+      // ADL: argument-dependent look up here
+      parallel_reduce(
+          "REDUCE3-Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(const int64_t i, Int_type &tl_max, Int_type &tl_min,
+                        Int_type &tl_sum) {
+            Int_type vec_i = vec_view[i];
+            if (vec_i > tl_max)
+              tl_max = vec_i;
+            if (vec_i < tl_min)
+              tl_min = vec_i;
+            tl_sum += vec_i;
+          },
+          Kokkos::Max<Int_type>(max_value), Kokkos::Min<Int_type>(min_value),
+          sum);
+      m_vsum += static_cast<Int_type>(sum);
+      m_vmin =
+          Kokkos::Experimental::min(m_vmin, static_cast<Int_type>(min_value));
+      m_vmax =
+          Kokkos::Experimental::max(m_vmax, static_cast<Int_type>(max_value));
+    }
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  REDUCE3_INT : Unknown variant id = " << vid << std::endl;
+  }
+  }
+
+  moveDataToHostFromKokkosView(vec, vec_view, iend);
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp
new file mode 100644
index 000000000..47c4596e6
--- /dev/null
+++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp
@@ -0,0 +1,69 @@
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+// and RAJA Performance Suite project contributors.
+// See the RAJAPerf/COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (BSD-3-Clause)
+//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+
+#include "TRAP_INT.hpp"
+#if defined(RUN_KOKKOS)
+#include "common/KokkosViewUtils.hpp"
+
+#include <iostream>
+
+namespace rajaperf {
+namespace basic {
+
+//
+// Function used in TRAP_INT loop: returns 1 / sqrt((x - xp)^2 + (y - yp)^2).
+//
+RAJA_INLINE
+//
+KOKKOS_FUNCTION
+Real_type trap_int_func(Real_type x, Real_type y, Real_type xp, Real_type yp) {
+  Real_type denom = (x - xp) * (x - xp) + (y - yp) * (y - yp);
+  denom = 1.0 / sqrt(denom);
+  return denom;
+}
+// Kokkos variant of TRAP_INT: sum-reduces TRAP_INT_BODY over [ibegin, iend) and adds the result times h to m_sumx.
+void TRAP_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
+  const Index_type run_reps = getRunReps();
+  const Index_type ibegin = 0;
+  const Index_type iend = getActualProblemSize();
+
+  TRAP_INT_DATA_SETUP;
+
+  switch (vid) {
+
+  case Kokkos_Lambda: {
+
+    Kokkos::fence();
+    startTimer();
+    for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
+
+      Real_type trap_integral_val = m_sumx_init;
+
+      Kokkos::parallel_reduce(
+          "TRAP_INT_Kokkos Kokkos_Lambda",
+          Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
+          KOKKOS_LAMBDA(const int64_t i, Real_type &sumx){TRAP_INT_BODY},
+          trap_integral_val);
+
+      m_sumx += static_cast<Real_type>(trap_integral_val) * h;
+    }
+    Kokkos::fence();
+    stopTimer();
+
+    break;
+  }
+
+  default: {
+    getCout() << "\n  TRAP_INT : Unknown variant id = " << vid << std::endl;
+  }
+  }
+}
+
+} // end namespace basic
+} // end namespace rajaperf
+#endif
diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index 6d6133eb6..69a5a152e 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -51,6 +51,8 @@ DAXPY::DAXPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } DAXPY::~DAXPY() diff --git a/src/basic/DAXPY.hpp b/src/basic/DAXPY.hpp index db8501e9f..82a6fd9ff 100644 --- a/src/basic/DAXPY.hpp +++ b/src/basic/DAXPY.hpp @@ -52,6 +52,7 @@ class DAXPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git
a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp index 1e5d4e00e..200df93db 100644 --- a/src/basic/DAXPY_ATOMIC.cpp +++ b/src/basic/DAXPY_ATOMIC.cpp @@ -51,6 +51,8 @@ DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } DAXPY_ATOMIC::~DAXPY_ATOMIC() diff --git a/src/basic/DAXPY_ATOMIC.hpp b/src/basic/DAXPY_ATOMIC.hpp index 909939a45..dd52d777c 100644 --- a/src/basic/DAXPY_ATOMIC.hpp +++ b/src/basic/DAXPY_ATOMIC.hpp @@ -55,6 +55,7 @@ class DAXPY_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index 69396d330..4a8d60035 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -55,6 +55,8 @@ IF_QUAD::IF_QUAD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } IF_QUAD::~IF_QUAD() diff --git a/src/basic/IF_QUAD.hpp b/src/basic/IF_QUAD.hpp index 4d2a22c22..a03727a6c 100644 --- a/src/basic/IF_QUAD.hpp +++ b/src/basic/IF_QUAD.hpp @@ -69,6 +69,7 @@ class IF_QUAD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index fc3fd024d..a504fa914 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -51,6 +51,8 @@ INIT3::INIT3(const 
RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } INIT3::~INIT3() diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp index 44f3622de..0f89b7c54 100644 --- a/src/basic/INIT3.hpp +++ b/src/basic/INIT3.hpp @@ -55,6 +55,7 @@ class INIT3 : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index bd752aa06..2cb2b2376 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -52,6 +52,8 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } INIT_VIEW1D::~INIT_VIEW1D() diff --git a/src/basic/INIT_VIEW1D.hpp b/src/basic/INIT_VIEW1D.hpp index b51d38b79..b5dfbf097 100644 --- a/src/basic/INIT_VIEW1D.hpp +++ b/src/basic/INIT_VIEW1D.hpp @@ -66,6 +66,7 @@ class INIT_VIEW1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index 165cd5544..f31395b07 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -52,6 +52,8 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + 
setVariantDefined( Kokkos_Lambda ); } INIT_VIEW1D_OFFSET::~INIT_VIEW1D_OFFSET() diff --git a/src/basic/INIT_VIEW1D_OFFSET.hpp b/src/basic/INIT_VIEW1D_OFFSET.hpp index be597496d..4cc3548c7 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.hpp +++ b/src/basic/INIT_VIEW1D_OFFSET.hpp @@ -65,6 +65,7 @@ class INIT_VIEW1D_OFFSET : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index d1c180b8e..1d4981ca2 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -51,6 +51,8 @@ MULADDSUB::MULADDSUB(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } MULADDSUB::~MULADDSUB() diff --git a/src/basic/MULADDSUB.hpp b/src/basic/MULADDSUB.hpp index 30ad11a54..63d637073 100644 --- a/src/basic/MULADDSUB.hpp +++ b/src/basic/MULADDSUB.hpp @@ -58,6 +58,7 @@ class MULADDSUB : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index ef9550d97..30cbd0254 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -62,6 +62,8 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } NESTED_INIT::~NESTED_INIT() diff --git a/src/basic/NESTED_INIT.hpp 
b/src/basic/NESTED_INIT.hpp index 13da52cf2..6849c9a73 100644 --- a/src/basic/NESTED_INIT.hpp +++ b/src/basic/NESTED_INIT.hpp @@ -58,6 +58,7 @@ class NESTED_INIT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 776883232..607ad1312 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -53,6 +53,8 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } PI_ATOMIC::~PI_ATOMIC() diff --git a/src/basic/PI_ATOMIC.hpp b/src/basic/PI_ATOMIC.hpp index 10c674dda..e69cbdb56 100644 --- a/src/basic/PI_ATOMIC.hpp +++ b/src/basic/PI_ATOMIC.hpp @@ -54,6 +54,7 @@ class PI_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index dee6d3a5e..941d85ac1 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -56,6 +56,8 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } REDUCE3_INT::~REDUCE3_INT() diff --git a/src/basic/REDUCE3_INT.hpp b/src/basic/REDUCE3_INT.hpp index 93ad766c2..c84fa84b2 100644 --- a/src/basic/REDUCE3_INT.hpp +++ b/src/basic/REDUCE3_INT.hpp @@ -70,6 +70,7 @@ class REDUCE3_INT : public KernelBase void 
runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index 3bf939f38..63da29799 100644 --- a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -51,6 +51,8 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } TRAP_INT::~TRAP_INT() diff --git a/src/basic/TRAP_INT.hpp b/src/basic/TRAP_INT.hpp index 50acfeb79..eff85b90e 100644 --- a/src/basic/TRAP_INT.hpp +++ b/src/basic/TRAP_INT.hpp @@ -67,6 +67,7 @@ class TRAP_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index a07a6bbbb..36efa5170 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -129,6 +129,14 @@ void KernelBase::setVariantDefined(VariantID vid) #endif break; } +// Required for running Kokkos + case Kokkos_Lambda : + { +#if defined(RUN_KOKKOS) + setKokkosTuningDefinitions(vid); +#endif + break; + } default : { #if 0 @@ -238,6 +246,14 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx) #endif break; } +// Dispatch to the Kokkos implementation when built with RUN_KOKKOS + case Kokkos_Lambda : + { +#if defined(RUN_KOKKOS) + runKokkosVariant(vid, tune_idx); +#endif + break; + } default : { #if 0 diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 8d74d6e05..ed3429643 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -90,6 +90,10 @@ class KernelBase
virtual void setOpenMPTargetTuningDefinitions(VariantID vid) { addVariantTuningName(vid, getDefaultTuningName()); } #endif +#if defined(RUN_KOKKOS) + virtual void setKokkosTuningDefinitions(VariantID vid) + { addVariantTuningName(vid, getDefaultTuningName()); } +#endif // // Getter methods used to generate kernel execution summary @@ -230,6 +234,12 @@ class KernelBase #if defined(RAJA_ENABLE_TARGET_OPENMP) virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0; #endif +#if defined(RUN_KOKKOS) + virtual void runKokkosVariant(VariantID vid, size_t tune_idx) + { + getCout() << "\n KernelBase: Unimplemented Kokkos variant id = " << vid << std::endl; + } +#endif protected: const RunParams& run_params; diff --git a/src/common/KokkosViewUtils.hpp b/src/common/KokkosViewUtils.hpp new file mode 100644 index 000000000..fc9210f36 --- /dev/null +++ b/src/common/KokkosViewUtils.hpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// Utilities for copying data between raw pointers and Kokkos Views. +/// + +#ifndef KokkosViewUtils_HPP +#define KokkosViewUtils_HPP + +#include "Kokkos_Core.hpp" + +#include <ostream> +#include <string> + +namespace rajaperf { +template <class PointedAt, size_t NumBoundaries> struct PointerOfNdimensions; + +template <class PointedAt> struct PointerOfNdimensions<PointedAt, 0> { + using type = PointedAt; +}; + +template <class PointedAt, size_t NumBoundaries> struct PointerOfNdimensions { + using type = + typename PointerOfNdimensions<PointedAt, NumBoundaries - 1>::type *; +}; + +// This templated function is used to wrap pointers +// (declared and defined in RAJAPerf Suite kernels) in Kokkos Views +// +template <class PointedAt, class... Boundaries> +auto getViewFromPointer(PointedAt *kokkos_ptr, Boundaries...
boundaries) -> + Kokkos::View< + typename PointerOfNdimensions<PointedAt, sizeof...(Boundaries)>::type, + typename Kokkos::DefaultExecutionSpace::memory_space> + +{ + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions<PointedAt, sizeof...(Boundaries)>::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions<PointedAt, sizeof...(Boundaries)>::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + using mirror_view_type = typename device_view_type::HostMirror; + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // The boundaries parameter pack contains the array dimensions; + // An allocation is implicitly made here + device_view_type device_data_copy("StringName", boundaries...); + + mirror_view_type cpu_to_gpu_mirror = + Kokkos::create_mirror_view(device_data_copy); + + Kokkos::deep_copy(cpu_to_gpu_mirror, pointer_holder); + + Kokkos::deep_copy(device_data_copy, cpu_to_gpu_mirror); + + // Kokkos::View return type + + return device_data_copy; +} + +// This function copies the data in a Kokkos::View from device back to host, +// storing it in the memory behind the existing pointer(s) +template <class PointedAt, class ExistingView, class... Boundaries> +void moveDataToHostFromKokkosView(PointedAt *kokkos_ptr, ExistingView my_view, + Boundaries...
boundaries) { + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions<PointedAt, sizeof...(Boundaries)>::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions<PointedAt, sizeof...(Boundaries)>::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + using mirror_view_type = typename device_view_type::HostMirror; + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // Layout is optimal for gpu, but data are actually located on CPU + mirror_view_type cpu_to_gpu_mirror = Kokkos::create_mirror_view(my_view); + + // Actual copying of the data from the gpu (my_view) back to the cpu + Kokkos::deep_copy(cpu_to_gpu_mirror, my_view); + + // This copies from the mirror on the host cpu back to the existing + // pointer(s) + Kokkos::deep_copy(pointer_holder, cpu_to_gpu_mirror); +} + +} // namespace rajaperf + +#endif // closing endif for header file include guard diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 9f66f4bf3..38b393fd8 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -274,6 +274,8 @@ static const std::string VariantNames [] = std::string("Lambda_HIP"), std::string("RAJA_HIP"), + std::string("Kokkos_Lambda"), + std::string("Unknown Variant") // Keep this at the end and DO NOT remove....
}; // END VariantNames @@ -418,6 +420,12 @@ bool isVariantAvailable(VariantID vid) } #endif +#if defined(RUN_KOKKOS) + if ( vid == Kokkos_Lambda ) { + ret_val = true; + } +#endif + return ret_val; } @@ -473,6 +481,12 @@ bool isVariantGPU(VariantID vid) } #endif +#if defined(RUN_KOKKOS) + if ( vid == Kokkos_Lambda ) { + ret_val = true; + } +#endif + return ret_val; } diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 61a6f3bef..6abd4e9a3 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -7,7 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// /// -/// Tyoes and methods for managing Suite kernels, variants, features, etc.. +/// Types and methods for managing Suite kernels, variants, features, etc.. /// #ifndef RAJAPerfSuite_HPP @@ -192,6 +192,8 @@ enum VariantID { Lambda_HIP, RAJA_HIP, + Kokkos_Lambda, + NumVariants // Keep this one last and NEVER comment out (!!) }; diff --git a/tpl/kokkos b/tpl/kokkos new file mode 160000 index 000000000..2834f94af --- /dev/null +++ b/tpl/kokkos @@ -0,0 +1 @@ +Subproject commit 2834f94af9b01debf67c1aaa3f0eb0c903d72c8d