diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b1ce3b48df..edf567f301 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,7 +4,7 @@ jobs: build_docker: strategy: matrix: - target: [gcc12_debug, gcc13, clang13, clang14_debug, clang15, rocm6, rocm6_desul, intel2024, intel2024_debug, intel2024_sycl] + target: [gcc12, gcc12_debug, gcc13, gcc13_desul, clang14_debug, clang14_style, clang14_desul, intel2024_0, intel2024_0_debug, intel2024_0_sycl, rocm6_4_3_desul] runs-on: ubuntu-latest steps: - run: | diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 21a3991cd3..22f6664407 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -78,7 +78,7 @@ stages: include: - local: '.gitlab/custom-jobs-and-variables.yml' - project: 'radiuss/radiuss-shared-ci' - ref: 'v2025.06.0' + ref: 'v2025.09.1' file: 'pipelines/${CI_MACHINE}.yml' - artifact: '${CI_MACHINE}-jobs.yml' job: 'generate-job-lists' @@ -107,7 +107,7 @@ include: file: 'id_tokens.yml' # [Optional] checks preliminary to running the actual CI test - project: 'radiuss/radiuss-shared-ci' - ref: 'v2025.06.0' + ref: 'v2025.09.1' file: 'utilities/preliminary-ignore-draft-pr.yml' # pipelines subscribed by the project - local: '.gitlab/subscribed-pipelines.yml' diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml index 51531b2132..b578273ed2 100644 --- a/.gitlab/custom-jobs-and-variables.yml +++ b/.gitlab/custom-jobs-and-variables.yml @@ -22,6 +22,16 @@ variables: # Project specific deps for dane PROJECT_DANE_DEPS: +# Matrix +# Arguments for top level allocation + MATRIX_SHARED_ALLOC: "--exclusive --partition=pci --time=60 --nodes=1" +# Arguments for job level allocation + MATRIX_JOB_ALLOC: "--partition=pci --nodes=1" +# Project specific variants for matrix + PROJECT_MATRIX_VARIANTS: "~shared +cuda cuda_arch=90 +tests" +# Project specific deps for matrix + PROJECT_MATRIX_DEPS: + # Corona # Arguments for top level allocation CORONA_SHARED_ALLOC: "--exclusive --time-limit=60m --nodes=1 -o per-resource.count=2" @@ -44,7 +54,7 @@ variables: # Tuolumne # Arguments for top level allocation - TUOLUMNE_SHARED_ALLOC: "--exclusive --queue=pci --time-limit=60m --nodes=1 -o per-resource.count=2" + TUOLUMNE_SHARED_ALLOC: "--exclusive --queue=pci --time-limit=90m --nodes=1 -o per-resource.count=2" # Arguments for job level allocation TUOLUMNE_JOB_ALLOC: "--nodes=1 --begin-time=+5s" # Project specific variants for corona diff --git a/.gitlab/jobs/corona.yml b/.gitlab/jobs/corona.yml index 352ba7135a..64daeb9fd1 100644 --- a/.gitlab/jobs/corona.yml +++ b/.gitlab/jobs/corona.yml @@ -17,7 +17,7 @@ # project. We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} # So that the comparison with the original job is easier. -# No overridden jobs so far. +## rocm 5.7.1 job is in the shared RSC set of jobs ############ # Extra jobs @@ -26,16 +26,20 @@ # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. 
+rocmcc_5_7_1_hip_openmp: + variables: + SPEC: " ~shared +rocm +openmp +tests +vectorization amdgpu_target=gfx906 %llvm-amdgpu@=5.7.1 ^hip@5.7.1" + extends: .job_on_corona + rocmcc_5_7_1_hip_desul_atomics: variables: - SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %rocmcc@=5.7.1 ^hip@5.7.1" + SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %llvm-amdgpu@=5.7.1 ^hip@5.7.1" extends: .job_on_corona -clang_20_0_0_sycl_gcc_10_3_1_rocmcc_6_0_2_hip: +clang_22_0_0_sycl_gcc_10_3_1_rocmcc_6_4_2: variables: - SPEC: " ~shared +sycl ~openmp +tests %clang@=20.0.0 cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\"" - MODULE_LIST: "rocm/6.0.2" - SYCL_PATH: "/usr/WS2/raja-dev/clang_sycl_730cd3a5275f_hip_gcc10.3.1_rocm6.0.2/" + SPEC: " ~shared +sycl ~openmp +tests cxxflags==\"-w -fsycl -fsycl-unnamed-lambda -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906\" %sycl-clang-22-gcc-10" + MODULE_LIST: "rocm/6.4.2" + SYCL_PATH: "/usr/workspace/raja-dev/clang_sycl_16b7bcb09915_hip_gcc10.3.1_rocm6.4.2" LD_LIBRARY_PATH: "${SYCL_PATH}/lib:${SYCL_PATH}/lib64:${LD_LIBRARY_PATH}" extends: .job_on_corona - diff --git a/.gitlab/jobs/dane.yml b/.gitlab/jobs/dane.yml index b5b45541e7..33dbfd46f8 100644 --- a/.gitlab/jobs/dane.yml +++ b/.gitlab/jobs/dane.yml @@ -17,24 +17,23 @@ # project. We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} # when possible so that the comparison with the original job is easier. -# Identical to shared job, but use OpenMP tasks and no vectorization -clang_14_0_6: - variables: - SPEC: " ~shared +openmp +omptask ~vectorization +tests %clang@=14.0.6 ${PROJECT_POODLE_DEPS}" +# Shared job uses %clang-19-gcc-13, replaced with %clang-19 below +clang_19_1_3_gcc_13_3_1: extends: .job_on_dane - -# Identical to shared job, but use OpenMP tasks and no vectorization -gcc_10_3_1: variables: - SPEC: " ~shared +openmp +omptask ~vectorization +tests %gcc@=10.3.1 ${PROJECT_POODLE_DEPS}" + ON_DANE: "OFF" + +# Shared job uses %oneapi-2025-gcc-10, replaced with %oneapi@=2025.2 below +oneapi_2025_2_0_gcc_10_3_1: extends: .job_on_dane + variables: + ON_DANE: "OFF" + +## gcc 13.3.1 job is in the shared RSC set of jobs -# custom variant +# Similar to shared RSC job, but use OpenMP task, turn off vectorization, +# and lower optimization level to get correct results # https://github.com/LLNL/RAJA/pull/1712#issuecomment-2292006843 -oneapi_2023_2_1: - variables: - SPEC: "${PROJECT_POODLE_VARIANTS} +lowopttest cxxflags==-fp-model=precise %oneapi@=2023.2.1 ${PROJECT_POODLE_DEPS}" - extends: .job_on_dane ############ # Extra jobs @@ -43,18 +42,32 @@ oneapi_2023_2_1: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. 
-clang_14_0_6_openmp_off: +clang_14_0_6: + variables: + SPEC: " ~shared +openmp +omptask ~vectorization +tests %llvm@=14.0.6" + extends: .job_on_dane + +clang_19_1_3: + variables: + SPEC: " ~shared +openmp +tests %llvm@=19.1.3" + extends: .job_on_dane + +gcc_10_3_1: + variables: + SPEC: " ~shared +openmp +omptask ~vectorization +tests %gcc@=10.3.1 ${PROJECT_DANE_DEPS}" + extends: .job_on_dane + +gcc_12_1_1: variables: - SPEC: " ~shared ~openmp +tests %clang@=14.0.6" + SPEC: " ~shared +openmp +tests %gcc@=12.1.1" extends: .job_on_dane -gcc_10_3_1_openmp_default: +clang_14_0_6_gcc_11_2_1_desul_atomics: variables: - SPEC: " ~shared +tests %gcc@=10.3.1" + SPEC: " ~shared +openmp +tests +desul %clang-14-gcc-11" extends: .job_on_dane -# OTHERS -clang_14_0_6_gcc_10_3_1_desul_atomics: +oneapi_2025_2_0: variables: - SPEC: " ~shared +openmp +tests +desul %clang@=14.0.6.gcc.10.3.1" + SPEC: "~shared +openmp +vectorization +tests cxxflags==-fp-model=precise %oneapi@=2025.2.0" extends: .job_on_dane diff --git a/.gitlab/jobs/matrix.yml b/.gitlab/jobs/matrix.yml new file mode 100644 index 0000000000..aab48b372c --- /dev/null +++ b/.gitlab/jobs/matrix.yml @@ -0,0 +1,39 @@ +############################################################################## +# Copyright (c) 2016-25, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################## + +# Override reproducer section to define project specific variables. +.matrix_reproducer_vars: + script: + - !reference [.reproducer_vars, script] + +######################## +# Overridden shared jobs +######################## +# We duplicate the shared jobs description and add necessary changes for this +# project. We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} +# when possible so that the comparison with the original job is easier. + +# Shared job uses toolchain %clang-18-gcc-13, replaced with %llvm@=18.1.8 below +clang_18_1_1_gcc_13_cuda_12_6_0: + extends: .job_on_matrix + variables: + ON_MATRIX: "OFF" + +############ +# Extra jobs +############ +# We do not recommend using ${PROJECT__VARIANTS} and +# ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully +# describe the spec here. + +# clang_18_1_8_cuda_12_6_0 +clang_18_1_8_cuda_12_6_0: + variables: + SPEC: " ~shared +cuda cuda_arch=90 %llvm@=18.1.8 ^cuda@12.6.0+allow-unsupported-compilers" + extends: .job_on_matrix + allow_failure: true + diff --git a/.gitlab/jobs/tioga.yml b/.gitlab/jobs/tioga.yml index bc4abc8755..68fdc4a8f6 100644 --- a/.gitlab/jobs/tioga.yml +++ b/.gitlab/jobs/tioga.yml @@ -17,13 +17,9 @@ # project. We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} # So that the comparison with the original job is easier. -# Compiler error preventing a test to succeed. -# https://github.com/LLNL/RAJA/pull/1712#issuecomment-2316335119 -cce_18_0_1: - variables: - SPEC: "${PROJECT_TIOGA_VARIANTS} %cce@=18.0.1 ${PROJECT_TIOGA_DEPS}" - extends: .job_on_tioga - allow_failure: true +## NOTE: rocm 6.4.1 job is in the shared RSC set of jobs + +## NOTE: cce 20.0.0 job is in the shared RSC set of jobs ############ # Extra jobs @@ -32,17 +28,12 @@ cce_18_0_1: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. 
-cce_19_0_0: - variables: - SPEC: "${PROJECT_TIOGA_VARIANTS} %cce@=19.0.0 ${PROJECT_TIOGA_DEPS}" - extends: .job_on_tioga - rocmcc_6_4_1_hip_desul_atomics: variables: - SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx90a %rocmcc@=6.4.1 ^hip@6.4.1" + SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx90a %llvm-amdgpu@=6.4.1 ^hip@6.4.1" extends: .job_on_tioga rocmcc_6_4_1_hip_openmp: variables: - SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx90a %rocmcc@=6.4.1 ^hip@6.4.1" + SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx90a %llvm-amdgpu@=6.4.1 ^hip@6.4.1" extends: .job_on_tioga diff --git a/.gitlab/jobs/tuolumne.yml b/.gitlab/jobs/tuolumne.yml index 7ea3f58752..59c1c462b7 100644 --- a/.gitlab/jobs/tuolumne.yml +++ b/.gitlab/jobs/tuolumne.yml @@ -17,13 +17,9 @@ # project. We keep ${PROJECT__VARIANTS} and ${PROJECT__DEPS} # So that the comparison with the original job is easier. -# Compiler error preventing a test to succeed. -# https://github.com/LLNL/RAJA/pull/1712#issuecomment-2316335119 -cce_18_0_1: - variables: - SPEC: "${PROJECT_TUOLUMNE_VARIANTS} %cce@=18.0.1 ${PROJECT_TUOLUMNE_DEPS}" - extends: .job_on_tuolumne - allow_failure: true +## cce 20.0.0 job is in the shared RSC set of jobs + +## rocm 6.4.1 job is in the shared RSC set of jobs ############ # Extra jobs @@ -32,17 +28,12 @@ cce_18_0_1: # ${PROJECT__DEPS} in the extra jobs. There is no reason not to fully # describe the spec here. -cce_19_0_0: - variables: - SPEC: "${PROJECT_TUOLUMNE_VARIANTS} %cce@=19.0.0 ${PROJECT_TUOLUMNE_DEPS}" - extends: .job_on_tuolumne - rocmcc_6_4_1_hip_desul_atomics: variables: - SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx942 %rocmcc@=6.4.1 ^hip@6.4.1" + SPEC: "~shared +rocm ~openmp +desul +tests amdgpu_target=gfx942 %llvm-amdgpu@=6.4.1 ^hip@6.4.1" extends: .job_on_tuolumne rocmcc_6_4_1_hip_openmp: variables: - SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx942 %rocmcc@=6.4.1 ^hip@6.4.1" + SPEC: "~shared +rocm +openmp +omptask +tests amdgpu_target=gfx942 %llvm-amdgpu@=6.4.1 ^hip@6.4.1" extends: .job_on_tuolumne diff --git a/.gitlab/subscribed-pipelines.yml b/.gitlab/subscribed-pipelines.yml index 95d8a65348..1502195750 100644 --- a/.gitlab/subscribed-pipelines.yml +++ b/.gitlab/subscribed-pipelines.yml @@ -38,12 +38,14 @@ generate-job-lists: LOCAL_JOBS_PATH: ".gitlab/jobs" script: - cat ${RADIUSS_JOBS_PATH}/dane.yml ${LOCAL_JOBS_PATH}/dane.yml > dane-jobs.yml + - cat ${RADIUSS_JOBS_PATH}/matrix.yml ${LOCAL_JOBS_PATH}/matrix.yml > matrix-jobs.yml - cat ${RADIUSS_JOBS_PATH}/corona.yml ${LOCAL_JOBS_PATH}/corona.yml > corona-jobs.yml - cat ${RADIUSS_JOBS_PATH}/tioga.yml ${LOCAL_JOBS_PATH}/tioga.yml > tioga-jobs.yml - cat ${RADIUSS_JOBS_PATH}/tuolumne.yml ${LOCAL_JOBS_PATH}/tuolumne.yml > tuolumne-jobs.yml artifacts: paths: - dane-jobs.yml + - matrix-jobs.yml - corona-jobs.yml - tioga-jobs.yml - tuolumne-jobs.yml @@ -53,45 +55,101 @@ dane-up-check: variables: CI_MACHINE: "dane" extends: [.machine-check] + rules: + # Runs except if we explicitly deactivate dane by variable. + - if: '$ON_DANE == "OFF"' + when: never + - when: on_success dane-build-and-test: variables: CI_MACHINE: "dane" needs: [dane-up-check, generate-job-lists] extends: [.build-and-test] + rules: + # Runs except if we explicitly deactivate dane by variable. 
+ - if: '$ON_DANE == "OFF"' + when: never + - when: on_success + +# MATRIX +matrix-up-check: + variables: + CI_MACHINE: "matrix" + extends: [.machine-check] + rules: + # Runs except if we explicitly deactivate matrix by variable. + - if: '$ON_MATRIX == "OFF"' + when: never + - when: on_success + +matrix-build-and-test: + variables: + CI_MACHINE: "matrix" + needs: [matrix-up-check, generate-job-lists] + extends: [.build-and-test] + rules: + # Runs except if we explicitly deactivate matrix by variable. + - if: '$ON_MATRIX == "OFF"' + when: never + - when: on_success # CORONA corona-up-check: variables: CI_MACHINE: "corona" extends: [.machine-check] + rules: + - if: '$ON_CORONA == "OFF"' + when: never + - when: on_success corona-build-and-test: variables: CI_MACHINE: "corona" needs: [corona-up-check, generate-job-lists] extends: [.build-and-test] + rules: + - if: '$ON_CORONA == "OFF"' + when: never + - when: on_success # TIOGA tioga-up-check: variables: CI_MACHINE: "tioga" extends: [.machine-check] + rules: + - if: '$ON_TIOGA == "OFF"' + when: never + - when: on_success tioga-build-and-test: variables: CI_MACHINE: "tioga" needs: [tioga-up-check, generate-job-lists] extends: [.build-and-test] + rules: + - if: '$ON_TIOGA == "OFF"' + when: never + - when: on_success # TUOLUMNE tuolumne-up-check: variables: CI_MACHINE: "tuolumne" extends: [.machine-check] + rules: + - if: '$ON_TUOLUMNE == "OFF"' + when: never + - when: on_success tuolumne-build-and-test: variables: CI_MACHINE: "tuolumne" needs: [tuolumne-up-check, generate-job-lists] extends: [.build-and-test] + rules: + - if: '$ON_TUOLUMNE == "OFF"' + when: never + - when: on_success diff --git a/.uberenv_config.json b/.uberenv_config.json index 6f64e983a6..b16440eb3f 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -4,8 +4,8 @@ "package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", -"spack_commit": "280017a9ba3f6a969743deca0eebc96e7c0e5cfd", +"spack_branch": "v1.0.2", "spack_configs_path": "scripts/radiuss-spack-configs", -"spack_packages_path": "scripts/radiuss-spack-configs/packages", +"spack_packages_path": "scripts/radiuss-spack-configs/spack_repo/llnl_radiuss/packages", "spack_setup_clingo": false } diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a2d862ab0..9438c31cdd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,8 +15,8 @@ include(CMakeDependentOption) # Set version number set(RAJA_VERSION_MAJOR 2025) -set(RAJA_VERSION_MINOR 09) -set(RAJA_VERSION_PATCHLEVEL 1) +set(RAJA_VERSION_MINOR 12) +set(RAJA_VERSION_PATCHLEVEL 0) if (RAJA_LOADED AND (NOT RAJA_LOADED STREQUAL "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}")) message(FATAL_ERROR "You are mixing RAJA versions. Loaded is ${RAJA_LOADED}, expected ${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}") diff --git a/Dockerfile b/Dockerfile index d20809d639..b605df6fda 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,68 +6,53 @@ ############################################################################### ## -## Note that we build with 'make -j 16' on GitHub Actions and -## with 'make -j 6' on Azure. This is reflected in the 'make' commands below. -## This seems to work best for throughput. +## Note that we build with 'make -j 16' for newer targets and 'make -j 6' +## for older targets on GitHub Actions. This is reflected in the 'make' +## commands below. This seems to work best for throughput. 
## -FROM ghcr.io/llnl/radiuss:gcc-11-ubuntu-22.04 AS gcc11 +FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-24.04 AS gcc12 ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \ - make -j 6 &&\ +RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On -DBLT_CXX_STD=c++17 .. && \ + make -j 16 &&\ ctest -T test --output-on-failure && \ make clean -FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-22.04 AS gcc12 +FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-24.04 AS gcc12_debug ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \ - make -j 6 &&\ +RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On -DBLT_CXX_STD=c++17 .. && \ + make -j 16 &&\ ctest -T test --output-on-failure && \ make clean -FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-22.04 AS gcc12_debug +FROM ghcr.io/llnl/radiuss:gcc-13-ubuntu-24.04 AS gcc13 ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \ +RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On -DBLT_CXX_STD=c++17 .. && \ make -j 16 &&\ - ctest -T test --output-on-failure - -FROM ghcr.io/llnl/radiuss:gcc-12-ubuntu-22.04 AS gcc12_desul -ENV GTEST_COLOR=1 -COPY . /home/raja/workspace -WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On -DRAJA_ENABLE_DESUL_ATOMICS=On .. && \ - make -j 6 &&\ ctest -T test --output-on-failure && \ make clean -FROM ghcr.io/llnl/radiuss:gcc-13-ubuntu-22.04 AS gcc13 +FROM ghcr.io/llnl/radiuss:gcc-13-ubuntu-24.04 AS gcc13_desul ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On .. && \ +RUN cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_WARNINGS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=On -DENABLE_OPENMP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DBLT_CXX_STD=c++17 .. && \ make -j 16 &&\ - ctest -T test --output-on-failure - -FROM ghcr.io/llnl/radiuss:clang-13-ubuntu-22.04 AS clang13 -ENV GTEST_COLOR=1 -COPY . /home/raja/workspace -WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=On .. && \ - make -j 16 &&\ - ctest -T test --output-on-failure + ctest -T test --output-on-failure && \ + make clean FROM ghcr.io/llnl/radiuss:clang-14-ubuntu-22.04 AS clang14_debug ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Debug -DENABLE_OPENMP=On .. && \ - make -j 6 &&\ +RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Debug -DENABLE_OPENMP=On -DBLT_CXX_STD=c++17 .. 
&& \ + make -j 16 &&\ ctest -T test --output-on-failure && \ make clean @@ -80,72 +65,49 @@ RUN clang-format --version && \ cmake -DENABLE_CLANGFORMAT=ON ../ && \ make check -FROM ghcr.io/llnl/radiuss:clang-15-ubuntu-22.04 AS clang15 +FROM ghcr.io/llnl/radiuss:clang-14-ubuntu-22.04 AS clang14_desul ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DRAJA_ENABLE_RUNTIME_PLUGINS=ON -DENABLE_OPENMP=On .. && \ +RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DBLT_CXX_STD=c++17 .. && \ make -j 16 &&\ - ctest -T test --output-on-failure - -FROM ghcr.io/llnl/radiuss:clang-15-ubuntu-22.04 AS clang15_desul -ENV GTEST_COLOR=1 -COPY . /home/raja/workspace -WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=On -DRAJA_ENABLE_DESUL_ATOMICS=On .. && \ - make -j 6 &&\ ctest -T test --output-on-failure && \ make clean -## Test run failure in RAJA launch tests with new reducer interface. -## Need to figure out best way to handle that. -FROM ghcr.io/llnl/radiuss:ubuntu-20.04-intel-2024.0 AS intel2024 +## Don't run tests due to failure in RAJA launch tests with new reducer interface. +FROM ghcr.io/llnl/radiuss:intel-2024.0-ubuntu-20.04 AS intel2024_0 ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build RUN /bin/bash -c "source /opt/intel/oneapi/setvars.sh 2>&1 > /dev/null && \ - cmake -DCMAKE_CXX_COMPILER=icpx -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=On .. && \ - make -j 16" -## make -j 16 &&\ -## ctest -T test --output-on-failure" + cmake -DCMAKE_CXX_COMPILER=icpx -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=On -DBLT_CXX_STD=c++17 .. && \ + make -j 16 &&\ + make clean" -## Test run failure in RAJA launch tests with new reducer interface. -## Need to figure out best way to handle that. -FROM ghcr.io/llnl/radiuss:ubuntu-20.04-intel-2024.0 AS intel2024_debug +## Don't run tests due to failure in RAJA launch tests with new reducer interface. +FROM ghcr.io/llnl/radiuss:intel-2024.0-ubuntu-20.04 AS intel2024_0_debug ENV GTEST_COLOR=1 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build RUN /bin/bash -c "source /opt/intel/oneapi/setvars.sh 2>&1 > /dev/null && \ - cmake -DCMAKE_CXX_COMPILER=icpx -DCMAKE_BUILD_TYPE=Debug -DENABLE_OPENMP=On .. && \ - make -j 16" -## make -j 16 &&\ -## ctest -T test --output-on-failure" - -## -## Need to find a viable cuda image to test... -## + cmake -DCMAKE_CXX_COMPILER=icpx -DCMAKE_BUILD_TYPE=Debug -DENABLE_OPENMP=On -DBLT_CXX_STD=c++17 .. && \ + make -j 16 &&\ + make clean" -FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6 +FROM ghcr.io/llnl/radiuss:intel-2024.0-ubuntu-20.04 AS intel2024_0_sycl ENV GTEST_COLOR=1 -ENV HCC_AMDGPU_TARGET=gfx900 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DROCM_PATH=/opt/rocm-6.0.2 -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ - make -j 16 +RUN /bin/bash -c "source /opt/intel/oneapi/setvars.sh 2>&1 > /dev/null && \ + cmake -DCMAKE_CXX_COMPILER=dpcpp -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=Off -DRAJA_ENABLE_SYCL=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DBLT_CXX_STD=c++17 .. 
&& \ + make -j 16 &&\ + make clean" -FROM ghcr.io/llnl/radiuss:hip-6.0.2-ubuntu-20.04 AS rocm6_desul +FROM ghcr.io/llnl/radiuss:hip-6.4.3-ubuntu-24.04 AS rocm6_4_3_desul ENV GTEST_COLOR=1 ENV HCC_AMDGPU_TARGET=gfx900 COPY . /home/raja/workspace WORKDIR /home/raja/workspace/build -RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.0.2/bin/amdclang++ -DROCM_PATH=/opt/rocm-6.0.2 -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off .. && \ - make -j 16 - -FROM ghcr.io/llnl/radiuss:intel-2024.0-ubuntu-20.04 AS intel2024_sycl -ENV GTEST_COLOR=1 -COPY . /home/raja/workspace -WORKDIR /home/raja/workspace/build -RUN /bin/bash -c "source /opt/intel/oneapi/setvars.sh 2>&1 > /dev/null && \ - cmake -DCMAKE_CXX_COMPILER=dpcpp -DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=Off -DRAJA_ENABLE_SYCL=On -DBLT_CXX_STD=c++17 -DRAJA_ENABLE_DESUL_ATOMICS=On .. && \ - make -j 4" - +RUN cmake -DCMAKE_CXX_COMPILER=/opt/rocm-6.4.3/bin/amdclang++ -DROCM_PATH=/opt/rocm-6.4.3 -DCMAKE_BUILD_TYPE=Release -DENABLE_HIP=On -DRAJA_ENABLE_DESUL_ATOMICS=On -DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off -DBLT_CXX_STD=c++17 .. && \ + make -j 16 &&\ + make clean diff --git a/README.md b/README.md index d788dd65d4..1e2097d408 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,6 @@ # RAJA -[![Azure Pipeline Build Status](https://dev.azure.com/LLNL-RAJA/RAJA/_apis/build/status/LLNL.RAJA?branchName=develop)](https://dev.azure.com/LLNL-RAJA/RAJA/_build/latest?definitionId=1&branchName=develop) [![Documentation Status](https://readthedocs.org/projects/raja/badge/?version=develop)](https://raja.readthedocs.io/en/develop/?badge=develop) [![Coverage](https://img.shields.io/codecov/c/github/LLNL/RAJA/develop.svg)](https://codecov.io/gh/LLNL/RAJA) [![Join the chat at https://gitter.im/llnl/raja](https://badges.gitter.im/llnl/raja.svg)](https://gitter.im/llnl/raja?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) diff --git a/RELEASE b/RELEASE index 74b3e9c0b8..ff39101772 100644 --- a/RELEASE +++ b/RELEASE @@ -1,6 +1,6 @@ ******************************************************************************* -RAJA: ................................, version 2025.09.0 +RAJA: ................................, version 2025.12.0 Copyright (c) 2016-25, Lawrence Livermore National Security, LLC. Produced at the Lawrence Livermore National Laboratory. diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 950b701469..c72ac952b0 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -20,6 +20,31 @@ Notable changes include: * Bug fixes/improvements: +Version 2025.12.0 -- Release date 2025-12-19 +============================================ + +This release contains mostly improvements to code robustness and testing, +including evolving internal code implementations to use C++17. + +Notable changes include: + + * New features / API changes: + + * Build changes/improvements: + * Update Camp submodule to v2025.12.0 release. + * Improve CMake support for configuring with Caliper and fix issue reported + by a user. + * Bump minimum CMake version required to 3.24. + + * Bug fixes/improvements: + * Fix compilation failure when a downstream library or application is + built without OpenMP enabled when RAJA was built with OpenMP enabled. + There may still be some corner cases that violate C++ ODR that we have not + resolved and that are not being exposed by users. + * Various internal code cleanups, simplifications, and improvements using + C++17 features, with an eye toward C++20.
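The OpenMP fix noted above is implemented later in this diff via a new ``RAJA_OPENMP_ACTIVE`` macro in ``include/RAJA/config.hpp.in``, which is defined only when RAJA was configured with OpenMP and ``_OPENMP`` is defined in the current compilation. Below is a minimal sketch of the resulting guard pattern; the downstream header and the ``max_threads`` helper are hypothetical and shown for illustration only.

```cpp
// Minimal sketch (hypothetical downstream code, not RAJA source).
// RAJA_OPENMP_ACTIVE is defined by RAJA/config.hpp only when RAJA was
// configured with OpenMP *and* _OPENMP is defined for this translation
// unit, so a downstream build without OpenMP no longer hits a hard #error.
#include "RAJA/config.hpp"

#if defined(RAJA_OPENMP_ACTIVE)
#include <omp.h>
#endif

inline int max_threads()
{
#if defined(RAJA_OPENMP_ACTIVE)
  return omp_get_max_threads();  // OpenMP available in this build
#else
  return 1;                      // sequential fallback
#endif
}
```

This mirrors the switch from ``RAJA_ENABLE_OPENMP``/``RAJA_ENABLE_SYCL`` to the ``*_ACTIVE`` macros in ``launch.hpp`` and ``launch_core.hpp`` later in the diff.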
+ + Version 2025.09.1 -- Release date 2025-10-01 ============================================ diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index 100566af90..0000000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,92 +0,0 @@ -jobs: -- job: Windows - strategy: - matrix: - shared: - SHARED_ARGS: '-DBUILD_SHARED_LIBS=On' -########################### -## Windows static build is run on GitHub Actions -## static: -## SHARED_ARGS: '-DBUILD_SHARED_LIBS=Off' - pool: - vmImage: 'windows-2022' - variables: - CMAKE_EXTRA_FLAGS: '-DRAJA_ENABLE_WARNINGS_AS_ERRORS=Off -DBLT_CXX_STD="" -DCMAKE_CXX_STANDARD=17' - steps: - - checkout: self - clean: boolean - submodules: recursive - - task: CMake@1 - inputs: - workingDir: 'build' - cmakeArgs: '$(CMAKE_EXTRA_FLAGS) $(SHARED_ARGS) ../' - - task: CMake@1 - inputs: - workingDir: 'build' - cmakeArgs: '--build . --config Release --verbose -j 4' - - task: CmdLine@2 - inputs: - script: 'ctest.exe -T test -C Release' - workingDirectory: 'build' - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'cTest' - testResultsFiles: '**/Test.xml' -- job: Docker - timeoutInMinutes: 360 - strategy: - matrix: - gcc11: - docker_target: gcc11 - gcc12: - docker_target: gcc12 - gcc12_desul: - docker_target: gcc12_desul - clang15_desul: - docker_target: clang15_desul - clang14_style: - docker_target: clang14_style - pool: - vmImage: 'ubuntu-latest' - variables: - DOCKER_BUILDKIT: '1' - CMAKE_EXTRA_FLAGS: '-DENABLE_DEVELOPER_BENCHMARKS=On -DENABLE_DEVELOPER_DEFAULTS=On -DCMAKE_CXX_STANDARD=17' - steps: - - checkout: self - clean: boolean - submodules: recursive - - task: Docker@1 - inputs: - command: build - dockerFile: 'Dockerfile' - arguments: '--target $(docker_target)' - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'cTest' - testResultsFiles: '**/Test.xml' -- job: Mac - pool: - vmImage: 'macOS-latest' - variables: - CMAKE_EXTRA_FLAGS: '-DENABLE_OPENMP=Off -DCMAKE_CXX_STANDARD=17' - steps: - - checkout: self - clean: boolean - submodules: recursive - - task: CMake@1 - inputs: - workingDir: 'build' - cmakeArgs: '$(CMAKE_EXTRA_FLAGS) ../' - - script: | - cd build - make -j 4 - displayName: 'OSX Build' - condition: eq( variables['Agent.OS'], 'Darwin') - - script: | - cd build - ctest -T test --output-on-failure - displayName: 'OSX Test' - - task: PublishTestResults@2 - inputs: - testResultsFormat: 'cTest' - testResultsFiles: '**/Test.xml' diff --git a/docs/conf.py b/docs/conf.py index 3e98107236..6df0fef485 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -87,9 +87,9 @@ # built documents. # # The short X.Y version. -version = u'2025.09' +version = u'2025.12' # The full version, including alpha/beta/rc tags. -release = u'2025.09.1' +release = u'2025.12.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/sphinx/dev_guide/build_configurations.rst b/docs/sphinx/dev_guide/build_configurations.rst index f68b4bbe8b..60167809a5 100644 --- a/docs/sphinx/dev_guide/build_configurations.rst +++ b/docs/sphinx/dev_guide/build_configurations.rst @@ -233,13 +233,13 @@ Reproducing Docker Builds Locally ================================== RAJA uses Docker container images that it shares with other LLNL GitHub projects -for Azure and GitHub Actions CI testing (see :ref:`azure_ci-label` for more -information). 
We use Azure Pipelines and GitHub Actions for Linux, Windows, -and MacOS builds for build environments and compiler versions that -are not available on LLNL LC machines. +for GitHub Actions CI testing (see :ref:`github_actions_ci-label` for more +information). We use GitHub Actions for Linux, Windows, and MacOS builds for +build environments and compiler versions that are not available on LLNL LC +machines. -You can reproduce Azure and GitHub Actions builds locally for testing using -the following steps if you have access to Docker. +You can reproduce GitHub Actions builds locally for testing using the +following steps if you have access to Docker. #. Run the command to build a local Docker image:: diff --git a/docs/sphinx/dev_guide/ci.rst b/docs/sphinx/dev_guide/ci.rst index 1b0fff75bc..883ab8d016 100644 --- a/docs/sphinx/dev_guide/ci.rst +++ b/docs/sphinx/dev_guide/ci.rst @@ -17,23 +17,23 @@ Continuous Integration (CI) Testing viewed by clicking the appropriate link in the **checks** section of a GitHub pull request. -The RAJA project uses three CI tools to run tests: - - * **Azure Pipelines** and **GitHub Actions** runs builds and tests for Linux, - Windows, and MacOS environments using compilers in container images - maintained in the `RADIUSS Docker Project `_. - While we do some GPU builds on Azure and GitHub Actions for CUDA, HIP, and - SYCL, RAJA tests are only run for CPU-only builds. The current set of - builds run on Azure and GitHub Actions can be seen by looking at the - ``RAJA/azure-pipelines.yml`` and ``RAJA/.github/workflows/build.yml`` files, - respectively. The ``RAJA/Dockerfile`` file contains the CMake options used - to generate the build environment for each of the builds. +The RAJA project uses two CI tools to run tests: + + * **GitHub Actions** runs builds and tests for Linux, Windows, and MacOS + environments using compilers in container images maintained in the + `RADIUSS Docker Project `_. + While we do some GPU builds on GitHub Actions for CUDA, HIP, and SYCL, + RAJA tests are only run for CPU-only builds. The current set of builds + run on GitHub Actions can be seen by looking at the + ``RAJA/.github/workflows/build.yml`` file. The ``RAJA/Dockerfile`` file + contains the CMake options used to generate the build environment for + each of the builds. * **GitLab** instance in the Collaboration Zone (CZ) of the Livermore Computing (LC) Center runs builds and tests on LC platforms using software stacks (compilers, etc.) important to many RAJA user applications. - GitLab build configurations are more complex than Azure; they will be - described in detail in :ref:`gitlab_ci-label`. + GitLab build configurations are more complex than GitHub Actions; they + will be described in detail in :ref:`gitlab_ci-label`. These tools integrate with the RAJA GitHub project and automatically run RAJA builds and tests when a PR is created and when changes are pushed to a PR @@ -303,79 +303,22 @@ Also, recall that to generate a host-config file, Spack uses packages and specs in the `RADIUSS Spack Configs project `_ (a RAJA submodule), plus RAJA-specific specs defined in files in the `RAJA/.gitlab/jobs `_ directory, as described earlier. -.. _azure_ci-label: +.. _github_actions_ci-label: -====================================== -Azure Pipelines and GitHub Actions CI -====================================== +================== +GitHub Actions CI +================== -We use Azure Pipelines and GitHub Actions to run builds and tests for Linux, -Windows, and MacOS environments. 
We use these tools to run Linux builds and -tests for various less-common configurations, such as compiler versions that are -not available on LC systems. While we do builds for CUDA, HIP, and SYCL RAJA -GPU back-ends in the Azure and GitHub Actions Linux environments, RAJA tests -are only run for CPU-only pipelines. +We use GitHub Actions to run builds and tests for Linux, Windows, and MacOS +environments. We use this tool to run Linux builds and tests for various +less-common configurations, such as compiler versions that are not available +on LC systems. While we do builds for CUDA, HIP, and SYCL RAJA GPU back-ends +in the GitHub Actions Linux environment, RAJA tests are only run for CPU-only +pipelines. -.. note:: Azure Pipelines and GitHub Actions CI test jobs are run on every - RAJA pull request, regardless of whether it was made from a branch in the - RAJA project repo or from a fork of the repo. - -Azure Pipelines Testing Workflow --------------------------------- - -The jobs run in the Azure Pipelines testing workflow for RAJA are specified in -the `RAJA/azure-pipelines.yml `_ file. This file defines the job steps, commands, -compilers, etc. for each OS environment in the associated ``- job:`` section. -A summary of the configurations we build are: - - * **Windows.** The ``- job: Windows`` Windows section contains information - for the Windows test builds. For example, we build and test RAJA as - a static and/or shared library. This is indicated in the Windows - ``strategy`` section:: - - strategy: - matrix: - shared: - ... - static: - ... - - We use the Windows/compiler image provided by the Azure application - indicated the ``pool`` section; for example:: - - pool: - vmImage: 'windows-2019' - - **MacOS.** The ``- job: Mac`` section contains information for Mac test - builds. For example, we build RAJA using the MacOS/compiler - image provided by the Azure application indicated in the ``pool`` section; - for example:: - - pool: - vmImage: 'macOS-latest' - - **Linux.** The ``- job: Docker`` section contains information for Linux - test builds. We build and test RAJA using Docker container images generated - with recent versions of various compilers. The RAJA project shares these - images with other open-source LLNL RADIUSS projects and they are maintained - in the `RADIUSS Docker `_ - project on GitHub. The builds we do at any point in time are located in - the ``strategy`` block:: - - strategy: - matrix: - gcc11: - docker_target: ... - ... - clang14: - docker_target: ... - ... - - The Linux OS image used is indicated in the ``pool`` section; - for example:: - - pool: - vmImage: 'ubuntu-latest' +.. note:: GitHub Actions CI test jobs are run on every RAJA pull request, + regardless of whether it was made from a branch in the RAJA project repo + or from a fork of the repo. GitHub Actions Testing Workflow -------------------------------- diff --git a/docs/sphinx/dev_guide/ci_tasks.rst b/docs/sphinx/dev_guide/ci_tasks.rst index 16b3a6bad5..e5c77da403 100644 --- a/docs/sphinx/dev_guide/ci_tasks.rst +++ b/docs/sphinx/dev_guide/ci_tasks.rst @@ -191,92 +191,107 @@ Building the Compiler that the head of the SYCL branch will fail to build. In the event that it does not build, try checking out an earlier commit. On the Intel/LLVM GitHub page, one can see which of their commits builds by checking the status - badge next to each commit. Look for a commit that passes. + badge next to each commit. Look for a commit that passes. -#. 
Load the module of the version of GCC headers that you want to use. For example, we typically use the system default, which on corona is gcc/10.3.1-magic:: - module load gcc/10.3.1-magic +#. On LC machines, the good neighbor policy is to do your build on a compute node. -#. Load the module of the version of ROCm that you want to use. For example:: + Use an appropriate bank to get an interactive node, e.g., on Corona:: - module load rocm/5.7.1 + flux alloc -t 60 -N 1 --bank=wbronze +#. Load the module of the version of GCC headers that you want to use. We typically use the system default, which on Corona at the time of writing is gcc/10.3.1-magic:: + + GCC_VERSION=10.3.1 + module load gcc/${GCC_VERSION}-magic + +#. Load the module of the version of ROCm that you want to use:: + ROCM_VERSION=6.4.2 + module load rocm/${ROCM_VERSION} + +#. Load the Python module you want to use. At the time of writing, the LLVM configure requires at least version 3.7. We use the Corona default:: + PYTHON_VERSION=3.9.12 + module load python/${PYTHON_VERSION} + #. Clone the SYCL branch of Intel's LLVM compiler:: git clone https://github.com/intel/llvm -b sycl -#. cd into the LLVM folder:: - - cd llvm +#. cd into the LLVM folder and extract the GIT SHA for naming the install directories. The install directory uses the naming convention ``clang_sycl_<GIT_SHA>_hip_gcc<GCC_VERSION>_rocm<ROCM_VERSION>``:: - In the event that the head of the sycl branch does not build, run - ``git checkout `` to checkout a version that does build. + cd llvm + GIT_SHA=$(git rev-parse --short=12 HEAD) + INSTALL_PREFIX=/usr/workspace/raja-dev/clang_sycl_${GIT_SHA}_hip_gcc${GCC_VERSION}_rocm${ROCM_VERSION} #. Build the compiler. - Note that in this example, we are using rocm5.7.1, but one can change the - version they wish to use simply by changing the paths in the configure step - a. Configure - .. code-block:: bash - - srun -n1 /usr/bin/python3 buildbot/configure.py --hip -o buildrocm5.7.1 \ - --cmake-gen "Unix Makefiles" \ - --cmake-opt=-DSYCL_BUILD_PI_HIP_ROCM_DIR=/opt/rocm-5.7.1 \ - --cmake-opt=-DSYCL_BUILD_PI_HIP_ROCM_INCLUDE_DIR=/opt/rocm-5.7.1/include \ - --cmake-opt=-DSYCL_BUILD_PI_HIP_ROCM_LIB_DIR=/opt/rocm-5.7.1/lib \ - --cmake-opt=-DSYCL_BUILD_PI_HIP_INCLUDE_DIR=/opt/rocm-5.7.1/include \ - --cmake-opt=-DSYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR=/opt/rocm-5.7.1/hsa/include/hsa \ - --cmake-opt=-DSYCL_BUILD_PI_HIP_LIB_DIR=/opt/rocm-5.7.1/lib \ - --cmake-opt=-DUR_HIP_ROCM_DIR=/opt/rocm-5.7.1 \ - --cmake-opt=-DUR_HIP_INCLUDE_DIR=/opt/rocm-5.7.1/include \ - --cmake-opt=-DUR_HIP_HSA_INCLUDE_DIR=/opt/rocm-5.7.1/hsa/include/hsa \ - --cmake-opt=-DUR_HIP_LIB_DIR=/opt/rocm-5.7.1/lib - - b. Build - ..
code-block:: bash - srun -n1 /usr/bin/python3 buildbot/compile.py -o buildrocm5.7.1 + python3 buildbot/configure.py --hip -o buildrocm${ROCM_VERSION} \ + --cmake-gen "Unix Makefiles" \ + --cmake-opt=-DSYCL_BUILD_PI_HIP_ROCM_DIR=/opt/rocm-${ROCM_VERSION} \ + --cmake-opt=-DSYCL_BUILD_PI_HIP_ROCM_INCLUDE_DIR=/opt/rocm-${ROCM_VERSION}/include \ + --cmake-opt=-DSYCL_BUILD_PI_HIP_ROCM_LIB_DIR=/opt/rocm-${ROCM_VERSION}/lib \ + --cmake-opt=-DSYCL_BUILD_PI_HIP_INCLUDE_DIR=/opt/rocm-${ROCM_VERSION}/include \ + --cmake-opt=-DSYCL_BUILD_PI_HIP_HSA_INCLUDE_DIR=/opt/rocm-${ROCM_VERSION}/hsa/include/hsa \ + --cmake-opt=-DSYCL_BUILD_PI_HIP_LIB_DIR=/opt/rocm-${ROCM_VERSION}/lib \ + --cmake-opt=-DUR_HIP_ROCM_DIR=/opt/rocm-${ROCM_VERSION} \ + --cmake-opt=-DUR_HIP_INCLUDE_DIR=/opt/rocm-${ROCM_VERSION}/include \ + --cmake-opt=-DUR_HIP_HSA_INCLUDE_DIR=/opt/rocm-${ROCM_VERSION}/hsa/include/hsa \ + --cmake-opt=-DUR_HIP_LIB_DIR=/opt/rocm-${ROCM_VERSION}/lib -#. Test the compiler + #. Build:: - Follow the steps in the `Using the compiler`_ section to test the installation + python buildbot/compile.py -o buildrocm${ROCM_VERSION} -#. Install + #. Install:: + + cp -rp buildrocm${ROCM_VERSION}/install ${INSTALL_PREFIX} + cd .. - a. The build step will install the compiler in the folder ``buildrocm/install``. Copy this folder to the ``/usr/workspace/raja-dev/`` directory using the naming scheme ``clang_sycl__hip_gcc_rocm`` +#. Set the permissions of the folder, and everything in it to 750:: - #. Set the permissions of the folder, and everything in it to 750:: + chmod 750 ${INSTALL_PREFIX} -R - chmod 750 /usr/workspace/raja-dev// -R +#. Change the group of the folder and everything in it to raja-dev:: - #. Change the group of the folder and everything in it to raja-dev:: + chgrp raja-dev ${INSTALL_PREFIX} -R - chgrp raja-dev /usr/workspace/raja-dev// -R +#. Test the compiler + Follow the steps in the `Using the compiler`_ section to test the installation Using the compiler ^^^^^^^^^^^^^^^^^^ -#. Load the version of rocm that you used when building the compiler, for example:: +#. Load the version of ROCm that you used when building the compiler, for example:: - module load rocm/5.7.1 + ROCM_VERSION=6.4.2 + module load rocm/${ROCM_VERSION} #. Navigate to the root of your local RAJA checkout space:: cd /path/to/raja +#. Determine where you installed the compiler. + + This is the ``INSTALL_PREFIX`` used above. For example:: + + SYCL_INSTALL_PREFIX=/usr/workspace/raja-dev/clang_sycl_16b7bcb09915_hip_gcc10.3.1_rocm6.4.2 + #. Run the test config script:: - ./scripts/lc-builds/corona_sycl.sh /usr/workspace/raja-dev/clang_sycl_2f03ef85fee5_hip_gcc10.3.1_rocm5.7.1 + ./scripts/lc-builds/corona_sycl.sh ${SYCL_INSTALL_PREFIX} - Note that at the time of writing, the newest compiler we had built was at ``clang_sycl_2f03ef85fee5_hip_gcc10.3.1_rocm5.7.1`` +#. As indicated in the output of the ``corona_sycl.sh`` script the SYCL compiler libraries need to be on the ``LD_LIBRARY_PATH``:: + + export LD_LIBRARY_PATH=${SYCL_INSTALL_PREFIX}/lib:${SYCL_INSTALL_PREFIX}/lib64:$LD_LIBRARY_PATH #. cd into the generated build directory:: - cd {build directory} + cd build_corona-sycl_${USER} #. 
Build the code and run the RAJA tests:: @@ -284,46 +299,28 @@ Using the compiler make test -============================================ -Azure Pipelines and GitHub Actions CI Tasks -============================================ +======================== +GitHub Actions CI Tasks +======================== -The tasks in this section apply to RAJA Azure Pipelines and GitHub Actions -CI testing that was described in :ref:`azure_ci-label` +The tasks in this section apply to RAJA GitHub Actions CI testing that was +described in :ref:`github_actions_ci-label` Changing Builds/Container Images -------------------------------- -The builds we run in Azure are defined in the `RAJA/azure-pipelines.yml `_ file. - -The builds we run in GitHub Actions are defined in the `RAJA/.github/workflows/build.yml `_ file. +The builds we run in GitHub Actions are defined in the +`RAJA/.github/workflows/build.yml `_ file. Linux/Docker ^^^^^^^^^^^^ -To update or add a new compiler / job to Azure Pipelines or GitHub Actions CI, -we need to edit either the ``RAJA/azure-pipelines.yml`` file or the -``RAJA/.github/workflows/build.yml`` file and the ``RAJA/Dockerfile``, if +To update or add a new compiler / job to GitHub Actions CI, we need to edit +the ``RAJA/.github/workflows/build.yml`` file and the ``RAJA/Dockerfile``, if changes are needed there. -If we want to add a new Azure pipeline to build with ``compilerX``, then in the -``RAJA/azure-pipelines.yml`` file we can add the job like so:: - - -job: Docker - ... - strategy: - matrix: - ... - compilerX: - docker_target: compilerX - -Here, ``compilerX`` defines the name of a job in Azure. ``docker_target: compilerX`` defines a variable ``docker_target``, which is used to determine which -entry in the ``Dockerfile`` file to use, where the name after ``docker_target`` -is the shorthand name of job in the ``Dockerfile`` file following the word -``AS``. - -Similarly, for GitHub Actions, we add the name of the job to the job list in -the ``RAJA/.github/workflows/build.yaml`` file:: +For GitHub Actions, we add the name of the job to the job list in the +``RAJA/.github/workflows/build.yml`` file:: jobs: build_docker: @@ -355,24 +352,9 @@ list of currently available images. Windows / MacOS ^^^^^^^^^^^^^^^ -We run our Windows and MacOS builds directly on the provided Azure machine -instances. To change the versions, change the ``pool`` under ``-job: Windows`` -or ``-job: Mac`` in the ``RAJA/azure-pipelines.yml`` file:: - - -job: Windows - ... - pool: - vmImage: 'windows-2019' - ... - - -job: Mac - ... - pool: - vmImage: 'macOS-latest' - -Similarly, in GitHub Actions, we run our Windows and MacOS builds directly on -the provided machine instances. To change the versions, change the -appropriate lines in the ``RAJA/.github/workflows/build.yml`` file:: +In GitHub Actions, we run our Windows and MacOS builds directly on the +provided machine instances. To change the versions, change the appropriate +lines in the ``RAJA/.github/workflows/build.yml`` file:: build_mac: runs-on: macos-latest @@ -410,7 +392,9 @@ Windows / MacOS ^^^^^^^^^^^^^^^ Windows and MacOS build / run parameters can be configured directly in the -``RAJA/azure-pipelines.yml`` or ``RAJA/.github/workflows/build.yml`` file. CMake options can be configured with ``CMAKE_EXTRA_FLAGS`` for each job. The ``-j`` value can also be edited directly in these files for each job. +``RAJA/.github/workflows/build.yml`` file. CMake options can be configured +in the workflow file for each job. 
The parallel build value can also be +edited directly in the workflow file for each job. .. _rajaperf_ci_tasks-label: @@ -433,7 +417,7 @@ Specifically, `_ directory. * The `RAJAPerf/Dockerfile `_ drives the - Azure testing pipelines. + GitHub Actions testing pipelines. The Performance Suite GitLab CI uses the ``uberenv`` and ``radiuss-spack-configs`` versions located in the RAJA submodule to make the diff --git a/docs/sphinx/dev_guide/contributing.rst b/docs/sphinx/dev_guide/contributing.rst index d26deb0459..69386776f4 100644 --- a/docs/sphinx/dev_guide/contributing.rst +++ b/docs/sphinx/dev_guide/contributing.rst @@ -71,10 +71,10 @@ of the contributor. The process involves four main steps: #. A RAJA contributor makes a PR on the RAJA GitHub project to merge a branch on which she has developed a contribution into another RAJA branch, typically, the develop branch. - #. When a PR is created, GitHub triggers Azure and GitHub Actions CI test - checks and GitLab CI checks if the branch is part of the RAJA GitHub repo. - Running and pass/fail status for all checks is reported back to the - corresponding GitHub pull request where it can be viewed and monitored. + #. When a PR is created, GitHub triggers GitHub Actions CI test checks and + GitLab CI checks if the branch is part of the RAJA GitHub repo. Running + and pass/fail status for all checks is reported back to the corresponding + GitHub pull request where it can be viewed and monitored. #. Meanwhile, RAJA team members and other contributors review the PR, suggesting changes and/or approving when they think it is ready to merge. #. When all checks pass and the PR is approved, the PR may be merged. diff --git a/host-configs/lc-builds/toss4/cce_omptarget_X.cmake b/host-configs/lc-builds/toss4/cce_omptarget_X.cmake new file mode 100644 index 0000000000..695e8838c8 --- /dev/null +++ b/host-configs/lc-builds/toss4/cce_omptarget_X.cmake @@ -0,0 +1,15 @@ +############################################################################### +# Copyright (c) 2016-25, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") + +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -haccel=amd_${HIP_ARCH}" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -haccel=amd_${HIP_ARCH}" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -haccel=amd_${HIP_ARCH}" CACHE STRING "") + +# hcpu flag needs more experimentation, can cause runtime vectorization failures. +# -hcpu=x86-genoa diff --git a/host-configs/lc-builds/toss4/nvcc_clang_X.cmake b/host-configs/lc-builds/toss4/nvcc_clang_X.cmake new file mode 100755 index 0000000000..6df45ac600 --- /dev/null +++ b/host-configs/lc-builds/toss4/nvcc_clang_X.cmake @@ -0,0 +1,26 @@ +############################################################################### +# Copyright (c) 2016-25, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. 
+# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +set(RAJA_COMPILER "RAJA_COMPILER_CLANG" CACHE STRING "") + +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -funroll-loops -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -march=native -funroll-loops -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(HOST_OPT_FLAGS "-Xcompiler -O3 ") + +if(ENABLE_OPENMP) + set(HOST_OPT_FLAGS "${HOST_OPT_FLAGS} -Xcompiler -fopenmp") +endif() + +set(CMAKE_CUDA_FLAGS_RELEASE "-O3 ${HOST_OPT_FLAGS}" CACHE STRING "") +set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0" CACHE STRING "") +set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O3 ${HOST_OPT_FLAGS}" CACHE STRING "") + +set(RAJA_DATA_ALIGN 64 CACHE STRING "") + +set(RAJA_HOST_CONFIG_LOADED On CACHE BOOL "") diff --git a/host-configs/lc-builds/toss4/nvcc_gcc_X.cmake b/host-configs/lc-builds/toss4/nvcc_gcc_X.cmake new file mode 100755 index 0000000000..080c8376a9 --- /dev/null +++ b/host-configs/lc-builds/toss4/nvcc_gcc_X.cmake @@ -0,0 +1,26 @@ +############################################################################### +# Copyright (c) 2016-25, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +set(RAJA_COMPILER "RAJA_COMPILER_GNU" CACHE STRING "") + +set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -g -finline-functions" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(HOST_OPT_FLAGS "-Xcompiler -O3 -Xcompiler -finline-functions ") + +if(ENABLE_OPENMP) + set(HOST_OPT_FLAGS "${HOST_OPT_FLAGS} -Xcompiler -fopenmp") +endif() + +set(CMAKE_CUDA_FLAGS_RELEASE "-O3 ${HOST_OPT_FLAGS}" CACHE STRING "") +set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0" CACHE STRING "") +set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O3 ${HOST_OPT_FLAGS}" CACHE STRING "") + +set(RAJA_DATA_ALIGN 64 CACHE STRING "") + +set(RAJA_HOST_CONFIG_LOADED On CACHE BOOL "") diff --git a/include/RAJA/RAJA.hpp b/include/RAJA/RAJA.hpp index abc9f30738..e09b88151c 100644 --- a/include/RAJA/RAJA.hpp +++ b/include/RAJA/RAJA.hpp @@ -135,6 +135,10 @@ // #include "RAJA/pattern/atomic.hpp" +// Thread operations support +// +#include "RAJA/pattern/thread.hpp" + // // Shared memory view patterns // diff --git a/include/RAJA/config.hpp.in b/include/RAJA/config.hpp.in index 4987f00a50..36e4836378 100644 --- a/include/RAJA/config.hpp.in +++ b/include/RAJA/config.hpp.in @@ -167,8 +167,15 @@ static_assert(RAJA_HAS_SOME_CXX14, * * \brief Programming model back-ends. * + * \note The RAJA_ENABLE_ macros indicate if the back-end was + * enabled when RAJA was configured. RAJA__ACTIVE macros + * indicate if the back-end is present when the application/library + * using RAJA is compiled. An application/library can use a subset of + * back-ends that are present in a RAJA install. 
+ * ****************************************************************************** */ + #cmakedefine RAJA_ENABLE_OPENMP #cmakedefine RAJA_ENABLE_TARGET_OPENMP #cmakedefine RAJA_ENABLE_CUDA @@ -264,14 +271,14 @@ namespace RAJA { #if defined(RAJA_ENABLE_OPENMP) && !defined(__HIP_DEVICE_COMPILE__) #if defined(_OPENMP) +// RAJA build can be configured but downstream packages may not; guard OpenMP +#define RAJA_OPENMP_ACTIVE #if (_OPENMP >= 200805) #if defined(RAJA_ENABLE_OPENMP_TASK) #define RAJA_ENABLE_OPENMP_TASK_INTERNAL #endif #endif // _OPENMP >= 200805 -#else -#error RAJA configured with RAJA_ENABLE_OPENMP, but _OPENMP is not defined in this code section -#endif // else +#endif // OPENMP #endif // RAJA_ENABLE_OPENMP && !__HIP_DEVICE_COMPILE__ #if defined(RAJA_ENABLE_CUDA) && defined(__CUDACC__) diff --git a/include/RAJA/internal/ThreadUtils_CPU.hpp b/include/RAJA/internal/ThreadUtils_CPU.hpp index 8d420fa113..fe262363a5 100644 --- a/include/RAJA/internal/ThreadUtils_CPU.hpp +++ b/include/RAJA/internal/ThreadUtils_CPU.hpp @@ -21,9 +21,9 @@ #include "RAJA/config.hpp" -#if defined(RAJA_ENABLE_OPENMP) -#include -#endif +#include "RAJA/pattern/thread.hpp" +#include "RAJA/policy/openmp/thread.hpp" +#include "RAJA/policy/sequential/thread.hpp" namespace RAJA { @@ -35,16 +35,10 @@ namespace RAJA * ************************************************************************* */ -RAJA_INLINE -int getMaxOMPThreadsCPU() +template +RAJA_INLINE int getMaxOMPThreadsCPU() { - int nthreads = 1; - -#if defined(RAJA_ENABLE_OPENMP) - nthreads = omp_get_max_threads(); -#endif - - return nthreads; + return RAJA::get_max_threads(); } } // namespace RAJA diff --git a/include/RAJA/pattern/detail/TypeTraits.hpp b/include/RAJA/pattern/detail/TypeTraits.hpp new file mode 100644 index 0000000000..15a60812f0 --- /dev/null +++ b/include/RAJA/pattern/detail/TypeTraits.hpp @@ -0,0 +1,99 @@ +/*! + ****************************************************************************** + * + * \file + * + * \brief Header containing helper type traits for work with Reducers + * + ****************************************************************************** + */ + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + + +#ifndef RAJA_TYPETRAITS_HPP +#define RAJA_TYPETRAITS_HPP + +#include +#include + +namespace RAJA +{ +namespace expt +{ +//=========================================================================== +// +// +// Forward declarations for types used by type trait helpers +// +// + +// Forward declaration of ForallParamPack +template +struct ForallParamPack; + +// Forward declaration of Reducer +namespace detail +{ +template +struct Reducer; +} + +//=========================================================================== +// +// +// Type traits for SFINAE work. 
+// +// +namespace type_traits +{ +template +struct is_ForallParamPack : std::false_type +{}; + +template +struct is_ForallParamPack> : std::true_type +{}; + +template +struct is_ForallParamPack_empty : std::true_type +{}; + +template +struct is_ForallParamPack_empty> + : std::false_type +{}; + +template<> +struct is_ForallParamPack_empty> : std::true_type +{}; +} // namespace type_traits + +template +struct is_instance_of_Reducer : std::false_type +{}; + +template +struct is_instance_of_Reducer> : std::true_type +{}; + +template +struct tuple_contains_Reducers : std::false_type +{}; + +template +struct tuple_contains_Reducers> + : std::integral_constant< + bool, + camp::concepts::any_of...>::value> +{}; + +} // namespace expt +} // namespace RAJA + +#endif // RAJA_TYPETRAITS_HPP diff --git a/include/RAJA/pattern/kernel.hpp b/include/RAJA/pattern/kernel.hpp index abcdd1ac61..8ae119e063 100644 --- a/include/RAJA/pattern/kernel.hpp +++ b/include/RAJA/pattern/kernel.hpp @@ -204,6 +204,6 @@ kernel(SegmentTuple&& segments, Bodies&&... bodies) #include "RAJA/pattern/kernel/Region.hpp" #include "RAJA/pattern/kernel/Tile.hpp" #include "RAJA/pattern/kernel/TileTCount.hpp" -#include "RAJA/pattern/kernel/type-traits.hpp" +#include "RAJA/pattern/kernel/TypeTraits.hpp" #endif /* RAJA_pattern_kernel_HPP */ diff --git a/include/RAJA/pattern/kernel/type-traits.hpp b/include/RAJA/pattern/kernel/TypeTraits.hpp similarity index 97% rename from include/RAJA/pattern/kernel/type-traits.hpp rename to include/RAJA/pattern/kernel/TypeTraits.hpp index 74a54a8a43..2d7ec81a30 100644 --- a/include/RAJA/pattern/kernel/type-traits.hpp +++ b/include/RAJA/pattern/kernel/TypeTraits.hpp @@ -39,7 +39,7 @@ template struct loop_data_has_reducers< LoopData> - : RAJA::expt::tuple_contains_reducers + : RAJA::expt::tuple_contains_Reducers {}; template diff --git a/include/RAJA/pattern/launch.hpp b/include/RAJA/pattern/launch.hpp index 153e6e8376..66adf395e3 100644 --- a/include/RAJA/pattern/launch.hpp +++ b/include/RAJA/pattern/launch.hpp @@ -34,11 +34,11 @@ #include "RAJA/policy/hip/launch.hpp" #endif -#if defined(RAJA_ENABLE_OPENMP) +#if defined(RAJA_OPENMP_ACTIVE) #include "RAJA/policy/openmp/launch.hpp" #endif -#if defined(RAJA_ENABLE_SYCL) +#if defined(RAJA_SYCL_ACTIVE) #include "RAJA/policy/sycl/launch.hpp" #endif diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index bfce94057c..67d9b74ce2 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -31,7 +31,7 @@ // Odd dependecy with atomics is breaking CI builds //#include "RAJA/util/View.hpp" -#if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && !defined(RAJA_ENABLE_SYCL) +#if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && !defined(RAJA_SYCL_ACTIVE) #define RAJA_TEAM_SHARED __shared__ #else #define RAJA_TEAM_SHARED @@ -185,7 +185,8 @@ class LaunchContext void* shared_mem_ptr; -#if defined(RAJA_ENABLE_SYCL) +#if defined(RAJA_SYCL_ACTIVE) + // SGS ODR issue mutable ::sycl::nd_item<3>* itm; #endif @@ -231,11 +232,12 @@ class LaunchContext RAJA_HOST_DEVICE void teamSync() { -#if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && defined(RAJA_ENABLE_SYCL) + // SGS ODR Issue +#if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && defined(RAJA_SYCL_ACTIVE) itm->barrier(::sycl::access::fence_space::local_space); #endif -#if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && !defined(RAJA_ENABLE_SYCL) +#if defined(RAJA_GPU_DEVICE_COMPILE_PASS_ACTIVE) && !defined(RAJA_SYCL_ACTIVE) 
__syncthreads(); #endif } diff --git a/include/RAJA/pattern/params/forall.hpp b/include/RAJA/pattern/params/forall.hpp index b8b12a5c29..166594b6df 100644 --- a/include/RAJA/pattern/params/forall.hpp +++ b/include/RAJA/pattern/params/forall.hpp @@ -2,6 +2,7 @@ #define FORALL_PARAM_HPP +#include "RAJA/pattern/detail/TypeTraits.hpp" #include "RAJA/pattern/params/reducer.hpp" #include "RAJA/util/CombiningAdapter.hpp" #include "camp/camp.hpp" @@ -23,7 +24,7 @@ namespace detail template RAJA_HOST_DEVICE constexpr auto filter_reducers(camp::tuple& params) { - return camp::get_refs_to_elements_by_type_trait( + return camp::get_refs_to_elements_by_type_trait( params); } @@ -35,8 +36,9 @@ void resolve_params_helper(ParamTuple& params_tuple, const camp::idx_seq&, Args&&... args) { - CAMP_EXPAND(param_resolve(ExecPol {}, camp::get(params_tuple), - std::forward(args)...)); + (param_resolve(ExecPol {}, camp::get(params_tuple), + std::forward(args)...), + ...); } template @@ -57,8 +59,9 @@ void init_params_helper(ParamTuple& params_tuple, const camp::idx_seq&, Args&&... args) { - CAMP_EXPAND(param_init(ExecPol {}, camp::get(params_tuple), - std::forward(args)...)); + (param_init(ExecPol {}, camp::get(params_tuple), + std::forward(args)...), + ...); } template @@ -75,12 +78,12 @@ template RAJA_HOST_DEVICE void combine_params_helper(const camp::idx_seq&, ParamTuple& params_tuple) { - CAMP_EXPAND(param_combine(ExecPol {}, camp::get(params_tuple))); + (param_combine(ExecPol {}, camp::get(params_tuple)), ...); } template camp::concepts::enable_if< - concepts::negate>>, + concepts::negate>>, concepts::negate>> param_combine(EXEC_POL const&, T&, const T&) {} @@ -90,8 +93,9 @@ RAJA_HOST_DEVICE void combine_params_helper(const camp::idx_seq&, ParamTuple& params_tuple, const ParamTuple& params_tuple_in) { - CAMP_EXPAND(param_combine(ExecPol {}, camp::get(params_tuple), - camp::get(params_tuple_in))); + (param_combine(ExecPol {}, camp::get(params_tuple), + camp::get(params_tuple_in)), + ...); } template @@ -142,8 +146,9 @@ struct ForallParamPack ForallParamPack& f_params, Args&&... args) { - CAMP_EXPAND(param_init(pol, camp::get(f_params.param_tup), - std::forward(args)...)); + (param_init(pol, camp::get(f_params.param_tup), + std::forward(args)...), + ...); } // Combine @@ -154,8 +159,9 @@ struct ForallParamPack ForallParamPack& out, const ForallParamPack& in) { - CAMP_EXPAND(param_combine(pol, camp::get(out.param_tup), - camp::get(in.param_tup))); + (param_combine(pol, camp::get(out.param_tup), + camp::get(in.param_tup)), + ...); } template @@ -164,7 +170,7 @@ struct ForallParamPack camp::idx_seq, ForallParamPack& f_params) { - CAMP_EXPAND(param_combine(pol, camp::get(f_params.param_tup))); + (param_combine(pol, camp::get(f_params.param_tup)), ...); } // Resolve @@ -174,8 +180,9 @@ struct ForallParamPack ForallParamPack& f_params, Args&&... args) { - CAMP_EXPAND(param_resolve(pol, camp::get(f_params.param_tup), - std::forward(args)...)); + (param_resolve(pol, camp::get(f_params.param_tup), + std::forward(args)...), + ...); } // Used to construct the argument TYPES that will be invoked with the lambda. @@ -254,8 +261,13 @@ struct ParamMultiplexer ForallParamPack& f_params, Args&&... 
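The forall.hpp helpers above swap the CAMP_EXPAND macro for C++17 comma fold expressions over the pack's index sequence; an empty sequence folds to nothing, which is what lets later code call these hooks unconditionally. A self-contained sketch of the same idiom using std::tuple rather than RAJA's parameter tuple:

    #include <iostream>
    #include <tuple>
    #include <utility>

    template <typename Tuple, std::size_t... Seq>
    void bump_each(Tuple& t, std::index_sequence<Seq...>)
    {
      // Comma fold: expands to (op(get<0>), op(get<1>), ...); for an empty
      // index sequence the whole expression disappears.
      ((std::get<Seq>(t) += 1), ...);
    }

    int main()
    {
      std::tuple<int, int, int> t {1, 2, 3};
      bump_each(t, std::make_index_sequence<3> {});
      std::cout << std::get<0>(t) << " " << std::get<1>(t) << " "
                << std::get<2>(t) << "\n";   // prints: 2 3 4
      return 0;
    }
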
args) { - FP::parampack_init(pol, typename FP::params_seq(), f_params, - std::forward(args)...); + constexpr bool has_reducers = + !RAJA::expt::type_traits::is_ForallParamPack_empty::value; + if constexpr (has_reducers) + { + FP::parampack_init(pol, typename FP::params_seq(), f_params, + std::forward(args)...); + } } template& f_params, Args&&... args) { - FP::parampack_combine(pol, typename FP::params_seq(), f_params, - std::forward(args)...); + constexpr bool has_reducers = + !RAJA::expt::type_traits::is_ForallParamPack_empty::value; + if constexpr (has_reducers) + { + FP::parampack_combine(pol, typename FP::params_seq(), f_params, + std::forward(args)...); + } } template& f_params, Args&&... args) { - FP::parampack_resolve(pol, typename FP::params_seq(), f_params, - std::forward(args)...); + constexpr bool has_reducers = + !RAJA::expt::type_traits::is_ForallParamPack_empty::value; + if constexpr (has_reducers) + { + FP::parampack_resolve(pol, typename FP::params_seq(), f_params, + std::forward(args)...); + } } }; @@ -535,39 +557,6 @@ constexpr void check_forall_optional_args(Lambda&& l, ForallParams& fpp) //=========================================================================== -//=========================================================================== -// -// -// Type trailts for SFINAE work. -// -// -namespace type_traits -{ -template -struct is_ForallParamPack : std::false_type -{}; - -template -struct is_ForallParamPack> : std::true_type -{}; - -template -struct is_ForallParamPack_empty : std::true_type -{}; - -template -struct is_ForallParamPack_empty> - : std::false_type -{}; - -template<> -struct is_ForallParamPack_empty> : std::true_type -{}; -} // namespace type_traits - -//=========================================================================== - - //=========================================================================== // // @@ -601,10 +590,20 @@ RAJA_HOST_DEVICE constexpr auto invoke_body(Params&& params, Fn&& f, Ts&&... 
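ParamMultiplexer's parampack_init/parampack_combine/parampack_resolve now test is_ForallParamPack_empty with if constexpr, so calls on an empty pack compile to nothing and the back ends below no longer need separate reducer and non-reducer overloads. A simplified analogue of that guard, using stand-in types rather than RAJA's real ones:

    #include <type_traits>

    struct EmptyPack {};          // stand-in for ForallParamPack<>
    struct ReducerPack            // stand-in for a pack that carries reducers
    {
      void init() {}
    };

    template <typename Pack>
    void init_if_needed(Pack& pack)
    {
      if constexpr (!std::is_same<Pack, EmptyPack>::value)
      {
        // The branch is discarded for EmptyPack, so EmptyPack never needs an
        // init() member and no dead call is emitted.
        pack.init();
      }
    }
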
extra) { - return detail::invoke_with_order( - camp::forward(params), camp::forward(f), - typename camp::decay::lambda_arg_seq(), - camp::forward(extra)...); + using FPType = camp::decay; + constexpr bool has_reducers = + !RAJA::expt::type_traits::is_ForallParamPack_empty::value; + if constexpr (has_reducers) + { + return detail::invoke_with_order( + camp::forward(params), camp::forward(f), + typename camp::decay::lambda_arg_seq(), + camp::forward(extra)...); + } + else + { + return f(camp::forward(extra)...); + } } //=========================================================================== diff --git a/include/RAJA/pattern/params/reducer.hpp b/include/RAJA/pattern/params/reducer.hpp index 21042a591b..4298994285 100644 --- a/include/RAJA/pattern/params/reducer.hpp +++ b/include/RAJA/pattern/params/reducer.hpp @@ -3,6 +3,7 @@ #include +#include "RAJA/pattern/detail/TypeTraits.hpp" #include "RAJA/pattern/params/params_base.hpp" #include "RAJA/util/SoAPtr.hpp" @@ -247,24 +248,6 @@ auto constexpr ReduceLoc(T* target, IndexType* index) target, index); } -template -struct is_instance_of_reducer : std::false_type -{}; - -template -struct is_instance_of_reducer> : std::true_type -{}; - -template -struct tuple_contains_reducers : std::false_type -{}; - -template -struct tuple_contains_reducers> - : std::integral_constant< - bool, - camp::concepts::any_of...>::value> -{}; } // namespace expt diff --git a/include/RAJA/pattern/thread.hpp b/include/RAJA/pattern/thread.hpp new file mode 100644 index 0000000000..0fc15fa83c --- /dev/null +++ b/include/RAJA/pattern/thread.hpp @@ -0,0 +1,95 @@ +/*! + ****************************************************************************** + * + * \file + * + * \brief RAJA header file defining thread operations. + * + ****************************************************************************** + */ + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#ifndef RAJA_pattern_thread_HPP +#define RAJA_pattern_thread_HPP + +#include "RAJA/config.hpp" + +#include "RAJA/util/macros.hpp" + +#include "RAJA/policy/thread_auto.hpp" + +namespace RAJA +{ +/*! + * \file + * Thread operation functions in the namespace RAJA::thread + * + * The dispatch of all of these is: + * + * int get_thread_num() -- User facing API + * + * calls + * + * int get_thread_num(Policy{}) -- Policy specific implementation + * + * + * With the exception of the auto_thread policy which then calls the + * "appropriate" policy implementation. + * + * + * Current supported policies include: + * + * auto_thread -- Attempts to do "the right thing" + * + * omp_thread -- Available (and default) when OpenMP is active + * these are safe inside and outside of OMP parallel + * regions + * + * seq_thread -- Non-thread + * + * + * The implementation code lives in: + * RAJA/policy/thread_auto.hpp -- for auto_thread + * RAJA/policy/XXX/thread.hpp -- for omp_thread + * + */ + +/*! + * @brief Get maximum number of threads + + * This is based on OpenMP threading model. This value is also an + * upper bound on the number of threads that could be used to form a + * new team if a parallel region without a num_threads clause were + * encountered after execution returns from this routine. + * + * Returns 1 if OMP is not active. 
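The dispatch described in the new pattern/thread.hpp comment is plain tag dispatch: a user-facing function template forwards to a policy-specific overload selected by a default-constructed tag object. A generic sketch of that layering, with hypothetical tag names standing in for omp_thread and seq_thread:

    // Policy tags (stand-ins for RAJA::omp_thread / RAJA::seq_thread).
    struct serial_tag {};
    struct threaded_tag {};

    // Policy-specific implementations.
    inline int thread_count(serial_tag)   { return 1; }
    inline int thread_count(threaded_tag) { return 8; /* e.g. omp_get_max_threads() */ }

    // User-facing API: dispatches on a default-constructed tag.
    template <typename Policy = serial_tag>
    inline int thread_count()
    {
      return thread_count(Policy {});
    }
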
+ * @return Maximum number of threads + */ +template +RAJA_INLINE RAJA_HOST_DEVICE int get_max_threads() +{ + return RAJA::get_max_threads(Policy {}); +} + +/*! + * @brief Get current thread number + * This is based on the OpenMP threading model. Within a parallel team + * executing a parallel region the threads are numbered 0-N. Returns 0 if called + * in sequential part of a program or OMP is not active + * @return Current thread number + */ +template +RAJA_INLINE RAJA_HOST_DEVICE int get_thread_num() +{ + return RAJA::get_thread_num(Policy {}); +} + +} // namespace RAJA + +#endif diff --git a/include/RAJA/policy/cuda/MemUtils_CUDA.hpp b/include/RAJA/policy/cuda/MemUtils_CUDA.hpp index be8eafc58f..49bf62e5aa 100644 --- a/include/RAJA/policy/cuda/MemUtils_CUDA.hpp +++ b/include/RAJA/policy/cuda/MemUtils_CUDA.hpp @@ -27,11 +27,12 @@ #include #include #include +#include #include #include + #include "RAJA/util/basic_mempool.hpp" -#include "RAJA/util/mutex.hpp" #include "RAJA/util/types.hpp" #include "RAJA/util/macros.hpp" #include "RAJA/util/resource.hpp" @@ -182,17 +183,12 @@ struct cudaInfo struct cudaStatusInfo : cudaInfo { -#if defined(RAJA_ENABLE_OPENMP) - omp::mutex lock; -#endif + std::mutex lock; }; extern cudaStatusInfo g_status; -extern cudaStatusInfo tl_status; -#if defined(RAJA_ENABLE_OPENMP) -#pragma omp threadprivate(tl_status) -#endif +thread_local extern cudaStatusInfo tl_status; // stream to synchronization status: true synchronized, false running extern std::unordered_map g_stream_info_map; @@ -206,9 +202,7 @@ void synchronize_impl(::RAJA::resources::Cuda res) { res.wait(); } RAJA_INLINE void synchronize() { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(detail::g_status.lock); -#endif + std::lock_guard lock(detail::g_status.lock); bool synchronize = false; for (auto& val : detail::g_stream_info_map) { @@ -228,9 +222,7 @@ void synchronize() RAJA_INLINE void synchronize(::RAJA::resources::Cuda res) { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(detail::g_status.lock); -#endif + std::lock_guard lock(detail::g_status.lock); auto iter = detail::g_stream_info_map.find(res.get_stream()); if (iter != detail::g_stream_info_map.end()) { @@ -250,9 +242,7 @@ void synchronize(::RAJA::resources::Cuda res) RAJA_INLINE void launch(::RAJA::resources::Cuda res, bool async = true) { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(detail::g_status.lock); -#endif + std::lock_guard lock(detail::g_status.lock); auto iter = detail::g_stream_info_map.find(res.get_stream()); if (iter != detail::g_stream_info_map.end()) { diff --git a/include/RAJA/policy/cuda/atomic.hpp b/include/RAJA/policy/cuda/atomic.hpp index dfa148320a..2c7b38d727 100644 --- a/include/RAJA/policy/cuda/atomic.hpp +++ b/include/RAJA/policy/cuda/atomic.hpp @@ -38,7 +38,8 @@ #include "RAJA/policy/sequential/atomic.hpp" #include "RAJA/policy/atomic_builtin.hpp" -#if defined(RAJA_ENABLE_OPENMP) + +#if defined(RAJA_OPENMP_ACTIVE) #include "RAJA/policy/openmp/atomic.hpp" #endif diff --git a/include/RAJA/policy/cuda/forall.hpp b/include/RAJA/policy/cuda/forall.hpp index cfa2c953d5..2af88ca941 100644 --- a/include/RAJA/policy/cuda/forall.hpp +++ b/include/RAJA/policy/cuda/forall.hpp @@ -390,23 +390,13 @@ __launch_bounds__(BlockSize, BlocksPerSM) __global__ auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); auto ii = IterationGetter::template index(); - constexpr bool is_forall_parampack_empty = - RAJA::expt::type_traits::is_ForallParamPack_empty::value; + if (ii < length) { - if constexpr 
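Throughout the CUDA/HIP utility headers, the OpenMP-specific omp::mutex plus "#pragma omp threadprivate" pair is replaced by standard C++ facilities: std::mutex with std::lock_guard for the shared status, and a thread_local object for per-thread status. A minimal sketch of that pattern outside RAJA:

    #include <mutex>
    #include <unordered_map>

    struct Status
    {
      std::mutex lock;                       // was omp::mutex under RAJA_ENABLE_OPENMP
      std::unordered_map<int, bool> streams;
    };

    inline Status g_status;                  // shared across threads
    thread_local inline Status tl_status;    // was #pragma omp threadprivate

    inline void mark_running(int stream)
    {
      std::lock_guard<std::mutex> guard(g_status.lock);  // no #ifdef needed
      g_status.streams[stream] = false;
    }
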
(is_forall_parampack_empty) - { - body(idx[ii]); - } - else - { - RAJA::expt::invoke_body(f_params, body, idx[ii]); - } - } - if constexpr (!is_forall_parampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); + RAJA::expt::invoke_body(f_params, body, idx[ii]); } + + RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); } /// @@ -432,23 +422,13 @@ __global__ void forallp_cuda_kernel( auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); auto ii = IterationGetter::template index(); - constexpr bool is_forall_parampack_empty = - RAJA::expt::type_traits::is_ForallParamPack_empty::value; + if (ii < length) { - if constexpr (is_forall_parampack_empty) - { - body(idx[ii]); - } - else - { - RAJA::expt::invoke_body(f_params, body, idx[ii]); - } - } - if constexpr (!is_forall_parampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); + RAJA::expt::invoke_body(f_params, body, idx[ii]); } + + RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); } /// @@ -476,24 +456,14 @@ __launch_bounds__(BlockSize, BlocksPerSM) __global__ using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); - constexpr bool is_forall_parampack_empty = - RAJA::expt::type_traits::is_ForallParamPack_empty::value; + for (auto ii = IterationGetter::template index(); ii < length; ii += IterationGetter::template size()) { - if constexpr (is_forall_parampack_empty) - { - body(idx[ii]); - } - else - { - RAJA::expt::invoke_body(f_params, body, idx[ii]); - } - } - if constexpr (!is_forall_parampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); + RAJA::expt::invoke_body(f_params, body, idx[ii]); } + + RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); } /// @@ -521,24 +491,14 @@ __global__ void forallp_cuda_kernel( using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); - constexpr bool is_forall_parampack_empty = - RAJA::expt::type_traits::is_ForallParamPack_empty::value; + for (auto ii = IterationGetter::template index(); ii < length; ii += IterationGetter::template size()) { - if constexpr (is_forall_parampack_empty) - { - body(idx[ii]); - } - else - { - RAJA::expt::invoke_body(f_params, body, idx[ii]); - } - } - if constexpr (!is_forall_parampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); + RAJA::expt::invoke_body(f_params, body, idx[ii]); } + + RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); } } // namespace impl @@ -590,8 +550,6 @@ forall_impl(resources::Cuda cuda_res, Iterator begin = std::begin(iter); Iterator end = std::end(iter); IndexType len = std::distance(begin, end); - constexpr bool is_forallparampack_empty = - RAJA::expt::type_traits::is_ForallParamPack_empty::value; // Only launch kernel if we have something to iterate over if (len > 0) @@ -620,11 +578,7 @@ forall_impl(resources::Cuda cuda_res, launch_info.res = cuda_res; { - if constexpr (!is_forallparampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_init(pol, f_params, - launch_info); - } + RAJA::expt::ParamMultiplexer::parampack_init(pol, f_params, launch_info); // // Privatize the loop_body, using make_launch_body to setup reductions @@ -641,11 +595,8 @@ forall_impl(resources::Cuda cuda_res, RAJA::cuda::launch(func, dims.blocks, dims.threads, args, shmem, cuda_res, 
Async); - if constexpr (!is_forallparampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_resolve(pol, f_params, - launch_info); - } + RAJA::expt::ParamMultiplexer::parampack_resolve(pol, f_params, + launch_info); } RAJA_FT_END; diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index 32e4920731..d9cca09216 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -28,22 +28,6 @@ namespace RAJA { -template -__global__ void launch_global_fcn(const RAJA_CUDA_GRID_CONSTANT BODY body_in) -{ - LaunchContext ctx; - - using RAJA::internal::thread_privatize; - auto privatizer = thread_privatize(body_in); - auto& body = privatizer.get_priv(); - - // Set pointer to shared memory - extern __shared__ char raja_shmem_ptr[]; - ctx.shared_mem_ptr = raja_shmem_ptr; - - body(ctx); -} - template __global__ void launch_new_reduce_global_fcn(const RAJA_CUDA_GRID_CONSTANT BODY body_in, @@ -76,71 +60,7 @@ struct LaunchExecute< template static concepts::enable_if_t< resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - RAJA::expt::type_traits::is_ForallParamPack_empty> - exec(RAJA::resources::Resource res, - const LaunchParams& params, - BODY_IN&& body_in, - ReduceParams& RAJA_UNUSED_ARG(launch_reducers)) - { - using BODY = camp::decay; - - auto func = reinterpret_cast(&launch_global_fcn); - - resources::Cuda cuda_res = res.get(); - - // - // Compute the number of blocks and threads - // - - cuda_dim_t gridSize {static_cast(params.teams.value[0]), - static_cast(params.teams.value[1]), - static_cast(params.teams.value[2])}; - - cuda_dim_t blockSize { - static_cast(params.threads.value[0]), - static_cast(params.threads.value[1]), - static_cast(params.threads.value[2])}; - - // Only launch kernel if we have something to iterate over - constexpr cuda_dim_member_t zero = 0; - if (gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && - blockSize.x > zero && blockSize.y > zero && blockSize.z > zero) - { - - RAJA_FT_BEGIN; - - { - size_t shared_mem_size = params.shared_mem_size; - - // - // Privatize the loop_body, using make_launch_body to setup reductions - // - BODY body = RAJA::cuda::make_launch_body( - func, gridSize, blockSize, shared_mem_size, cuda_res, - std::forward(body_in)); - - // - // Launch the kernel - // - void* args[] = {(void*)&body}; - RAJA::cuda::launch(func, gridSize, blockSize, args, shared_mem_size, - cuda_res, async); - } - - RAJA_FT_END; - } - - return resources::EventProxy(res); - } - - // Version with explicit reduction parameters.. 
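With the empty-pack handling folded into ParamMultiplexer and invoke_body, the launch back ends delete the non-reducer kernel and keep a single exec overload constrained only on is_ForallParamPack. The shape of that simplification, sketched with std::enable_if on stand-in traits:

    #include <type_traits>

    template <typename Pack> struct is_param_pack : std::false_type {};
    struct MyPack {};
    template <> struct is_param_pack<MyPack> : std::true_type {};

    // Before: two overloads, one for empty packs and one for non-empty packs.
    // After: one overload; the pack-aware path handles both cases internally,
    // because its init/combine/resolve hooks compile away for empty packs.
    template <typename Pack>
    typename std::enable_if<is_param_pack<Pack>::value, void>::type
    exec(Pack& pack)
    {
      // launch the reduce-aware kernel unconditionally
    }
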
- template - static concepts::enable_if_t< - resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - concepts::negate< - RAJA::expt::type_traits::is_ForallParamPack_empty>> + RAJA::expt::type_traits::is_ForallParamPack> exec(RAJA::resources::Resource res, const LaunchParams& launch_params, BODY_IN&& body_in, @@ -214,23 +134,6 @@ struct LaunchExecute< } }; -template -__launch_bounds__(num_threads, BLOCKS_PER_SM) __global__ - void launch_global_fcn_fixed(const RAJA_CUDA_GRID_CONSTANT BODY body_in) -{ - LaunchContext ctx; - - using RAJA::internal::thread_privatize; - auto privatizer = thread_privatize(body_in); - auto& body = privatizer.get_priv(); - - // Set pointer to shared memory - extern __shared__ char raja_shmem_ptr[]; - ctx.shared_mem_ptr = raja_shmem_ptr; - - body(ctx); -} - template static concepts::enable_if_t< resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - RAJA::expt::type_traits::is_ForallParamPack_empty> - exec(RAJA::resources::Resource res, - const LaunchParams& params, - BODY_IN&& body_in, - ReduceParams& RAJA_UNUSED_ARG(launch_reducers)) - { - using BODY = camp::decay; - - auto func = reinterpret_cast( - &launch_global_fcn_fixed); - - resources::Cuda cuda_res = res.get(); - - // - // Compute the number of blocks and threads - // - - cuda_dim_t gridSize {static_cast(params.teams.value[0]), - static_cast(params.teams.value[1]), - static_cast(params.teams.value[2])}; - - cuda_dim_t blockSize { - static_cast(params.threads.value[0]), - static_cast(params.threads.value[1]), - static_cast(params.threads.value[2])}; - - // Only launch kernel if we have something to iterate over - constexpr cuda_dim_member_t zero = 0; - if (gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && - blockSize.x > zero && blockSize.y > zero && blockSize.z > zero) - { - - RAJA_FT_BEGIN; - - { - size_t shared_mem_size = params.shared_mem_size; - - // - // Privatize the loop_body, using make_launch_body to setup reductions - // - BODY body = RAJA::cuda::make_launch_body( - func, gridSize, blockSize, shared_mem_size, cuda_res, - std::forward(body_in)); - - // - // Launch the kernel - // - void* args[] = {(void*)&body}; - RAJA::cuda::launch(func, gridSize, blockSize, args, shared_mem_size, - cuda_res, async); - } - - RAJA_FT_END; - } - - return resources::EventProxy(res); - } - - // Version with explicit reduction parameters.. - template - static concepts::enable_if_t< - resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - concepts::negate< - RAJA::expt::type_traits::is_ForallParamPack_empty>> + RAJA::expt::type_traits::is_ForallParamPack> exec(RAJA::resources::Resource res, const LaunchParams& launch_params, BODY_IN&& body_in, diff --git a/include/RAJA/policy/cuda/multi_reduce.hpp b/include/RAJA/policy/cuda/multi_reduce.hpp index 4aad6e6f0b..f4729d5dff 100644 --- a/include/RAJA/policy/cuda/multi_reduce.hpp +++ b/include/RAJA/policy/cuda/multi_reduce.hpp @@ -27,14 +27,15 @@ #include #include +#include #include #include + #include #include "RAJA/util/macros.hpp" #include "RAJA/util/math.hpp" -#include "RAJA/util/mutex.hpp" #include "RAJA/util/types.hpp" #include "RAJA/util/reduce.hpp" #include "RAJA/util/OffsetOperators.hpp" @@ -51,6 +52,8 @@ #include "RAJA/policy/cuda/atomic.hpp" #endif +#include "RAJA/pattern/thread.hpp" + #include "RAJA/policy/cuda/policy.hpp" #include "RAJA/policy/cuda/raja_cudaerrchk.hpp" @@ -204,7 +207,10 @@ RAJA_DEVICE RAJA_INLINE void grid_multi_reduce_shmem_to_global_atomic( // //! 
MultiReduction data for Cuda Offload -- stores value, host pointer -template +template struct MultiReduceGridAtomicHostInit_TallyData { //! setup permanent settings, allocate and initialize tally memory @@ -319,10 +325,7 @@ struct MultiReduceGridAtomicHostInit_TallyData static int get_tally_replication() { - int min_tally_replication = 1; -#if defined(RAJA_ENABLE_OPENMP) - min_tally_replication = omp_get_max_threads(); -#endif + int min_tally_replication = RAJA::get_max_threads(); struct { @@ -410,7 +413,10 @@ struct MultiReduceGridAtomicHostInit_TallyData }; //! MultiReduction data for Cuda Offload -- stores value, host pointer -template +template struct MultiReduceGridAtomicHostInit_Data : MultiReduceGridAtomicHostInit_TallyData { @@ -451,10 +457,7 @@ struct MultiReduceGridAtomicHostInit_Data //! combine value on host, combine a value into the tally void combine_host(int bin, T value) { - int tally_rep = 0; -#if defined(RAJA_ENABLE_OPENMP) - tally_rep = omp_get_thread_num(); -#endif + int tally_rep = RAJA::get_thread_num(); int tally_offset = GetTallyOffset {}(bin, m_tally_bins, tally_rep, m_tally_replication); Combiner {}(m_tally_mem[tally_offset], value); @@ -472,7 +475,10 @@ struct MultiReduceGridAtomicHostInit_Data }; //! MultiReduction data for Cuda Offload -- stores value, host pointer -template +template struct MultiReduceBlockThenGridAtomicHostInit_Data : MultiReduceGridAtomicHostInit_TallyData { @@ -597,10 +603,7 @@ struct MultiReduceBlockThenGridAtomicHostInit_Data //! combine value on host, combine a value into the tally void combine_host(int bin, T value) { - int tally_rep = 0; -#if defined(RAJA_ENABLE_OPENMP) - tally_rep = omp_get_thread_num(); -#endif + int tally_rep = RAJA::get_thread_num(); int tally_offset = GetTallyOffset {}(bin, m_tally_bins, tally_rep, m_tally_replication); Combiner {}(m_tally_mem[tally_offset], value); diff --git a/include/RAJA/policy/cuda/reduce.hpp b/include/RAJA/policy/cuda/reduce.hpp index 25de2a9a5c..2dd1ece75f 100644 --- a/include/RAJA/policy/cuda/reduce.hpp +++ b/include/RAJA/policy/cuda/reduce.hpp @@ -26,6 +26,7 @@ #if defined(RAJA_ENABLE_CUDA) #include +#include #include @@ -33,7 +34,6 @@ #include "RAJA/util/SoAArray.hpp" #include "RAJA/util/SoAPtr.hpp" #include "RAJA/util/basic_mempool.hpp" -#include "RAJA/util/mutex.hpp" #include "RAJA/util/types.hpp" #include "RAJA/util/reduce.hpp" @@ -600,9 +600,7 @@ class PinnedTally //! 
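The multi-reduce tally code replaces direct omp_get_max_threads()/omp_get_thread_num() calls with the policy-dispatched RAJA::get_max_threads()/RAJA::get_thread_num(), keeping one tally slot per host thread so host-side combines stay contention-free. A compact illustration of per-thread tally replication; the offset math here is illustrative, not the GetTallyOffset functor from the patch:

    #include <cstddef>
    #include <vector>

    // One tally slot per (bin, thread) pair; each host thread writes its own
    // column, and a final pass folds the columns together.
    struct Tally
    {
      int bins, replication;
      std::vector<double> data;

      Tally(int num_bins, int num_threads)
          : bins(num_bins), replication(num_threads),
            data(static_cast<std::size_t>(num_bins) * num_threads, 0.0)
      {}

      void combine(int bin, int thread, double value)
      {
        data[static_cast<std::size_t>(bin) * replication + thread] += value;
      }

      double get(int bin) const
      {
        double sum = 0.0;
        for (int t = 0; t < replication; ++t)
        {
          sum += data[static_cast<std::size_t>(bin) * replication + t];
        }
        return sum;
      }
    };
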
get new value for use in resource auto new_value(::RAJA::resources::Cuda res) -> T (&)[num_slots] { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(m_mutex); -#endif + std::lock_guard lock(m_mutex); ResourceNode* rn = resource_list; while (rn) { @@ -652,9 +650,7 @@ class PinnedTally ~PinnedTally() { free_list(); } -#if defined(RAJA_ENABLE_OPENMP) - omp::mutex m_mutex; -#endif + std::mutex m_mutex; private: ResourceNode* resource_list; @@ -1099,9 +1095,7 @@ class Reduce { if (val.value != val.identity) { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(tally_or_val_ptr.list->m_mutex); -#endif + std::lock_guard lock(tally_or_val_ptr.list->m_mutex); parent->combine(val.value); } } diff --git a/include/RAJA/policy/hip/MemUtils_HIP.hpp b/include/RAJA/policy/hip/MemUtils_HIP.hpp index 559cc640c8..9824498aff 100644 --- a/include/RAJA/policy/hip/MemUtils_HIP.hpp +++ b/include/RAJA/policy/hip/MemUtils_HIP.hpp @@ -27,11 +27,12 @@ #include #include #include +#include #include #include + #include "RAJA/util/basic_mempool.hpp" -#include "RAJA/util/mutex.hpp" #include "RAJA/util/types.hpp" #include "RAJA/util/macros.hpp" #include "RAJA/util/resource.hpp" @@ -175,17 +176,12 @@ struct hipInfo struct hipStatusInfo : hipInfo { -#if defined(RAJA_ENABLE_OPENMP) - omp::mutex lock; -#endif + std::mutex lock; }; extern hipStatusInfo g_status; -extern hipStatusInfo tl_status; -#if defined(RAJA_ENABLE_OPENMP) -#pragma omp threadprivate(tl_status) -#endif +thread_local extern hipStatusInfo tl_status; // stream to synchronization status: true synchronized, false running extern std::unordered_map g_stream_info_map; @@ -199,9 +195,7 @@ void synchronize_impl(::RAJA::resources::Hip res) { res.wait(); } RAJA_INLINE void synchronize() { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(detail::g_status.lock); -#endif + std::lock_guard lock(detail::g_status.lock); bool synchronize = false; for (auto& val : detail::g_stream_info_map) { @@ -221,9 +215,7 @@ void synchronize() RAJA_INLINE void synchronize(::RAJA::resources::Hip res) { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(detail::g_status.lock); -#endif + std::lock_guard lock(detail::g_status.lock); auto iter = detail::g_stream_info_map.find(res.get_stream()); if (iter != detail::g_stream_info_map.end()) { @@ -243,9 +235,7 @@ void synchronize(::RAJA::resources::Hip res) RAJA_INLINE void launch(::RAJA::resources::Hip res, bool async = true) { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(detail::g_status.lock); -#endif + std::lock_guard lock(detail::g_status.lock); auto iter = detail::g_stream_info_map.find(res.get_stream()); if (iter != detail::g_stream_info_map.end()) { diff --git a/include/RAJA/policy/hip/atomic.hpp b/include/RAJA/policy/hip/atomic.hpp index 5d0a91e140..18a510de09 100644 --- a/include/RAJA/policy/hip/atomic.hpp +++ b/include/RAJA/policy/hip/atomic.hpp @@ -31,7 +31,8 @@ #include "RAJA/policy/sequential/atomic.hpp" #include "RAJA/policy/atomic_builtin.hpp" -#if defined(RAJA_ENABLE_OPENMP) + +#if defined(RAJA_OPENMP_ACTIVE) #include "RAJA/policy/openmp/atomic.hpp" #endif diff --git a/include/RAJA/policy/hip/forall.hpp b/include/RAJA/policy/hip/forall.hpp index f421eb5038..6f098a9a11 100644 --- a/include/RAJA/policy/hip/forall.hpp +++ b/include/RAJA/policy/hip/forall.hpp @@ -383,26 +383,15 @@ __launch_bounds__(BlockSize, 1) __global__ auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); auto ii = IterationGetter::template index(); - constexpr bool is_forall_parampack_empty = - 
RAJA::expt::type_traits::is_ForallParamPack_empty::value; + if (ii < length) { - if constexpr (is_forall_parampack_empty) - { - body(idx[ii]); - } - else - { - RAJA::expt::invoke_body(f_params, body, idx[ii]); - } - } - if constexpr (!is_forall_parampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); + RAJA::expt::invoke_body(f_params, body, idx[ii]); } + + RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); } -/// template(); - constexpr bool is_forall_parampack_empty = - RAJA::expt::type_traits::is_ForallParamPack_empty::value; + ; if (ii < length) { - if constexpr (is_forall_parampack_empty) - { - body(idx[ii]); - } - else - { - RAJA::expt::invoke_body(f_params, body, idx[ii]); - } - } - if constexpr (!is_forall_parampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); + RAJA::expt::invoke_body(f_params, body, idx[ii]); } + RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); } -/// template< typename EXEC_POL, typename Iterator, @@ -466,27 +443,15 @@ __launch_bounds__(BlockSize, 1) __global__ using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); - constexpr bool is_forall_parampack_empty = - RAJA::expt::type_traits::is_ForallParamPack_empty::value; + for (auto ii = IterationGetter::template index(); ii < length; ii += IterationGetter::template size()) { - if constexpr (is_forall_parampack_empty) - { - body(idx[ii]); - } - else - { - RAJA::expt::invoke_body(f_params, body, idx[ii]); - } - } - if constexpr (!is_forall_parampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); + RAJA::expt::invoke_body(f_params, body, idx[ii]); } + RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); } -/// template< typename EXEC_POL, typename Iterator, @@ -509,24 +474,15 @@ __global__ void forallp_hip_kernel(const LOOP_BODY loop_body, using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); - constexpr bool is_forall_parampack_empty = - RAJA::expt::type_traits::is_ForallParamPack_empty::value; + for (auto ii = IterationGetter::template index(); ii < length; ii += IterationGetter::template size()) { - if constexpr (is_forall_parampack_empty) - { - body(idx[ii]); - } - else - { - RAJA::expt::invoke_body(f_params, body, idx[ii]); - } - } - if constexpr (!is_forall_parampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); + + RAJA::expt::invoke_body(f_params, body, idx[ii]); } + + RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params); } } // namespace impl @@ -576,8 +532,7 @@ forall_impl(resources::Hip hip_res, Iterator begin = std::begin(iter); Iterator end = std::end(iter); IndexType len = std::distance(begin, end); - constexpr bool is_forallparampack_empty = - RAJA::expt::type_traits::is_ForallParamPack_empty::value; + // Only launch kernel if we have something to iterate over if (len > 0) { @@ -605,11 +560,7 @@ forall_impl(resources::Hip hip_res, launch_info.res = hip_res; { - if constexpr (!is_forallparampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_init(pol, f_params, - launch_info); - } + RAJA::expt::ParamMultiplexer::parampack_init(pol, f_params, launch_info); // // Privatize the loop_body, using make_launch_body to setup reductions @@ -625,11 +576,9 @@ forall_impl(resources::Hip hip_res, (void*)&f_params}; RAJA::hip::launch(func, 
dims.blocks, dims.threads, args, shmem, hip_res, Async); - if constexpr (!is_forallparampack_empty) - { - RAJA::expt::ParamMultiplexer::parampack_resolve(pol, f_params, - launch_info); - } + + RAJA::expt::ParamMultiplexer::parampack_resolve(pol, f_params, + launch_info); } RAJA_FT_END; diff --git a/include/RAJA/policy/hip/launch.hpp b/include/RAJA/policy/hip/launch.hpp index 040386a324..f3ae8f87c1 100644 --- a/include/RAJA/policy/hip/launch.hpp +++ b/include/RAJA/policy/hip/launch.hpp @@ -28,22 +28,6 @@ namespace RAJA { -template -__global__ void launch_global_fcn(const BODY body_in) -{ - LaunchContext ctx; - - using RAJA::internal::thread_privatize; - auto privatizer = thread_privatize(body_in); - auto& body = privatizer.get_priv(); - - // Set pointer to shared memory - extern __shared__ char raja_shmem_ptr[]; - ctx.shared_mem_ptr = raja_shmem_ptr; - - body(ctx); -} - template __global__ void launch_new_reduce_global_fcn(const BODY body_in, ReduceParams reduce_params) @@ -73,71 +57,7 @@ struct LaunchExecute< template static concepts::enable_if_t< resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - RAJA::expt::type_traits::is_ForallParamPack_empty> - exec(RAJA::resources::Resource res, - const LaunchParams& params, - BODY_IN&& body_in, - ReduceParams& RAJA_UNUSED_ARG(launch_reducers)) - { - using BODY = camp::decay; - - auto func = reinterpret_cast(&launch_global_fcn); - - resources::Hip hip_res = res.get(); - - // - // Compute the number of blocks and threads - // - - hip_dim_t gridSize {static_cast(params.teams.value[0]), - static_cast(params.teams.value[1]), - static_cast(params.teams.value[2])}; - - hip_dim_t blockSize { - static_cast(params.threads.value[0]), - static_cast(params.threads.value[1]), - static_cast(params.threads.value[2])}; - - // Only launch kernel if we have something to iterate over - constexpr hip_dim_member_t zero = 0; - if (gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && - blockSize.x > zero && blockSize.y > zero && blockSize.z > zero) - { - - RAJA_FT_BEGIN; - - { - size_t shared_mem_size = params.shared_mem_size; - - // - // Privatize the loop_body, using make_launch_body to setup reductions - // - BODY body = RAJA::hip::make_launch_body(func, gridSize, blockSize, - shared_mem_size, hip_res, - std::forward(body_in)); - - // - // Launch the kernel - // - void* args[] = {(void*)&body}; - RAJA::hip::launch(func, gridSize, blockSize, args, shared_mem_size, - hip_res, async); - } - - RAJA_FT_END; - } - - return resources::EventProxy(res); - } - - // Version with explicit reduction parameters.. 
- template - static concepts::enable_if_t< - resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - concepts::negate< - RAJA::expt::type_traits::is_ForallParamPack_empty>> + RAJA::expt::type_traits::is_ForallParamPack> exec(RAJA::resources::Resource res, const LaunchParams& launch_params, BODY_IN&& body_in, @@ -212,23 +132,6 @@ struct LaunchExecute< } }; -template -__launch_bounds__(num_threads, 1) __global__ - void launch_global_fcn_fixed(const BODY body_in) -{ - LaunchContext ctx; - - using RAJA::internal::thread_privatize; - auto privatizer = thread_privatize(body_in); - auto& body = privatizer.get_priv(); - - // Set pointer to shared memory - extern __shared__ char raja_shmem_ptr[]; - ctx.shared_mem_ptr = raja_shmem_ptr; - - body(ctx); -} - template __launch_bounds__(num_threads, 1) __global__ void launch_new_reduce_global_fcn_fixed(const BODY body_in, @@ -258,72 +161,7 @@ struct LaunchExecute> template static concepts::enable_if_t< resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - RAJA::expt::type_traits::is_ForallParamPack_empty> - exec(RAJA::resources::Resource res, - const LaunchParams& params, - BODY_IN&& body_in, - ReduceParams& RAJA_UNUSED_ARG(launch_reducers)) - { - using BODY = camp::decay; - - auto func = - reinterpret_cast(&launch_global_fcn_fixed); - - resources::Hip hip_res = res.get(); - - // - // Compute the number of blocks and threads - // - - hip_dim_t gridSize {static_cast(params.teams.value[0]), - static_cast(params.teams.value[1]), - static_cast(params.teams.value[2])}; - - hip_dim_t blockSize { - static_cast(params.threads.value[0]), - static_cast(params.threads.value[1]), - static_cast(params.threads.value[2])}; - - // Only launch kernel if we have something to iterate over - constexpr hip_dim_member_t zero = 0; - if (gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && - blockSize.x > zero && blockSize.y > zero && blockSize.z > zero) - { - - RAJA_FT_BEGIN; - - { - size_t shared_mem_size = params.shared_mem_size; - - // - // Privatize the loop_body, using make_launch_body to setup reductions - // - BODY body = RAJA::hip::make_launch_body(func, gridSize, blockSize, - shared_mem_size, hip_res, - std::forward(body_in)); - - // - // Launch the kernel - // - void* args[] = {(void*)&body}; - RAJA::hip::launch(func, gridSize, blockSize, args, shared_mem_size, - hip_res, async); - } - - RAJA_FT_END; - } - - return resources::EventProxy(res); - } - - // Version with explicit reduction parameters.. 
- template - static concepts::enable_if_t< - resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - concepts::negate< - RAJA::expt::type_traits::is_ForallParamPack_empty>> + RAJA::expt::type_traits::is_ForallParamPack> exec(RAJA::resources::Resource res, const LaunchParams& launch_params, BODY_IN&& body_in, diff --git a/include/RAJA/policy/hip/multi_reduce.hpp b/include/RAJA/policy/hip/multi_reduce.hpp index e374191324..41f0772764 100644 --- a/include/RAJA/policy/hip/multi_reduce.hpp +++ b/include/RAJA/policy/hip/multi_reduce.hpp @@ -27,14 +27,15 @@ #include #include +#include #include #include + #include "hip/hip_runtime.h" #include "RAJA/util/macros.hpp" #include "RAJA/util/math.hpp" -#include "RAJA/util/mutex.hpp" #include "RAJA/util/types.hpp" #include "RAJA/util/reduce.hpp" #include "RAJA/util/OffsetOperators.hpp" @@ -54,6 +55,8 @@ #include "RAJA/policy/hip/policy.hpp" #include "RAJA/policy/hip/raja_hiperrchk.hpp" +#include "RAJA/pattern/thread.hpp" + namespace RAJA { @@ -204,7 +207,10 @@ RAJA_DEVICE RAJA_INLINE void grid_multi_reduce_shmem_to_global_atomic( // //! MultiReduction data for Hip Offload -- stores value, host pointer -template +template struct MultiReduceGridAtomicHostInit_TallyData { //! setup permanent settings, allocate and initialize tally memory @@ -319,10 +325,7 @@ struct MultiReduceGridAtomicHostInit_TallyData static int get_tally_replication() { - int min_tally_replication = 1; -#if defined(RAJA_ENABLE_OPENMP) - min_tally_replication = omp_get_max_threads(); -#endif + int min_tally_replication = RAJA::get_max_threads(); struct { @@ -410,7 +413,10 @@ struct MultiReduceGridAtomicHostInit_TallyData }; //! MultiReduction data for Hip Offload -- stores value, host pointer -template +template struct MultiReduceGridAtomicHostInit_Data : MultiReduceGridAtomicHostInit_TallyData { @@ -451,10 +457,7 @@ struct MultiReduceGridAtomicHostInit_Data //! combine value on host, combine a value into the tally void combine_host(int bin, T value) { - int tally_rep = 0; -#if defined(RAJA_ENABLE_OPENMP) - tally_rep = omp_get_thread_num(); -#endif + int tally_rep = RAJA::get_thread_num(); int tally_offset = GetTallyOffset {}(bin, m_tally_bins, tally_rep, m_tally_replication); Combiner {}(m_tally_mem[tally_offset], value); @@ -472,7 +475,10 @@ struct MultiReduceGridAtomicHostInit_Data }; //! MultiReduction data for Hip Offload -- stores value, host pointer -template +template struct MultiReduceBlockThenGridAtomicHostInit_Data : MultiReduceGridAtomicHostInit_TallyData { @@ -597,10 +603,7 @@ struct MultiReduceBlockThenGridAtomicHostInit_Data //! combine value on host, combine a value into the tally void combine_host(int bin, T value) { - int tally_rep = 0; -#if defined(RAJA_ENABLE_OPENMP) - tally_rep = omp_get_thread_num(); -#endif + int tally_rep = RAJA::get_thread_num(); int tally_offset = GetTallyOffset {}(bin, m_tally_bins, tally_rep, m_tally_replication); Combiner {}(m_tally_mem[tally_offset], value); diff --git a/include/RAJA/policy/hip/reduce.hpp b/include/RAJA/policy/hip/reduce.hpp index f67ba380e2..f2f15468ae 100644 --- a/include/RAJA/policy/hip/reduce.hpp +++ b/include/RAJA/policy/hip/reduce.hpp @@ -26,6 +26,7 @@ #if defined(RAJA_ENABLE_HIP) #include +#include #include @@ -33,7 +34,6 @@ #include "RAJA/util/SoAArray.hpp" #include "RAJA/util/SoAPtr.hpp" #include "RAJA/util/basic_mempool.hpp" -#include "RAJA/util/mutex.hpp" #include "RAJA/util/types.hpp" #include "RAJA/util/reduce.hpp" @@ -593,9 +593,7 @@ class PinnedTally //! 
get new value for use in resource auto new_value(::RAJA::resources::Hip res) -> T (&)[num_slots] { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(m_mutex); -#endif + std::lock_guard lock(m_mutex); ResourceNode* rn = resource_list; while (rn) { @@ -645,9 +643,7 @@ class PinnedTally ~PinnedTally() { free_list(); } -#if defined(RAJA_ENABLE_OPENMP) - omp::mutex m_mutex; -#endif + std::mutex m_mutex; private: ResourceNode* resource_list; @@ -1091,9 +1087,7 @@ class Reduce { if (val.value != val.identity) { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(tally_or_val_ptr.list->m_mutex); -#endif + std::lock_guard lock(tally_or_val_ptr.list->m_mutex); parent->combine(val.value); } } diff --git a/include/RAJA/policy/openmp.hpp b/include/RAJA/policy/openmp.hpp index 5fe73ceff0..21f760d516 100644 --- a/include/RAJA/policy/openmp.hpp +++ b/include/RAJA/policy/openmp.hpp @@ -42,6 +42,7 @@ #include "RAJA/policy/openmp/scan.hpp" #include "RAJA/policy/openmp/sort.hpp" #include "RAJA/policy/openmp/synchronize.hpp" +#include "RAJA/policy/openmp/thread.hpp" #include "RAJA/policy/openmp/launch.hpp" #include "RAJA/policy/openmp/WorkGroup.hpp" #include "RAJA/policy/openmp/params/reduce.hpp" diff --git a/include/RAJA/policy/openmp/forall.hpp b/include/RAJA/policy/openmp/forall.hpp index 5887bc20bb..27b3b9760e 100644 --- a/include/RAJA/policy/openmp/forall.hpp +++ b/include/RAJA/policy/openmp/forall.hpp @@ -33,7 +33,7 @@ #include "RAJA/util/types.hpp" #include "RAJA/internal/fault_tolerance.hpp" -#include "RAJA/pattern/kernel/type-traits.hpp" +#include "RAJA/pattern/kernel/TypeTraits.hpp" #include "RAJA/index/IndexSet.hpp" #include "RAJA/index/ListSegment.hpp" diff --git a/include/RAJA/policy/openmp/launch.hpp b/include/RAJA/policy/openmp/launch.hpp index 2092c87bb3..2ba8066fe9 100644 --- a/include/RAJA/policy/openmp/launch.hpp +++ b/include/RAJA/policy/openmp/launch.hpp @@ -28,68 +28,69 @@ template<> struct LaunchExecute { - template - static concepts::enable_if_t< - resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - RAJA::expt::type_traits::is_ForallParamPack_empty> - exec(RAJA::resources::Resource res, - LaunchParams const& params, - BODY const& body, - ReduceParams& RAJA_UNUSED_ARG(launch_reducers)) - { - RAJA::region([&]() { - LaunchContext ctx; - - using RAJA::internal::thread_privatize; - auto loop_body = thread_privatize(body); - - ctx.shared_mem_ptr = (char*)malloc(params.shared_mem_size); - - loop_body.get_priv()(ctx); - - free(ctx.shared_mem_ptr); - ctx.shared_mem_ptr = nullptr; - }); - - return resources::EventProxy(res); - } template static concepts::enable_if_t< resources::EventProxy, - RAJA::expt::type_traits::is_ForallParamPack, - concepts::negate< - RAJA::expt::type_traits::is_ForallParamPack_empty>> + RAJA::expt::type_traits::is_ForallParamPack> exec(RAJA::resources::Resource res, LaunchParams const& launch_params, BODY const& body, ReduceParams& f_params) { + using RAJA::internal::thread_privatize; + constexpr bool has_reducers = + !RAJA::expt::type_traits::is_ForallParamPack_empty::value; using EXEC_POL = RAJA::omp_launch_t; EXEC_POL pol {}; + using BodyType = decltype(thread_privatize(body)); - expt::ParamMultiplexer::parampack_init(pol, f_params); - - // reducer object must be named f_params as expected by macro below - RAJA_OMP_DECLARE_REDUCTION_COMBINE; - -#pragma omp parallel reduction(combine : f_params) - { - + auto parallel_section = [&](ReduceParams& f_params, auto func) { LaunchContext ctx; - - using RAJA::internal::thread_privatize; auto loop_body = 
thread_privatize(body); + static_assert(std::is_invocable::value, + "Internal RAJA error: Check the parallel kernel passed to " + "OpenMP Parallel section in openmp/launch.hpp"); ctx.shared_mem_ptr = (char*)malloc(launch_params.shared_mem_size); - expt::invoke_body(f_params, loop_body.get_priv(), ctx); + func(f_params, loop_body, ctx); free(ctx.shared_mem_ptr); ctx.shared_mem_ptr = nullptr; - } + }; + // Init reducers if present + expt::ParamMultiplexer::parampack_init(pol, f_params); + + // reducer object must be named f_params as expected by macro below + if constexpr (has_reducers) + { + RAJA_OMP_DECLARE_REDUCTION_COMBINE; +#pragma omp parallel reduction(combine : f_params) + { + // This "extra lambda" has to be declared within the scope of the OpenMP + // pragma so that the reduction parameter pack it operates on is the + // version tracked by the combine OpenMP syntax + auto parallel_kernel = [&](ReduceParams& f_params, BodyType& body, + LaunchContext& ctx) { + expt::invoke_body(f_params, body.get_priv(), ctx); + }; + parallel_section(f_params, parallel_kernel); + } + } + else + { + RAJA::region([&]() { + auto parallel_kernel = [&](ReduceParams&, BodyType& body, + LaunchContext& ctx) { + body.get_priv()(ctx); + }; + parallel_section(f_params, parallel_kernel); + }); + } + // Resolve reducers if present expt::ParamMultiplexer::parampack_resolve(pol, f_params); return resources::EventProxy(res); diff --git a/include/RAJA/policy/openmp/params/forall.hpp b/include/RAJA/policy/openmp/params/forall.hpp index a80e6ff5af..0a99133ad6 100644 --- a/include/RAJA/policy/openmp/params/forall.hpp +++ b/include/RAJA/policy/openmp/params/forall.hpp @@ -64,8 +64,7 @@ template class ExecPol, int ChunkSize, typename ForallParam> RAJA_INLINE concepts::enable_if< - std::is_same, RAJA::policy::omp::Static>, - std::integral_constant> + std::is_same, RAJA::policy::omp::Static>> forall_impl(const ExecPol& p, Iterable&& iter, Func&& loop_body, @@ -83,48 +82,21 @@ forall_impl(const ExecPol& p, using RAJA::internal::thread_privatize; auto body = thread_privatize(loop_body); -#pragma omp for schedule(static) reduction(combine : f_params) - for (decltype(distance_it) i = 0; i < distance_it; ++i) + if constexpr (ChunkSize > 0) { - RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); - } - } - - RAJA::expt::ParamMultiplexer::parampack_resolve(p, f_params); -} - -// -// omp for schedule(static, ChunkSize) -// -template class ExecPol, - typename Iterable, - typename Func, - int ChunkSize, - typename ForallParam> -RAJA_INLINE concepts::enable_if< - std::is_same, RAJA::policy::omp::Static>, - std::integral_constant 0)>> -forall_impl(const ExecPol& p, - Iterable&& iter, - Func&& loop_body, - ForallParam&& f_params) -{ - using EXEC_POL = camp::decay; - - RAJA::expt::ParamMultiplexer::parampack_init(p, f_params); - RAJA_OMP_DECLARE_REDUCTION_COMBINE; - - RAJA_EXTRACT_BED_IT(iter); -#pragma omp parallel - { - - using RAJA::internal::thread_privatize; - auto body = thread_privatize(loop_body); - #pragma omp for schedule(static, ChunkSize) reduction(combine : f_params) - for (decltype(distance_it) i = 0; i < distance_it; ++i) + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { + RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); + } + } + else { - RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); +#pragma omp for schedule(static) reduction(combine : f_params) + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { + RAJA::expt::invoke_body(f_params, body.get_priv(), 
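The OpenMP launch path keeps RAJA_OMP_DECLARE_REDUCTION_COMBINE and the "reduction(combine : f_params)" clause only in the reducer branch, and the combiner is declared in the same scope as the pragma so it operates on the privatized pack. A standalone example of the underlying OpenMP user-defined reduction mechanism on a plain struct (this is the general feature, not RAJA's macro):

    struct Accum
    {
      double v = 0.0;
      void combine(const Accum& other) { v += other.v; }
    };

    // User-defined reduction named "combine"; omp_out/omp_in are the two
    // private copies being merged. With no initializer clause, each private
    // copy is default-constructed.
    #pragma omp declare reduction(combine : Accum : omp_out.combine(omp_in))

    double sum_of_squares(const double* x, int n)
    {
      Accum a;
    #pragma omp parallel for reduction(combine : a)
      for (int i = 0; i < n; ++i)
      {
        a.v += x[i] * x[i];
      }
      return a.v;
    }
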
begin_it[i]); + } } } @@ -193,49 +165,10 @@ RAJA_INLINE void forall_impl_nowait(const ::RAJA::policy::omp::Auto& p, RAJA::expt::ParamMultiplexer::parampack_resolve(p, f_params); } -// -// omp for schedule(dynamic) -// -template::type* = nullptr> -RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Dynamic& p, - Iterable&& iter, - Func&& loop_body, - ForallParam&& f_params) -{ - using EXEC_POL = camp::decay; - - RAJA::expt::ParamMultiplexer::parampack_init(p, f_params); - RAJA_OMP_DECLARE_REDUCTION_COMBINE; - - RAJA_EXTRACT_BED_IT(iter); -#pragma omp parallel - { - - using RAJA::internal::thread_privatize; - auto body = thread_privatize(loop_body); - -#pragma omp for schedule(dynamic) reduction(combine : f_params) - for (decltype(distance_it) i = 0; i < distance_it; ++i) - { - RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); - } - } - - RAJA::expt::ParamMultiplexer::parampack_resolve(p, f_params); -} - // // omp for schedule(dynamic, ChunkSize) // -template 0)>::type* = nullptr> +template RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Dynamic& p, Iterable&& iter, Func&& loop_body, @@ -252,25 +185,32 @@ RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Dynamic& p, using RAJA::internal::thread_privatize; auto body = thread_privatize(loop_body); - + if constexpr (ChunkSize > 0) + { #pragma omp for schedule(dynamic, ChunkSize) reduction(combine : f_params) - for (decltype(distance_it) i = 0; i < distance_it; ++i) + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { + RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); + } + } + else { - RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); +#pragma omp for schedule(dynamic) reduction(combine : f_params) + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { + RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); + } } } + RAJA::expt::ParamMultiplexer::parampack_resolve(p, f_params); } // // omp for schedule(guided) // -template::type* = nullptr> +template RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Guided& p, Iterable&& iter, Func&& loop_body, @@ -287,46 +227,21 @@ RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Guided& p, using RAJA::internal::thread_privatize; auto body = thread_privatize(loop_body); - -#pragma omp for schedule(guided) reduction(combine : f_params) - for (decltype(distance_it) i = 0; i < distance_it; ++i) + if constexpr (ChunkSize <= 0) { - RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); +#pragma omp for schedule(guided) reduction(combine : f_params) + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { + RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); + } } - } - - RAJA::expt::ParamMultiplexer::parampack_resolve(p, f_params); -} - -// -// omp for schedule(guided, ChunkSize) -// -template 0)>::type* = nullptr> -RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Guided& p, - Iterable&& iter, - Func&& loop_body, - ForallParam&& f_params) -{ - using EXEC_POL = camp::decay; - - RAJA::expt::ParamMultiplexer::parampack_init(p, f_params); - RAJA_OMP_DECLARE_REDUCTION_COMBINE; - - RAJA_EXTRACT_BED_IT(iter); -#pragma omp parallel - { - - using RAJA::internal::thread_privatize; - auto body = thread_privatize(loop_body); - -#pragma omp for schedule(guided, ChunkSize) reduction(combine : f_params) - for (decltype(distance_it) i = 0; i < distance_it; ++i) + else { - RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); +#pragma omp for schedule(guided, ChunkSize) 
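The duplicated schedule(static/dynamic/guided) and schedule(..., ChunkSize) overloads collapse into single functions that branch on ChunkSize with if constexpr, exactly as in the hunks above; the discarded branch and its pragma never apply. A reduced sketch of the same dispatch on a hypothetical loop:

    template <int ChunkSize>
    void scaled_add(double* y, const double* x, double a, int n)
    {
    #pragma omp parallel
      {
        if constexpr (ChunkSize > 0)
        {
    #pragma omp for schedule(dynamic, ChunkSize)
          for (int i = 0; i < n; ++i) { y[i] += a * x[i]; }
        }
        else
        {
    #pragma omp for schedule(dynamic)
          for (int i = 0; i < n; ++i) { y[i] += a * x[i]; }
        }
      }
    }
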
reduction(combine : f_params) + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { + RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); + } } } @@ -336,11 +251,7 @@ RAJA_INLINE void forall_impl(const ::RAJA::policy::omp::Guided& p, // // omp for schedule(static) nowait // -template::type* = nullptr> +template RAJA_INLINE void forall_impl_nowait( const ::RAJA::policy::omp::Static& p, Iterable&& iter, @@ -358,53 +269,25 @@ RAJA_INLINE void forall_impl_nowait( using RAJA::internal::thread_privatize; auto body = thread_privatize(loop_body); - -#pragma omp for schedule(static) nowait reduction(combine : f_params) - for (decltype(distance_it) i = 0; i < distance_it; ++i) + if constexpr (ChunkSize <= 0) { - RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); +#pragma omp for schedule(static) nowait reduction(combine : f_params) + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { + RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); + } } - } - - RAJA::expt::ParamMultiplexer::parampack_resolve(p, f_params); -} - -// -// omp for schedule(static, ChunkSize) nowait -// -template 0)>::type* = nullptr> -RAJA_INLINE void forall_impl_nowait( - const ::RAJA::policy::omp::Static& p, - Iterable&& iter, - Func&& loop_body, - ForallParam&& f_params) -{ - using EXEC_POL = camp::decay; - - RAJA::expt::ParamMultiplexer::parampack_init(p, f_params); - RAJA_OMP_DECLARE_REDUCTION_COMBINE; - - RAJA_EXTRACT_BED_IT(iter); -#pragma omp parallel - { - - using RAJA::internal::thread_privatize; - auto body = thread_privatize(loop_body); - -#pragma omp for schedule(static, ChunkSize) nowait reduction(combine : f_params) - for (decltype(distance_it) i = 0; i < distance_it; ++i) + else { - RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); +#pragma omp for schedule(static, ChunkSize) nowait reduction(combine : f_params) + for (decltype(distance_it) i = 0; i < distance_it; ++i) + { + RAJA::expt::invoke_body(f_params, body.get_priv(), begin_it[i]); + } } } - RAJA::expt::ParamMultiplexer::parampack_resolve(p, f_params); } - } // namespace internal template using omp_multi_reduce_tuning = @@ -385,6 +387,11 @@ using omp_multi_reduce = omp_multi_reduce_unordered; /// using policy::omp::omp_atomic; +/// +/// Type alias for thread +/// +using policy::omp::omp_thread; + /// /// Type aliases to simplify common omp parallel for loop execution /// diff --git a/include/RAJA/policy/openmp/thread.hpp b/include/RAJA/policy/openmp/thread.hpp new file mode 100644 index 0000000000..faf58d38ba --- /dev/null +++ b/include/RAJA/policy/openmp/thread.hpp @@ -0,0 +1,47 @@ +/*! + ****************************************************************************** + * + * \file + * + * \brief RAJA header file defining OpenMP thread operations. + * + ****************************************************************************** + */ + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. 
+// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#ifndef RAJA_policy_openmp_thread_HPP +#define RAJA_policy_openmp_thread_HPP + +#include "RAJA/config.hpp" + +#if defined(RAJA_OPENMP_ACTIVE) + +#include "RAJA/util/macros.hpp" + +#include "RAJA/policy/thread_auto.hpp" + +namespace RAJA +{ + +template<> +RAJA_HOST_DEVICE RAJA_INLINE int get_max_threads(omp_thread) +{ + return omp_get_max_threads(); +} + +template<> +RAJA_HOST_DEVICE RAJA_INLINE int get_thread_num(omp_thread) +{ + return omp_get_thread_num(); +} + +} // namespace RAJA + +#endif // RAJA_ENABLE_OPENMP +#endif // guard diff --git a/include/RAJA/policy/sequential.hpp b/include/RAJA/policy/sequential.hpp index 4497db42c8..e9ac98afea 100644 --- a/include/RAJA/policy/sequential.hpp +++ b/include/RAJA/policy/sequential.hpp @@ -31,6 +31,7 @@ #include "RAJA/policy/sequential/multi_reduce.hpp" #include "RAJA/policy/sequential/scan.hpp" #include "RAJA/policy/sequential/sort.hpp" +#include "RAJA/policy/sequential/thread.hpp" #include "RAJA/policy/sequential/launch.hpp" #include "RAJA/policy/sequential/WorkGroup.hpp" #include "RAJA/policy/sequential/params/reduce.hpp" diff --git a/include/RAJA/policy/sequential/forall.hpp b/include/RAJA/policy/sequential/forall.hpp index 57bccc9f21..1fd9b08d1d 100644 --- a/include/RAJA/policy/sequential/forall.hpp +++ b/include/RAJA/policy/sequential/forall.hpp @@ -58,45 +58,33 @@ namespace sequential template RAJA_INLINE concepts::enable_if_t< resources::EventProxy, - expt::type_traits::is_ForallParamPack, - concepts::negate>> + expt::type_traits::is_ForallParamPack> forall_impl(resources::Host host_res, const seq_exec& pol, Iterable&& iter, Func&& body, ForallParam f_params) { + constexpr bool has_reducers = + !expt::type_traits::is_ForallParamPack_empty::value; expt::ParamMultiplexer::parampack_init(pol, f_params); RAJA_EXTRACT_BED_IT(iter); for (decltype(distance_it) i = 0; i < distance_it; ++i) { - expt::invoke_body(f_params, body, *(begin_it + i)); + if constexpr (has_reducers) + { + expt::invoke_body(f_params, body, *(begin_it + i)); + } + else + { + body(*(begin_it + i)); + } } - expt::ParamMultiplexer::parampack_resolve(pol, f_params); - return resources::EventProxy(host_res); -} - -template -RAJA_INLINE concepts::enable_if_t< - resources::EventProxy, - expt::type_traits::is_ForallParamPack, - expt::type_traits::is_ForallParamPack_empty> -forall_impl(resources::Host host_res, - const seq_exec&, - Iterable&& iter, - Func&& body, - ForallParam) -{ - RAJA_EXTRACT_BED_IT(iter); - - for (decltype(distance_it) i = 0; i < distance_it; ++i) - { - body(*(begin_it + i)); - } + expt::ParamMultiplexer::parampack_resolve(pol, f_params); return resources::EventProxy(host_res); } diff --git a/include/RAJA/policy/sequential/policy.hpp b/include/RAJA/policy/sequential/policy.hpp index ed735ba10d..9ead6375ae 100644 --- a/include/RAJA/policy/sequential/policy.hpp +++ b/include/RAJA/policy/sequential/policy.hpp @@ -124,6 +124,9 @@ struct seq_multi_reduce_policy : make_policy_pattern_launch_platform_t< struct seq_atomic {}; +struct seq_thread +{}; + template using seq_multi_reduce_tuning = seq_multi_reduce_policy>; @@ -147,6 +150,7 @@ using policy::sequential::seq_multi_reduce; using policy::sequential::seq_reduce; using policy::sequential::seq_region; using policy::sequential::seq_segit; +using policy::sequential::seq_thread; using policy::sequential::seq_work; diff --git a/include/RAJA/policy/sequential/thread.hpp 
b/include/RAJA/policy/sequential/thread.hpp new file mode 100644 index 0000000000..a181b2bbaf --- /dev/null +++ b/include/RAJA/policy/sequential/thread.hpp @@ -0,0 +1,44 @@ +/*! + ****************************************************************************** + * + * \file + * + * \brief RAJA header file defining sequential thread operations. + * + ****************************************************************************** + */ + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#ifndef RAJA_policy_sequential_thread_HPP +#define RAJA_policy_sequential_thread_HPP + +#include "RAJA/config.hpp" + +#include "RAJA/util/macros.hpp" + +#include "RAJA/policy/thread_auto.hpp" + +namespace RAJA +{ +template<> +RAJA_HOST_DEVICE RAJA_INLINE int get_max_threads(seq_thread) +{ + return 1; +} + +template<> +RAJA_HOST_DEVICE RAJA_INLINE int get_thread_num(seq_thread) +{ + return 0; +} + +} // namespace RAJA + + +#endif // guard diff --git a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp index 29bebfac9e..0062eb322d 100644 --- a/include/RAJA/policy/sycl/MemUtils_SYCL.hpp +++ b/include/RAJA/policy/sycl/MemUtils_SYCL.hpp @@ -28,11 +28,12 @@ #include #include #include +#include #include #include + #include "RAJA/util/basic_mempool.hpp" -#include "RAJA/util/mutex.hpp" #include "RAJA/util/types.hpp" #include "RAJA/policy/sycl/policy.hpp" @@ -53,15 +54,11 @@ struct syclInfo sycl_dim_t blockDim {0}; ::sycl::queue qu = ::sycl::queue(); bool setup_reducers = false; -#if defined(RAJA_ENABLE_OPENMP) - syclInfo* thread_states = nullptr; - omp::mutex lock; -#endif }; extern syclInfo g_status; -extern syclInfo tl_status; +thread_local extern syclInfo tl_status; extern std::unordered_map<::sycl::queue, bool> g_queue_info_map; diff --git a/include/RAJA/policy/thread_auto.hpp b/include/RAJA/policy/thread_auto.hpp new file mode 100644 index 0000000000..f5b69b7897 --- /dev/null +++ b/include/RAJA/policy/thread_auto.hpp @@ -0,0 +1,60 @@ +/*! + ****************************************************************************** + * + * \file + * + * \brief RAJA header file defining automatic thread operations. + * + ****************************************************************************** + */ + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#ifndef RAJA_policy_thread_auto_HPP +#define RAJA_policy_thread_auto_HPP + +#include "RAJA/config.hpp" + +#include "RAJA/util/macros.hpp" + +#ifdef RAJA_OPENMP_ACTIVE +#include "RAJA/policy/openmp/policy.hpp" +#endif + +#include "RAJA/policy/sequential/policy.hpp" + +namespace RAJA +{ + +namespace detail +{ +/*! + * Provides priority between thread policies that should do the "right thing" + * + * If OpenMP is active we always use the omp_thread. 
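// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the new omp_thread / seq_thread
// policies added in these files expose thread queries through tag dispatch,
// and thread_auto.hpp picks the OpenMP-backed tag when it is available.  The
// tag and function names below are stand-ins for that pattern, not RAJA's
// actual declarations.
// ---------------------------------------------------------------------------
#include <cstdio>
#if defined(_OPENMP)
#include <omp.h>
#endif

struct seq_tag {};                       // sequential fallback: a single "thread"
inline int max_threads(seq_tag) { return 1; }
inline int thread_num(seq_tag) { return 0; }

#if defined(_OPENMP)
struct omp_tag {};                       // OpenMP-backed implementation
inline int max_threads(omp_tag) { return omp_get_max_threads(); }
inline int thread_num(omp_tag) { return omp_get_thread_num(); }
using auto_tag = omp_tag;                // analogous to detail::active_auto_thread
#else
using auto_tag = seq_tag;                // no OpenMP: fall back to the sequential tag
#endif

int main()
{
  // Generic code asks the "auto" policy and gets the right answer per build.
  std::printf("max threads: %d\n", max_threads(auto_tag{}));
  std::printf("this thread: %d\n", thread_num(auto_tag{}));
  return 0;
}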
+ * + * Fallback to seq_thread, which performs non-thread operations + * assumes there is no thread safety issues + */ +#if defined(RAJA_OPENMP_ACTIVE) +using active_auto_thread = RAJA::omp_thread; +#else +using active_auto_thread = RAJA::seq_thread; +#endif + +} // namespace detail + +template +RAJA_HOST_DEVICE RAJA_INLINE int get_max_threads(AtomicPolicy); + +template +RAJA_HOST_DEVICE RAJA_INLINE int get_thread_num(AtomicPolicy); + +} // namespace RAJA + +#endif diff --git a/include/RAJA/util/Layout.hpp b/include/RAJA/util/Layout.hpp index 9bcc1ce132..6aa6d63336 100644 --- a/include/RAJA/util/Layout.hpp +++ b/include/RAJA/util/Layout.hpp @@ -77,9 +77,9 @@ struct LayoutBase_impl, IdxLin, StrideOneDim> using IndexLinear = IdxLin; using IndexRange = camp::make_idx_seq_t; - static constexpr size_t n_dims = sizeof...(RangeInts); - static constexpr IdxLin limit = RAJA::operators::limits::max(); - static constexpr ptrdiff_t stride_one_dim = StrideOneDim; + static inline constexpr size_t n_dims = sizeof...(RangeInts); + static inline constexpr IdxLin limit = RAJA::operators::limits::max(); + static inline constexpr ptrdiff_t stride_one_dim = StrideOneDim; IdxLin sizes[n_dims] = {0}; IdxLin strides[n_dims] = {0}; @@ -90,11 +90,12 @@ struct LayoutBase_impl, IdxLin, StrideOneDim> /*! * Default constructor with zero sizes and strides. */ - constexpr RAJA_INLINE LayoutBase_impl() = default; - constexpr RAJA_INLINE LayoutBase_impl(LayoutBase_impl const&) = default; - constexpr RAJA_INLINE LayoutBase_impl(LayoutBase_impl&&) = default; - RAJA_INLINE LayoutBase_impl& operator=(LayoutBase_impl const&) = default; - RAJA_INLINE LayoutBase_impl& operator=(LayoutBase_impl&&) = default; + RAJA_INLINE constexpr LayoutBase_impl() = default; + RAJA_INLINE constexpr LayoutBase_impl(LayoutBase_impl const&) = default; + RAJA_INLINE constexpr LayoutBase_impl(LayoutBase_impl&&) = default; + RAJA_INLINE constexpr LayoutBase_impl& operator=(LayoutBase_impl const&) = + default; + RAJA_INLINE constexpr LayoutBase_impl& operator=(LayoutBase_impl&&) = default; /*! * Construct a layout given the size of each dimension. @@ -116,10 +117,10 @@ struct LayoutBase_impl, IdxLin, StrideOneDim> * Templated copy ctor from simillar layout. */ template - constexpr RAJA_INLINE RAJA_HOST_DEVICE - LayoutBase_impl(const LayoutBase_impl, - CIdxLin, - CStrideOneDim>& rhs) + RAJA_INLINE RAJA_HOST_DEVICE constexpr LayoutBase_impl( + const LayoutBase_impl, + CIdxLin, + CStrideOneDim>& rhs) : sizes {static_cast(rhs.sizes[RangeInts])...}, strides {static_cast(rhs.strides[RangeInts])...}, inv_strides {static_cast(rhs.inv_strides[RangeInts])...}, @@ -267,12 +268,6 @@ struct LayoutBase_impl, IdxLin, StrideOneDim> } }; -template -constexpr size_t - LayoutBase_impl, IdxLin, StrideOneDim>::n_dims; -template -constexpr IdxLin - LayoutBase_impl, IdxLin, StrideOneDim>::limit; } // namespace detail /*! 
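// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the Layout.hpp hunks above turn
// n_dims / limit / stride_one_dim into C++17 `static inline constexpr`
// members, which is why the out-of-class definitions at the bottom of the
// struct can be deleted.  The toy class template below shows the rule; the
// names are hypothetical.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <cstdio>

template <typename T>
struct LayoutSketch
{
  // Since C++17 this is a definition, so the member can be ODR-used (e.g.
  // have its address taken) without any separate definition.
  static inline constexpr std::size_t n_dims = 3;

  // Pre-C++17, some translation unit would also have needed:
  //   template <typename T> constexpr std::size_t LayoutSketch<T>::n_dims;
};

int main()
{
  const std::size_t* p = &LayoutSketch<int>::n_dims;  // ODR-use, still links
  std::printf("n_dims = %zu\n", *p);
  return 0;
}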
@@ -400,8 +395,8 @@ struct TypedLayout, StrideOne> * */ template -RAJA_INLINE Layout make_stride_one( - Layout const& l) +RAJA_INLINE RAJA_HOST_DEVICE constexpr Layout +make_stride_one(Layout const& l) { return Layout(l); } @@ -411,8 +406,8 @@ RAJA_INLINE Layout make_stride_one( * */ template -RAJA_INLINE TypedLayout make_stride_one( - TypedLayout const& l) +RAJA_INLINE RAJA_HOST_DEVICE constexpr TypedLayout +make_stride_one(TypedLayout const& l) { // strip l to it's base-class type using Base = typename TypedLayout::Base; diff --git a/include/RAJA/util/OffsetLayout.hpp b/include/RAJA/util/OffsetLayout.hpp index a5f27b12bf..4237614a15 100644 --- a/include/RAJA/util/OffsetLayout.hpp +++ b/include/RAJA/util/OffsetLayout.hpp @@ -49,14 +49,14 @@ struct OffsetLayout_impl, IdxLin> using Base = RAJA::detail::LayoutBase_impl; Base base_; - static constexpr camp::idx_t stride_one_dim = Base::stride_one_dim; + static inline constexpr camp::idx_t stride_one_dim = Base::stride_one_dim; - static constexpr size_t n_dims = sizeof...(RangeInts); - IdxLin offsets[n_dims] = {0}; // If not specified set to zero + static inline constexpr size_t n_dims = sizeof...(RangeInts); + IdxLin offsets[n_dims] = {0}; // If not specified set to zero - constexpr RAJA_INLINE OffsetLayout_impl( - std::array begin, - std::array end) + constexpr RAJA_INLINE RAJA_HOST_DEVICE + OffsetLayout_impl(std::array begin, + std::array end) : base_ {(end[RangeInts] - begin[RangeInts])...}, offsets {begin[RangeInts]...} {} @@ -66,7 +66,8 @@ struct OffsetLayout_impl, IdxLin> offsets {c.offsets[RangeInts]...} {} - void shift(std::array shift) + constexpr RAJA_INLINE RAJA_HOST_DEVICE void shift( + std::array shift) { for (size_t i = 0; i < n_dims; ++i) offsets[i] += shift[i]; @@ -116,10 +117,11 @@ struct OffsetLayout_impl, IdxLin> camp::sink((indices = (offsets[RangeInts] + indices))...); } - static RAJA_INLINE OffsetLayout_impl - from_layout_and_offsets( - const std::array& offsets_in, - const Layout& rhs) + static constexpr RAJA_INLINE RAJA_HOST_DEVICE + OffsetLayout_impl + from_layout_and_offsets( + const std::array& offsets_in, + const Layout& rhs) { OffsetLayout_impl ret {rhs}; camp::sink((ret.offsets[RangeInts] = offsets_in[RangeInts])...); @@ -202,8 +204,8 @@ struct TypedOffsetLayout> using OffsetLayout::OffsetLayout; #endif - RAJA_INLINE RAJA_HOST_DEVICE constexpr IdxLin operator()( - DimTypes... indices) const + RAJA_INLINE RAJA_HOST_DEVICE RAJA_BOUNDS_CHECK_constexpr IdxLin + operator()(DimTypes... 
indices) const { return IdxLin(Base::operator()(stripIndexType(indices)...)); } @@ -230,17 +232,18 @@ struct TypedOffsetLayout> }; template -auto make_offset_layout(const std::array& begin, - const std::array& end) - -> OffsetLayout +RAJA_INLINE RAJA_HOST_DEVICE constexpr auto make_offset_layout( + const std::array& begin, + const std::array& end) -> OffsetLayout { return OffsetLayout {begin, end}; } template -auto make_permuted_offset_layout(const std::array& begin, - const std::array& end, - const std::array& permutation) +RAJA_INLINE RAJA_HOST_DEVICE constexpr auto make_permuted_offset_layout( + const std::array& begin, + const std::array& end, + const std::array& permutation) -> decltype(make_offset_layout(begin, end)) { std::array sizes; diff --git a/include/RAJA/util/PermutedLayout.hpp b/include/RAJA/util/PermutedLayout.hpp index adbd13ff4c..c542e59359 100644 --- a/include/RAJA/util/PermutedLayout.hpp +++ b/include/RAJA/util/PermutedLayout.hpp @@ -61,9 +61,9 @@ namespace RAJA * */ template -auto make_permuted_layout(std::array sizes, - std::array permutation) - -> Layout +constexpr RAJA_INLINE RAJA_HOST_DEVICE auto make_permuted_layout( + std::array sizes, + std::array permutation) -> Layout { std::array strides; std::array folded_strides; diff --git a/include/RAJA/util/SoAArray.hpp b/include/RAJA/util/SoAArray.hpp index 28c23d1ef6..f507e131c2 100644 --- a/include/RAJA/util/SoAArray.hpp +++ b/include/RAJA/util/SoAArray.hpp @@ -41,9 +41,12 @@ class SoAArray using value_type = T; public: - RAJA_HOST_DEVICE value_type get(size_t i) const { return mem[i]; } + constexpr RAJA_HOST_DEVICE value_type get(size_t i) const { return mem[i]; } - RAJA_HOST_DEVICE void set(size_t i, value_type val) { mem[i] = val; } + constexpr RAJA_HOST_DEVICE void set(size_t i, value_type val) + { + mem[i] = val; + } private: value_type mem[size]; @@ -60,12 +63,12 @@ class SoAArray<::RAJA::reduce::detail::ValueLoc, size> using second_type = IndexType; public: - RAJA_HOST_DEVICE value_type get(size_t i) const + constexpr RAJA_HOST_DEVICE value_type get(size_t i) const { return value_type(mem[i], mem_idx[i]); } - RAJA_HOST_DEVICE void set(size_t i, value_type val) + constexpr RAJA_HOST_DEVICE void set(size_t i, value_type val) { mem[i] = val; mem_idx[i] = val.getLoc(); diff --git a/include/RAJA/util/SoAPtr.hpp b/include/RAJA/util/SoAPtr.hpp index caeb99d4a6..fb613013a5 100644 --- a/include/RAJA/util/SoAPtr.hpp +++ b/include/RAJA/util/SoAPtr.hpp @@ -90,12 +90,12 @@ class SoAPtr RAJA_HOST_DEVICE bool allocated() const { return mem != nullptr; } - RAJA_HOST_DEVICE value_type get(size_t i) const + constexpr RAJA_HOST_DEVICE value_type get(size_t i) const { return accessor::get(mem, i); } - RAJA_HOST_DEVICE void set(size_t i, value_type val) + constexpr RAJA_HOST_DEVICE void set(size_t i, value_type val) { accessor::set(mem, i, val); } @@ -166,12 +166,12 @@ class SoAPtr, RAJA_HOST_DEVICE bool allocated() const { return mem != nullptr; } - RAJA_HOST_DEVICE value_type get(size_t i) const + constexpr RAJA_HOST_DEVICE value_type get(size_t i) const { return value_type(accessor::get(mem, i), accessor::get(mem_idx, i)); } - RAJA_HOST_DEVICE void set(size_t i, value_type val) + constexpr RAJA_HOST_DEVICE void set(size_t i, value_type val) { accessor::set(mem, i, first_type(val)); accessor::set(mem_idx, i, val.getLoc()); @@ -238,12 +238,12 @@ class SoAPtr, mempool, accessor> RAJA_HOST_DEVICE bool allocated() const { return mem != nullptr; } - RAJA_HOST_DEVICE value_type get(size_t i) const + constexpr RAJA_HOST_DEVICE 
value_type get(size_t i) const { return value_type(accessor::get(mem, i), accessor::get(mem_idx, i)); } - RAJA_HOST_DEVICE void set(size_t i, value_type val) + constexpr RAJA_HOST_DEVICE void set(size_t i, value_type val) { accessor::set(mem, i, val.getVal()); accessor::set(mem_idx, i, val.getLoc()); diff --git a/include/RAJA/util/Span.hpp b/include/RAJA/util/Span.hpp index 741327a4c0..69ce145051 100644 --- a/include/RAJA/util/Span.hpp +++ b/include/RAJA/util/Span.hpp @@ -72,94 +72,122 @@ struct Span static_assert(type_traits::is_random_access_iterator::value, "IterType must model RandomAccessIterator"); - RAJA_HOST_DEVICE Span(iterator begin, iterator end) + constexpr RAJA_HOST_DEVICE Span(iterator begin, iterator end) : m_begin {begin}, m_end {end} {} - RAJA_HOST_DEVICE Span(iterator begin, size_type size) + constexpr RAJA_HOST_DEVICE Span(iterator begin, size_type size) : m_begin {begin}, m_end {begin + size} {} - RAJA_HOST_DEVICE RAJA_INLINE iterator begin() { return m_begin; } + constexpr RAJA_HOST_DEVICE RAJA_INLINE iterator begin() { return m_begin; } - RAJA_HOST_DEVICE RAJA_INLINE iterator end() { return m_end; } + constexpr RAJA_HOST_DEVICE RAJA_INLINE iterator end() { return m_end; } - RAJA_HOST_DEVICE RAJA_INLINE const_iterator begin() const { return m_begin; } + constexpr RAJA_HOST_DEVICE RAJA_INLINE const_iterator begin() const + { + return m_begin; + } - RAJA_HOST_DEVICE RAJA_INLINE const_iterator end() const { return m_end; } + constexpr RAJA_HOST_DEVICE RAJA_INLINE const_iterator end() const + { + return m_end; + } - RAJA_HOST_DEVICE RAJA_INLINE const_iterator cbegin() const { return m_begin; } + constexpr RAJA_HOST_DEVICE RAJA_INLINE const_iterator cbegin() const + { + return m_begin; + } - RAJA_HOST_DEVICE RAJA_INLINE const_iterator cend() const { return m_end; } + constexpr RAJA_HOST_DEVICE RAJA_INLINE const_iterator cend() const + { + return m_end; + } - RAJA_HOST_DEVICE RAJA_INLINE friend iterator begin(Span& s) + constexpr RAJA_HOST_DEVICE RAJA_INLINE friend iterator begin(Span& s) { return s.begin(); } - RAJA_HOST_DEVICE RAJA_INLINE friend iterator end(Span& s) { return s.end(); } + constexpr RAJA_HOST_DEVICE RAJA_INLINE friend iterator end(Span& s) + { + return s.end(); + } - RAJA_HOST_DEVICE RAJA_INLINE friend const_iterator begin(const Span& s) + constexpr RAJA_HOST_DEVICE RAJA_INLINE friend const_iterator begin( + const Span& s) { return s.begin(); } - RAJA_HOST_DEVICE RAJA_INLINE friend const_iterator end(const Span& s) + constexpr RAJA_HOST_DEVICE RAJA_INLINE friend const_iterator end( + const Span& s) { return s.end(); } - RAJA_HOST_DEVICE RAJA_INLINE friend const_iterator cbegin(const Span& s) + constexpr RAJA_HOST_DEVICE RAJA_INLINE friend const_iterator cbegin( + const Span& s) { return s.cbegin(); } - RAJA_HOST_DEVICE RAJA_INLINE friend const_iterator cend(const Span& s) + constexpr RAJA_HOST_DEVICE RAJA_INLINE friend const_iterator cend( + const Span& s) { return s.cend(); } - RAJA_HOST_DEVICE RAJA_INLINE reference front() const { return *begin(); } + constexpr RAJA_HOST_DEVICE RAJA_INLINE reference front() const + { + return *begin(); + } - RAJA_HOST_DEVICE RAJA_INLINE reference back() const { return *(end() - 1); } + constexpr RAJA_HOST_DEVICE RAJA_INLINE reference back() const + { + return *(end() - 1); + } - RAJA_HOST_DEVICE RAJA_INLINE reference operator[](size_type i) const + constexpr RAJA_HOST_DEVICE RAJA_INLINE reference operator[](size_type i) const { return data()[i]; } - RAJA_HOST_DEVICE RAJA_INLINE iterator data() const { return m_begin; } + 
constexpr RAJA_HOST_DEVICE RAJA_INLINE iterator data() const + { + return m_begin; + } - RAJA_HOST_DEVICE RAJA_INLINE size_type size() const + constexpr RAJA_HOST_DEVICE RAJA_INLINE size_type size() const { return static_cast(m_end - m_begin); } - RAJA_HOST_DEVICE RAJA_INLINE bool empty() const + constexpr RAJA_HOST_DEVICE RAJA_INLINE bool empty() const { return size() == static_cast(0); } - RAJA_HOST_DEVICE RAJA_INLINE Span first(size_type count) const + constexpr RAJA_HOST_DEVICE RAJA_INLINE Span first(size_type count) const { return slice(0, count); } - RAJA_HOST_DEVICE RAJA_INLINE Span last(size_type count) const + constexpr RAJA_HOST_DEVICE RAJA_INLINE Span last(size_type count) const { return slice(size() - count, count); } - RAJA_HOST_DEVICE RAJA_INLINE Span subspan(size_type begin, - size_type length) const + constexpr RAJA_HOST_DEVICE RAJA_INLINE Span subspan(size_type begin, + size_type length) const { return slice(begin, length); } - RAJA_HOST_DEVICE RAJA_INLINE Span slice(size_type begin, - size_type length) const + constexpr RAJA_HOST_DEVICE RAJA_INLINE Span slice(size_type begin, + size_type length) const { auto start = m_begin + begin; auto end = start + length > m_end ? m_end : start + length; @@ -192,14 +220,15 @@ struct Span * */ template -RAJA_HOST_DEVICE RAJA_INLINE Span make_span(IterType begin, - IndexType size) +constexpr RAJA_HOST_DEVICE RAJA_INLINE Span make_span( + IterType begin, + IndexType size) { return Span(begin, size); } template -RAJA_INLINE auto make_span(Iter& iterable) +constexpr RAJA_HOST_DEVICE RAJA_INLINE auto make_span(Iter& iterable) { using std::begin; using std::distance; diff --git a/include/RAJA/util/TypedViewBase.hpp b/include/RAJA/util/TypedViewBase.hpp index 359783845f..8cbb05622f 100644 --- a/include/RAJA/util/TypedViewBase.hpp +++ b/include/RAJA/util/TypedViewBase.hpp @@ -208,12 +208,10 @@ struct ViewReturnHelper, { using return_type = ElementType&; - RAJA_INLINE - - RAJA_HOST_DEVICE - static constexpr return_type make_return(LayoutType const& layout, - PointerType const& data, - Args const&... args) + RAJA_INLINE RAJA_HOST_DEVICE static constexpr return_type make_return( + LayoutType const& layout, + PointerType const& data, + Args const&... args) { return data[stripIndexType(layout(args...))]; } @@ -268,12 +266,10 @@ struct ViewReturnHelper, using return_type = internal::expt::ET::TensorLoadStore; - RAJA_INLINE - - RAJA_HOST_DEVICE - static constexpr return_type make_return(LayoutType const& layout, - PointerType const& data, - Args const&... args) + RAJA_INLINE RAJA_HOST_DEVICE static constexpr return_type make_return( + LayoutType const& layout, + PointerType const& data, + Args const&... args) { return return_type(ref_type { @@ -385,11 +381,7 @@ struct ViewReturnHelper< using return_type = internal::expt::ET::TensorLoadStore; - - RAJA_INLINE - - RAJA_HOST_DEVICE - static constexpr return_type make_return( + RAJA_INLINE RAJA_HOST_DEVICE static constexpr return_type make_return( LayoutType const& layout, PointerType const& data, RAJA::expt::StaticTensorIndex const&... 
args) @@ -610,26 +602,22 @@ class ViewBase RAJA_INLINE constexpr ViewBase() {}; - RAJA_HOST_DEVICE - RAJA_INLINE ViewBase(ViewBase const& c) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ViewBase(ViewBase const& c) : m_layout(c.m_layout), m_data(c.m_data) {} - RAJA_HOST_DEVICE - - RAJA_INLINE - ViewBase& operator=(ViewBase const& c) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ViewBase& operator=(ViewBase const& c) { m_layout = c.m_layout; m_data = c.m_data; } #else - constexpr ViewBase() = default; - RAJA_INLINE constexpr ViewBase(ViewBase const&) = default; - RAJA_INLINE constexpr ViewBase(ViewBase&&) = default; - RAJA_INLINE ViewBase& operator=(ViewBase const&) = default; - RAJA_INLINE ViewBase& operator=(ViewBase&&) = default; + constexpr ViewBase() = default; + RAJA_INLINE constexpr ViewBase(ViewBase const&) = default; + RAJA_INLINE constexpr ViewBase(ViewBase&&) = default; + RAJA_INLINE constexpr ViewBase& operator=(ViewBase const&) = default; + RAJA_INLINE constexpr ViewBase& operator=(ViewBase&&) = default; #endif @@ -655,23 +643,25 @@ class ViewBase m_layout(rhs.get_layout()) {} - RAJA_HOST_DEVICE - RAJA_INLINE void set_data(PointerType data_ptr) { m_data = data_ptr; } - - RAJA_HOST_DEVICE - - RAJA_INLINE - constexpr pointer_type const& get_data() const { return m_data; } - - RAJA_HOST_DEVICE + RAJA_HOST_DEVICE RAJA_INLINE constexpr void set_data(PointerType data_ptr) + { + m_data = data_ptr; + } - RAJA_INLINE - constexpr layout_type const& get_layout() const { return m_layout; } + RAJA_HOST_DEVICE RAJA_INLINE constexpr pointer_type const& get_data() const + { + return m_data; + } - RAJA_HOST_DEVICE + RAJA_HOST_DEVICE RAJA_INLINE constexpr layout_type const& get_layout() const + { + return m_layout; + } - RAJA_INLINE - constexpr linear_index_type size() const { return m_layout.size(); } + RAJA_HOST_DEVICE RAJA_INLINE constexpr linear_index_type size() const + { + return m_layout.size(); + } template RAJA_HOST_DEVICE RAJA_INLINE constexpr linear_index_type get_dim_size() const @@ -709,9 +699,11 @@ class ViewBase m_layout, m_data, args...); } + // why doesn't this return a shifted copy instead of shifting this view? template - RAJA_INLINE ShiftedView shift(const std::array& shift) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ShiftedView shift( + const std::array& shift) { static_assert(n_dims == layout_type::n_dims, "Dimension mismatch in ViewBase shift"); @@ -802,9 +794,11 @@ class TypedViewBase(args)...); } + // why doesn't this return a shifted copy instead of shifting this view? template - RAJA_INLINE ShiftedView shift(const std::array& shift) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ShiftedView shift( + const std::array& shift) { static_assert(n_dims == layout_type::n_dims, "Dimension mismatch in TypedViewBase shift"); diff --git a/include/RAJA/util/View.hpp b/include/RAJA/util/View.hpp index d00fa0cf65..0283b514cf 100644 --- a/include/RAJA/util/View.hpp +++ b/include/RAJA/util/View.hpp @@ -57,7 +57,8 @@ using TypedView = internal:: TypedViewBase>; template -RAJA_INLINE View> make_view(ValueType* ptr) +RAJA_HOST_DEVICE RAJA_INLINE constexpr View> +make_view(ValueType* ptr) { return View>(ptr, 1); } @@ -66,7 +67,9 @@ template -RAJA_INLINE View> +RAJA_HOST_DEVICE RAJA_INLINE constexpr View< + ValueType, + IndexLayout> make_index_view(ValueType* ptr, IndexLayout index_layout) { @@ -77,9 +80,9 @@ make_index_view(ValueType* ptr, // select certain indices from a tuple, given a curated index sequence // returns linear index of layout(ar...) 
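// ---------------------------------------------------------------------------
// Illustrative usage sketch, not part of the patch: the ViewBase members made
// constexpr/host-device above sit underneath the ordinary RAJA::View
// interface, whose day-to-day use is unchanged.  Sizes and data below are
// arbitrary; this is standard documented View/Layout usage.
// ---------------------------------------------------------------------------
#include <cstdio>
#include <vector>
#include "RAJA/RAJA.hpp"

int main()
{
  constexpr int N = 4;
  constexpr int M = 3;
  std::vector<double> data(N * M, 0.0);

  // Two-dimensional view over a flat allocation; Layout<2> maps (i, j) to a
  // row-major linear index.
  RAJA::View<double, RAJA::Layout<2>> A(data.data(), N, M);

  for (int i = 0; i < N; ++i)
  {
    for (int j = 0; j < M; ++j)
    {
      A(i, j) = static_cast<double>(i * M + j);
    }
  }

  std::printf("A(2, 1) = %f\n", A(2, 1));
  return 0;
}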
template -RAJA_HOST_DEVICE RAJA_INLINE auto selecttuple(Lay lyout, - Tup&& tup, - camp::idx_seq) +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto selecttuple(Lay lyout, + Tup&& tup, + camp::idx_seq) -> decltype(lyout(camp::get(std::forward(tup))...)) { return lyout(camp::get(std::forward(tup))...); @@ -114,7 +117,7 @@ using offset_seq_t = typename offset_seq::type; // remove the Nth index in a parameter pack // returns linear index of layout(ar...) template -RAJA_HOST_DEVICE RAJA_INLINE auto removenth(Lay lyout, Tup&& tup) +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto removenth(Lay lyout, Tup&& tup) -> decltype(selecttuple( lyout, std::forward(tup), @@ -165,12 +168,14 @@ struct MultiView MultiView() = default; template - RAJA_INLINE constexpr MultiView(pointer_type data_ptr, Args... dim_sizes) + RAJA_HOST_DEVICE RAJA_INLINE constexpr MultiView(pointer_type data_ptr, + Args... dim_sizes) : layout(dim_sizes...), data(nc_pointer_type(data_ptr)) {} - RAJA_INLINE constexpr MultiView(pointer_type data_ptr, layout_type const& ly) + RAJA_HOST_DEVICE RAJA_INLINE constexpr MultiView(pointer_type data_ptr, + layout_type const& ly) : layout(ly), data(nc_pointer_type(data_ptr)) {} @@ -179,46 +184,46 @@ struct MultiView // arrays. For now, this becomes an ambiguous call to constructor error. // template::value> - // RAJA_INLINE constexpr MultiView( + // RAJA_HOST_DEVICE RAJA_INLINE constexpr MultiView( // std::enable_if_t data_ptr, // Args... dim_sizes) // : layout(dim_sizes...), // data(nc_pointer_type(data_ptr)) //{} // template::value> - // RAJA_INLINE constexpr MultiView( + // RAJA_HOST_DEVICE RAJA_INLINE constexpr MultiView( // std::enable_if_t data_ptr, // layout_type const& ly) // : layout(ly), // data(nc_pointer_type(data_ptr)) //{} - RAJA_INLINE constexpr MultiView(MultiView const&) = default; - RAJA_INLINE constexpr MultiView(MultiView&&) = default; - RAJA_INLINE MultiView& operator=(MultiView const&) = default; - RAJA_INLINE MultiView& operator=(MultiView&&) = default; + RAJA_INLINE constexpr MultiView(MultiView const&) = default; + RAJA_INLINE constexpr MultiView(MultiView&&) = default; + RAJA_INLINE constexpr MultiView& operator=(MultiView const&) = default; + RAJA_INLINE constexpr MultiView& operator=(MultiView&&) = default; template::value> - RAJA_INLINE constexpr MultiView( + RAJA_HOST_DEVICE RAJA_INLINE constexpr MultiView( std::enable_if_t const& rhs) : layout(rhs.layout), data(nc_pointer_type(rhs.data)) {} - RAJA_HOST_DEVICE RAJA_INLINE void set_layout(layout_type const& ly) + RAJA_HOST_DEVICE RAJA_INLINE constexpr void set_layout(layout_type const& ly) { layout = ly; } template::value> - RAJA_HOST_DEVICE RAJA_INLINE void set_data( + RAJA_HOST_DEVICE RAJA_INLINE constexpr void set_data( std::enable_if_t data_ptr) { data = nc_pointer_type( data_ptr); // This data_ptr should already be non-const. } - RAJA_HOST_DEVICE RAJA_INLINE void set_data(pointer_type data_ptr) + RAJA_HOST_DEVICE RAJA_INLINE constexpr void set_data(pointer_type data_ptr) { data = nc_pointer_type(data_ptr); } @@ -233,8 +238,9 @@ struct MultiView return pointer_type(data); } + // why doesn't this return a shifted copy instead of shifting this view? 
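// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the MultiView constructors and
// setters made constexpr/host-device above wrap several arrays behind one
// view, where the first index of the access operator selects the array.  This
// follows RAJA's documented MultiView usage, but treat the details here as a
// sketch rather than a definitive reference.
// ---------------------------------------------------------------------------
#include <cstdio>
#include <vector>
#include "RAJA/RAJA.hpp"

int main()
{
  constexpr int N = 8;
  std::vector<double> a0(N, 0.0);
  std::vector<double> a1(N, 0.0);

  // Array-of-pointers backing store; by default the leading index of the
  // access operator selects which pointer is used.
  double* arrays[2] = {a0.data(), a1.data()};
  RAJA::MultiView<double, RAJA::Layout<1>> mv(arrays, N);

  for (int i = 0; i < N; ++i)
  {
    mv(0, i) = 1.0 * i;  // writes into a0
    mv(1, i) = 2.0 * i;  // writes into a1
  }

  std::printf("a0[3] = %f, a1[3] = %f\n", mv(0, 3), mv(1, 3));
  return 0;
}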
template - RAJA_INLINE RAJA:: + RAJA_HOST_DEVICE RAJA_INLINE constexpr RAJA:: MultiView::type, P2Pidx> shift(const std::array& shift) { @@ -279,10 +285,15 @@ struct AtomicViewWrapper base_type base_; - RAJA_INLINE - constexpr explicit AtomicViewWrapper(ViewType view) : base_(view) {} + RAJA_HOST_DEVICE RAJA_INLINE constexpr explicit AtomicViewWrapper( + ViewType view) + : base_(view) + {} - RAJA_INLINE void set_data(pointer_type data_ptr) { base_.set_data(data_ptr); } + RAJA_HOST_DEVICE RAJA_INLINE constexpr void set_data(pointer_type data_ptr) + { + base_.set_data(data_ptr); + } template RAJA_HOST_DEVICE RAJA_INLINE atomic_type operator()(ARGS&&... args) const @@ -305,10 +316,15 @@ struct AtomicViewWrapper base_type base_; - RAJA_INLINE - constexpr explicit AtomicViewWrapper(ViewType const& view) : base_ {view} {} + RAJA_HOST_DEVICE RAJA_INLINE constexpr explicit AtomicViewWrapper( + ViewType const& view) + : base_ {view} + {} - RAJA_INLINE void set_data(pointer_type data_ptr) { base_.set_data(data_ptr); } + RAJA_HOST_DEVICE RAJA_INLINE constexpr void set_data(pointer_type data_ptr) + { + base_.set_data(data_ptr); + } template RAJA_HOST_DEVICE RAJA_INLINE value_type& operator()(ARGS&&... args) const @@ -318,8 +334,8 @@ struct AtomicViewWrapper }; template -RAJA_INLINE AtomicViewWrapper make_atomic_view( - ViewType const& view) +RAJA_HOST_DEVICE RAJA_INLINE constexpr AtomicViewWrapper +make_atomic_view(ViewType const& view) { return RAJA::AtomicViewWrapper(view); @@ -338,13 +354,15 @@ template struct PermutedViewHelper; template -constexpr auto get_last_index(T last) +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto get_last_index(T last) { return last; } template -constexpr auto get_last_index(T0, T1 t1, Args... args) +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto get_last_index(T0, + T1 t1, + Args... args) { return get_last_index(t1, args...); } @@ -353,7 +371,8 @@ template struct PermutedViewHelper> { template - static auto get(T* ptr, Extents&&... extents) + static RAJA_HOST_DEVICE RAJA_INLINE constexpr auto get(T* ptr, + Extents&&... extents) { constexpr int N = sizeof...(Extents); @@ -372,7 +391,8 @@ template<> struct PermutedViewHelper { template - static auto get(T* ptr, Extents&&... extents) + static RAJA_HOST_DEVICE RAJA_INLINE constexpr auto get(T* ptr, + Extents&&... extents) { constexpr int N = sizeof...(Extents); using view_t = RAJA::View>; @@ -382,7 +402,8 @@ struct PermutedViewHelper }; template -constexpr auto make_reverse_array(std::index_sequence) +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto make_reverse_array( + std::index_sequence) { return std::array {sizeof...(idx) - 1U - idx...}; } @@ -391,7 +412,8 @@ template<> struct PermutedViewHelper { template - static auto get(T* ptr, Extents&&... extents) + static RAJA_HOST_DEVICE RAJA_INLINE constexpr auto get(T* ptr, + Extents&&... extents) { constexpr int N = sizeof...(Extents); @@ -410,7 +432,9 @@ template -auto make_permuted_view(T* ptr, Extents&&... extents) +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto make_permuted_view( + T* ptr, + Extents&&... 
extents) { return detail::PermutedViewHelper::template get( ptr, std::forward(extents)...); diff --git a/include/RAJA/util/basic_mempool.hpp b/include/RAJA/util/basic_mempool.hpp index 898672f6d7..d3c2011957 100644 --- a/include/RAJA/util/basic_mempool.hpp +++ b/include/RAJA/util/basic_mempool.hpp @@ -24,9 +24,9 @@ #include #include #include +#include #include "RAJA/util/align.hpp" -#include "RAJA/util/mutex.hpp" namespace RAJA { @@ -329,9 +329,7 @@ class MemPool /// Free all backing allocations, even if they are currently in use void free_chunks() { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(m_mutex); -#endif + std::lock_guard lock(m_mutex); while (!m_arenas.empty()) { @@ -343,18 +341,14 @@ class MemPool size_t arena_size() { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(m_mutex); -#endif + std::lock_guard lock(m_mutex); return m_default_arena_size; } size_t arena_size(size_t new_size) { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(m_mutex); -#endif + std::lock_guard lock(m_mutex); size_t prev_size = m_default_arena_size; m_default_arena_size = new_size; @@ -364,9 +358,7 @@ class MemPool template T* malloc(size_t nTs, size_t alignment = alignof(T)) { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(m_mutex); -#endif + std::lock_guard lock(m_mutex); const size_t size = nTs * sizeof(T); void* ptr = nullptr; @@ -398,9 +390,7 @@ class MemPool void free(const void* cptr) { -#if defined(RAJA_ENABLE_OPENMP) - lock_guard lock(m_mutex); -#endif + std::lock_guard lock(m_mutex); void* ptr = const_cast(cptr); arena_container_type::iterator end = m_arenas.end(); @@ -422,9 +412,7 @@ class MemPool private: using arena_container_type = std::list; -#if defined(RAJA_ENABLE_OPENMP) - omp::mutex m_mutex; -#endif + std::mutex m_mutex; arena_container_type m_arenas; size_t m_default_arena_size; diff --git a/include/RAJA/util/for_each.hpp b/include/RAJA/util/for_each.hpp index 708d3115e8..cc950d5ebf 100644 --- a/include/RAJA/util/for_each.hpp +++ b/include/RAJA/util/for_each.hpp @@ -39,9 +39,9 @@ namespace detail // runtime loop applying func to each element in the range in order RAJA_SUPPRESS_HD_WARN template -RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each(Iter begin, - Iter end, - UnaryFunc func) +constexpr RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each(Iter begin, + Iter end, + UnaryFunc func) { for (; begin != end; ++begin) { @@ -54,8 +54,8 @@ RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each(Iter begin, // compile time expansion applying func to a each type in the list in order RAJA_SUPPRESS_HD_WARN template -RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_type(camp::list const&, - UnaryFunc func) +constexpr RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc +for_each_type(camp::list const&, UnaryFunc func) { // braced init lists are evaluated in order int seq_unused_array[] = {0, (func(Ts {}), 0)...}; @@ -67,9 +67,8 @@ RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_type(camp::list const&, // compile time expansion applying func to a each type in the tuple in order RAJA_SUPPRESS_HD_WARN template -RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_tuple(Tuple&& t, - UnaryFunc func, - camp::idx_seq) +constexpr RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc +for_each_tuple(Tuple&& t, UnaryFunc func, camp::idx_seq) { using camp::get; // braced init lists are evaluated in order @@ -88,7 +87,7 @@ RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_tuple(Tuple&& t, */ RAJA_SUPPRESS_HD_WARN template -RAJA_HOST_DEVICE RAJA_INLINE +constexpr RAJA_HOST_DEVICE RAJA_INLINE concepts::enable_if_t> for_each(Container&& c, 
UnaryFunc func) { @@ -104,8 +103,8 @@ RAJA_HOST_DEVICE RAJA_INLINE */ RAJA_SUPPRESS_HD_WARN template -RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_type(camp::list const& c, - UnaryFunc func) +constexpr RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc +for_each_type(camp::list const& c, UnaryFunc func) { return detail::for_each_type(c, std::move(func)); } @@ -116,7 +115,8 @@ RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_type(camp::list const& c, */ RAJA_SUPPRESS_HD_WARN template -RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_tuple(Tuple&& t, UnaryFunc func) +constexpr RAJA_HOST_DEVICE RAJA_INLINE UnaryFunc for_each_tuple(Tuple&& t, + UnaryFunc func) { return detail::for_each_tuple( std::forward(t), std::move(func), diff --git a/include/RAJA/util/mutex.hpp b/include/RAJA/util/mutex.hpp deleted file mode 100644 index 1786ab4cd7..0000000000 --- a/include/RAJA/util/mutex.hpp +++ /dev/null @@ -1,84 +0,0 @@ -/*! -****************************************************************************** -* -* \file -* -* \brief Header file providing functionality similar to std mutex header. -* -****************************************************************************** -*/ - -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC -// and RAJA project contributors. See the RAJA/LICENSE file for details. -// -// SPDX-License-Identifier: (BSD-3-Clause) -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - -#ifndef RAJA_util_mutex_HPP -#define RAJA_util_mutex_HPP - -#include "RAJA/config.hpp" - -#if defined(RAJA_ENABLE_OPENMP) -#include -#endif - -namespace RAJA -{ - -#if defined(RAJA_ENABLE_OPENMP) -namespace omp -{ - -//! class wrapping omp_lock_t with std::mutex interface -class mutex -{ -public: - using native_handle_type = omp_lock_t; - - mutex() { omp_init_lock(&m_lock); } - - mutex(const mutex&) = delete; - mutex(mutex&&) = delete; - mutex& operator=(const mutex&) = delete; - mutex& operator=(mutex&&) = delete; - - void lock() { omp_set_lock(&m_lock); } - - bool try_lock() { return omp_test_lock(&m_lock) != 0; } - - void unlock() { omp_unset_lock(&m_lock); } - - native_handle_type& native_handle() { return m_lock; } - - ~mutex() { omp_destroy_lock(&m_lock); } - -private: - native_handle_type m_lock; -}; - -} // namespace omp -#endif // closing endif for if defined(RAJA_ENABLE_OPENMP) - -//! 
class providing functionality of std::lock_guard -template -class lock_guard -{ -public: - explicit lock_guard(mutex_type& m) : m_mutex(m) { m_mutex.lock(); } - - lock_guard(const lock_guard&) = delete; - lock_guard(lock_guard&&) = delete; - lock_guard& operator=(const lock_guard&) = delete; - lock_guard& operator=(lock_guard&&) = delete; - - ~lock_guard() { m_mutex.unlock(); } - -private: - mutex_type& m_mutex; -}; - -} // namespace RAJA - -#endif // closing endif for header file include guard diff --git a/include/RAJA/util/sycl_compat.hpp b/include/RAJA/util/sycl_compat.hpp index 2e6e03d1e7..cc553f54fd 100644 --- a/include/RAJA/util/sycl_compat.hpp +++ b/include/RAJA/util/sycl_compat.hpp @@ -18,6 +18,7 @@ #ifndef RAJA_util_sycl_compat_HPP #define RAJA_util_sycl_compat_HPP +#if defined(RAJA_SYCL_ACTIVE) #if (__INTEL_CLANG_COMPILER && __INTEL_CLANG_COMPILER < 20230000) // older version, use legacy header locations #include @@ -25,5 +26,6 @@ // SYCL 2020 standard header #include #endif +#endif #endif // RAJA_util_sycl_compat_HPP diff --git a/include/RAJA/util/types.hpp b/include/RAJA/util/types.hpp index 6d99862d7a..6486565fd0 100644 --- a/include/RAJA/util/types.hpp +++ b/include/RAJA/util/types.hpp @@ -333,11 +333,11 @@ class ConstRestrictRealPtr /// Ctors and assignment op. /// - ConstRestrictRealPtr() : dptr(0) { ; } + constexpr ConstRestrictRealPtr() : dptr(0) { ; } - ConstRestrictRealPtr(const Real_type* d) : dptr(d) { ; } + constexpr ConstRestrictRealPtr(const Real_type* d) : dptr(d) { ; } - ConstRestrictRealPtr& operator=(const Real_type* d) + constexpr ConstRestrictRealPtr& operator=(const Real_type* d) { ConstRestrictRealPtr copy(d); std::swap(dptr, copy.dptr); @@ -351,18 +351,18 @@ class ConstRestrictRealPtr /// /// Implicit conversion operator to bare const pointer. /// - operator const Real_type*() { return dptr; } + constexpr operator const Real_type*() { return dptr; } /// /// "Explicit conversion operator" to bare const pointer, /// consistent with boost shared ptr. /// - const Real_type* get() const { return dptr; } + constexpr const Real_type* get() const { return dptr; } /// /// Bracket operator. /// - const Real_type& operator[](Index_type i) const + constexpr const Real_type& operator[](Index_type i) const { return ((const Real_type* RAJA_RESTRICT)dptr)[i]; } @@ -370,7 +370,7 @@ class ConstRestrictRealPtr /// /// + operator for pointer arithmetic. /// - const Real_type* operator+(Index_type i) const { return dptr + i; } + constexpr const Real_type* operator+(Index_type i) const { return dptr + i; } private: const Real_type* dptr; @@ -390,11 +390,11 @@ class RestrictRealPtr /// Ctors and assignment op. /// - RestrictRealPtr() : dptr(0) { ; } + constexpr RestrictRealPtr() : dptr(0) { ; } - RestrictRealPtr(Real_type* d) : dptr(d) { ; } + constexpr RestrictRealPtr(Real_type* d) : dptr(d) { ; } - RestrictRealPtr& operator=(Real_type* d) + constexpr RestrictRealPtr& operator=(Real_type* d) { RestrictRealPtr copy(d); std::swap(dptr, copy.dptr); @@ -408,35 +408,38 @@ class RestrictRealPtr /// /// Implicit conversion operator to (non-const) bare pointer. /// - operator Real_type*() { return dptr; } + constexpr operator Real_type*() { return dptr; } /// /// Implicit conversion operator to const bare pointer. /// - operator const Real_type*() const { return dptr; } + constexpr operator const Real_type*() const { return dptr; } /// /// "Explicit conversion operator" to (non-const) bare pointer, /// consistent with boost shared ptr. 
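// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: with RAJA/util/mutex.hpp removed
// (see the deletion above), the mempool hunks guard their critical sections
// with an unconditional std::mutex instead of the OpenMP-only RAJA::omp::mutex.
// The class below is a stand-in for that locking pattern, not RAJA's MemPool.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <mutex>

class PoolSketch
{
public:
  std::size_t arena_size()
  {
    // Previously the guard existed only under RAJA_ENABLE_OPENMP:
    //   #if defined(RAJA_ENABLE_OPENMP)
    //     lock_guard<omp::mutex> lock(m_mutex);
    //   #endif
    // Now the lock is always taken, independent of the OpenMP build option.
    std::lock_guard<std::mutex> lock(m_mutex);
    return m_default_arena_size;
  }

  std::size_t arena_size(std::size_t new_size)
  {
    std::lock_guard<std::mutex> lock(m_mutex);
    std::size_t prev = m_default_arena_size;
    m_default_arena_size = new_size;
    return prev;
  }

private:
  std::mutex m_mutex;
  std::size_t m_default_arena_size = 32 * 1024;
};

int main()
{
  PoolSketch pool;
  std::size_t prev = pool.arena_size(64 * 1024);
  return (prev == 32 * 1024 && pool.arena_size() == 64 * 1024) ? 0 : 1;
}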
/// - Real_type* get() { return dptr; } + constexpr Real_type* get() { return dptr; } /// /// "Explicit conversion operator" to const bare pointer, /// consistent with boost shared ptr. /// - const Real_type* get() const { return dptr; } + constexpr const Real_type* get() const { return dptr; } /// /// Operator that enables implicit conversion from RestrictRealPtr to /// RestrictRealConstPtr. /// - operator ConstRestrictRealPtr() { return ConstRestrictRealPtr(dptr); } + constexpr operator ConstRestrictRealPtr() + { + return ConstRestrictRealPtr(dptr); + } /// /// Bracket operator. /// - Real_type& operator[](Index_type i) + constexpr Real_type& operator[](Index_type i) { return ((Real_type * RAJA_RESTRICT) dptr)[i]; } @@ -444,12 +447,12 @@ class RestrictRealPtr /// /// + operator for (non-const) pointer arithmetic. /// - Real_type* operator+(Index_type i) { return dptr + i; } + constexpr Real_type* operator+(Index_type i) { return dptr + i; } /// /// + operator for const pointer arithmetic. /// - const Real_type* operator+(Index_type i) const { return dptr + i; } + constexpr const Real_type* operator+(Index_type i) const { return dptr + i; } private: Real_type* dptr; @@ -469,11 +472,11 @@ class ConstRestrictAlignedRealPtr /// Ctors and assignment op. /// - ConstRestrictAlignedRealPtr() : dptr(0) { ; } + constexpr ConstRestrictAlignedRealPtr() : dptr(0) { ; } - ConstRestrictAlignedRealPtr(const Real_type* d) : dptr(d) { ; } + constexpr ConstRestrictAlignedRealPtr(const Real_type* d) : dptr(d) { ; } - ConstRestrictAlignedRealPtr& operator=(const Real_type* d) + constexpr ConstRestrictAlignedRealPtr& operator=(const Real_type* d) { ConstRestrictAlignedRealPtr copy(d); std::swap(dptr, copy.dptr); @@ -487,13 +490,13 @@ class ConstRestrictAlignedRealPtr /// /// Implicit conversion operator to bare const pointer. /// - operator const Real_type*() { return dptr; } + constexpr operator const Real_type*() { return dptr; } /// /// "Explicit conversion operator" to bare const pointer, /// consistent with boost shared ptr. /// - const Real_type* get() const { return dptr; } + constexpr const Real_type* get() const { return dptr; } /// /// Compiler-specific bracket operators. @@ -501,7 +504,7 @@ class ConstRestrictAlignedRealPtr #if defined(RAJA_COMPILER_ICC) /// - const Real_type& operator[](Index_type i) const + constexpr const Real_type& operator[](Index_type i) const { #if __ICC < 1300 // use alignment intrinsic RAJA_ALIGN_DATA(dptr); @@ -513,7 +516,7 @@ class ConstRestrictAlignedRealPtr #elif defined(RAJA_COMPILER_GNU) /// - const Real_type& operator[](Index_type i) const + constexpr const Real_type& operator[](Index_type i) const { #if 1 // NOTE: alignment instrinsic not available for older GNU compilers return ((const Real_type* RAJA_RESTRICT)RAJA_ALIGN_DATA(dptr))[i]; @@ -523,14 +526,14 @@ class ConstRestrictAlignedRealPtr } #elif defined(RAJA_COMPILER_XLC) - const Real_type& operator[](Index_type i) const + constexpr const Real_type& operator[](Index_type i) const { RAJA_ALIGN_DATA(dptr); return ((const Real_type* RAJA_RESTRICT)dptr)[i]; } #elif defined(RAJA_COMPILER_CLANG) - const Real_type& operator[](Index_type i) const + constexpr const Real_type& operator[](Index_type i) const { return ((const_TDRAReal_ptr)dptr)[i]; } @@ -543,7 +546,7 @@ class ConstRestrictAlignedRealPtr /// /// + operator for pointer arithmetic. 
/// - const Real_type* operator+(Index_type i) const { return dptr + i; } + constexpr const Real_type* operator+(Index_type i) const { return dptr + i; } private: const Real_type* dptr; @@ -563,11 +566,11 @@ class RestrictAlignedRealPtr /// Ctors and assignment op. /// - RestrictAlignedRealPtr() : dptr(0) { ; } + constexpr RestrictAlignedRealPtr() : dptr(0) { ; } - RestrictAlignedRealPtr(Real_type* d) : dptr(d) { ; } + constexpr RestrictAlignedRealPtr(Real_type* d) : dptr(d) { ; } - RestrictAlignedRealPtr& operator=(Real_type* d) + constexpr RestrictAlignedRealPtr& operator=(Real_type* d) { RestrictAlignedRealPtr copy(d); std::swap(dptr, copy.dptr); @@ -581,30 +584,30 @@ class RestrictAlignedRealPtr /// /// Implicit conversion operator to (non-const) bare pointer. /// - operator Real_type*() { return dptr; } + constexpr operator Real_type*() { return dptr; } /// /// Implicit conversion operator to const bare pointer. /// - operator const Real_type*() const { return dptr; } + constexpr operator const Real_type*() const { return dptr; } /// /// "Explicit conversion operator" to (non-const) bare pointer, /// consistent with boost shared ptr. /// - Real_type* get() { return dptr; } + constexpr Real_type* get() { return dptr; } /// /// "Explicit conversion operator" to const bare pointer, /// consistent with boost shared ptr. /// - const Real_type* get() const { return dptr; } + constexpr const Real_type* get() const { return dptr; } /// /// Operator that enables implicit conversion from /// RestrictAlignedRealPtr to RestrictAlignedRealConstPtr. /// - operator ConstRestrictAlignedRealPtr() + constexpr operator ConstRestrictAlignedRealPtr() { return ConstRestrictAlignedRealPtr(dptr); } @@ -615,7 +618,7 @@ class RestrictAlignedRealPtr #if defined(RAJA_COMPILER_ICC) /// - Real_type& operator[](Index_type i) + constexpr Real_type& operator[](Index_type i) { #if __ICC < 1300 // use alignment intrinsic RAJA_ALIGN_DATA(dptr); @@ -626,7 +629,7 @@ class RestrictAlignedRealPtr } /// - const Real_type& operator[](Index_type i) const + constexpr const Real_type& operator[](Index_type i) const { #if __ICC < 1300 // use alignment intrinsic RAJA_ALIGN_DATA(dptr); @@ -638,7 +641,7 @@ class RestrictAlignedRealPtr #elif defined(RAJA_COMPILER_GNU) /// - Real_type& operator[](Index_type i) + constexpr Real_type& operator[](Index_type i) { #if 1 // NOTE: alignment instrinsic not available for older GNU compilers return ((Real_type * RAJA_RESTRICT) RAJA_ALIGN_DATA(dptr))[i]; @@ -648,7 +651,7 @@ class RestrictAlignedRealPtr } /// - const Real_type& operator[](Index_type i) const + constexpr const Real_type& operator[](Index_type i) const { #if 1 // NOTE: alignment instrinsic not available for older GNU compilers return ((Real_type * RAJA_RESTRICT) RAJA_ALIGN_DATA(dptr))[i]; @@ -659,14 +662,14 @@ class RestrictAlignedRealPtr #elif defined(RAJA_COMPILER_XLC) /// - Real_type& operator[](Index_type i) + constexpr Real_type& operator[](Index_type i) { RAJA_ALIGN_DATA(dptr); return ((Real_type * RAJA_RESTRICT) dptr)[i]; } /// - const Real_type& operator[](Index_type i) const + constexpr const Real_type& operator[](Index_type i) const { RAJA_ALIGN_DATA(dptr); return ((Real_type * RAJA_RESTRICT) dptr)[i]; @@ -674,7 +677,10 @@ class RestrictAlignedRealPtr #elif defined(RAJA_COMPILER_CLANG) /// - Real_type& operator[](Index_type i) { return ((TDRAReal_ptr)dptr)[i]; } + constexpr Real_type& operator[](Index_type i) + { + return ((TDRAReal_ptr)dptr)[i]; + } /// const Real_type& operator[](Index_type i) const @@ -690,12 +696,12 
@@ class RestrictAlignedRealPtr /// /// + operator for (non-const) pointer arithmetic. /// - Real_type* operator+(Index_type i) { return dptr + i; } + constexpr Real_type* operator+(Index_type i) { return dptr + i; } /// /// + operator for const pointer arithmetic. /// - const Real_type* operator+(Index_type i) const { return dptr + i; } + constexpr const Real_type* operator+(Index_type i) const { return dptr + i; } private: Real_type* dptr; @@ -716,11 +722,11 @@ class ConstRestrictComplexPtr /// Ctors and assignment op. /// - ConstRestrictComplexPtr() : dptr(0) { ; } + constexpr ConstRestrictComplexPtr() : dptr(0) { ; } - ConstRestrictComplexPtr(const Complex_type* d) : dptr(d) { ; } + constexpr ConstRestrictComplexPtr(const Complex_type* d) : dptr(d) { ; } - ConstRestrictComplexPtr& operator=(const Complex_type* d) + constexpr ConstRestrictComplexPtr& operator=(const Complex_type* d) { ConstRestrictComplexPtr copy(d); std::swap(dptr, copy.dptr); @@ -734,18 +740,18 @@ class ConstRestrictComplexPtr /// /// Implicit conversion operator to bare const pointer. /// - operator const Complex_type*() const { return dptr; } + constexpr operator const Complex_type*() const { return dptr; } /// /// "Explicit conversion operator" to bare const pointer, /// consistent with boost shared ptr. /// - const Complex_type* get() const { return dptr; } + constexpr const Complex_type* get() const { return dptr; } /// /// Bracket operator. /// - const Complex_type& operator[](Index_type i) const + constexpr const Complex_type& operator[](Index_type i) const { return ((const Complex_type* RAJA_RESTRICT)dptr)[i]; } @@ -753,7 +759,10 @@ class ConstRestrictComplexPtr /// /// + operator for pointer arithmetic. /// - const Complex_type* operator+(Index_type i) const { return dptr + i; } + constexpr const Complex_type* operator+(Index_type i) const + { + return dptr + i; + } private: const Complex_type* dptr; @@ -773,11 +782,11 @@ class RestrictComplexPtr /// Ctors and assignment op. /// - RestrictComplexPtr() : dptr(0) { ; } + constexpr RestrictComplexPtr() : dptr(0) { ; } - RestrictComplexPtr(Complex_type* d) : dptr(d) { ; } + constexpr RestrictComplexPtr(Complex_type* d) : dptr(d) { ; } - RestrictComplexPtr& operator=(Complex_type* d) + constexpr RestrictComplexPtr& operator=(Complex_type* d) { RestrictComplexPtr copy(d); std::swap(dptr, copy.dptr); @@ -791,35 +800,38 @@ class RestrictComplexPtr /// /// Implicit conversion operator to (non-const) bare pointer. /// - operator Complex_type*() { return dptr; } + constexpr operator Complex_type*() { return dptr; } /// /// Implicit conversion operator to const bare pointer. /// - operator const Complex_type*() const { return dptr; } + constexpr operator const Complex_type*() const { return dptr; } /// /// "Explicit conversion operator" to (non-const) bare pointer, /// consistent with boost shared ptr. /// - Complex_type* get() { return dptr; } + constexpr Complex_type* get() { return dptr; } /// /// "Explicit conversion operator" to const bare pointer, /// consistent with boost shared ptr. /// - const Complex_type* get() const { return dptr; } + constexpr const Complex_type* get() const { return dptr; } /// /// Operator that enables implicit conversion from RestrictComplexPtr to /// RestrictComplexConstPtr. /// - operator ConstRestrictComplexPtr() { return ConstRestrictComplexPtr(dptr); } + constexpr operator ConstRestrictComplexPtr() + { + return ConstRestrictComplexPtr(dptr); + } /// /// (Non-const) bracket operator. 
/// - Complex_type& operator[](Index_type i) + constexpr Complex_type& operator[](Index_type i) { return ((Complex_type * RAJA_RESTRICT) dptr)[i]; } @@ -827,7 +839,7 @@ class RestrictComplexPtr /// /// Const bracket operator. /// - const Complex_type& operator[](Index_type i) const + constexpr const Complex_type& operator[](Index_type i) const { return ((Complex_type * RAJA_RESTRICT) dptr)[i]; } @@ -835,12 +847,15 @@ class RestrictComplexPtr /// /// + operator for (non-const) pointer arithmetic. /// - Complex_type* operator+(Index_type i) { return dptr + i; } + constexpr Complex_type* operator+(Index_type i) { return dptr + i; } /// /// + operator for const pointer arithmetic. /// - const Complex_type* operator+(Index_type i) const { return dptr + i; } + constexpr const Complex_type* operator+(Index_type i) const + { + return dptr + i; + } private: Complex_type* dptr; @@ -920,13 +935,15 @@ namespace detail struct DefaultAccessor { template - static RAJA_HOST_DEVICE RAJA_INLINE T get(T* ptr, size_t i) + static RAJA_HOST_DEVICE RAJA_INLINE constexpr T get(T* ptr, size_t i) { return ptr[i]; } template - static RAJA_HOST_DEVICE RAJA_INLINE void set(T* ptr, size_t i, T val) + static RAJA_HOST_DEVICE RAJA_INLINE constexpr void set(T* ptr, + size_t i, + T val) { ptr[i] = val; } @@ -987,14 +1004,14 @@ struct AsIntegerArray return num_integer_type; } - RAJA_HOST_DEVICE constexpr T get_value() const + RAJA_HOST_DEVICE T get_value() const { T value; memcpy(&value, &array[0], sizeof(T)); return value; } - RAJA_HOST_DEVICE constexpr void set_value(T value) + RAJA_HOST_DEVICE void set_value(T value) { memcpy(&array[0], &value, sizeof(T)); } @@ -1007,14 +1024,14 @@ struct AsIntegerArray template struct ScopedAssignment { - ScopedAssignment(T& val, T const& new_val) + constexpr ScopedAssignment(T& val, T const& new_val) : m_ref_to_val(val), m_prev_val(std::move(val)) { m_ref_to_val = new_val; } - ScopedAssignment(T& val, T&& new_val) + constexpr ScopedAssignment(T& val, T&& new_val) : m_ref_to_val(val), m_prev_val(std::move(val)) { @@ -1026,6 +1043,7 @@ struct ScopedAssignment ScopedAssignment& operator=(ScopedAssignment const&) = delete; ScopedAssignment& operator=(ScopedAssignment&&) = delete; + // constexpr in c++20 ~ScopedAssignment() { m_ref_to_val = std::move(m_prev_val); } private: diff --git a/include/RAJA/util/zip.hpp b/include/RAJA/util/zip.hpp index 8bbfbf2c01..83da60f8d4 100644 --- a/include/RAJA/util/zip.hpp +++ b/include/RAJA/util/zip.hpp @@ -55,112 +55,125 @@ struct ZipIterator zip_ref::reference...>; using iterator_category = std::random_access_iterator_tag; - RAJA_HOST_DEVICE inline ZipIterator() : m_iterators() {} + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator() : m_iterators() {} template...>> - RAJA_HOST_DEVICE inline ZipIterator(Args&&... args) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator(Args&&... args) : m_iterators(std::forward(args)...) 
{} - RAJA_HOST_DEVICE inline ZipIterator(const ZipIterator& rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator(const ZipIterator& rhs) : m_iterators(rhs.m_iterators) {} - RAJA_HOST_DEVICE inline ZipIterator(ZipIterator&& rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator(ZipIterator&& rhs) : m_iterators(std::move(rhs.m_iterators)) {} - RAJA_HOST_DEVICE inline ZipIterator& operator=(const ZipIterator& rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator& operator=( + const ZipIterator& rhs) { m_iterators = rhs.m_iterators; return *this; } - RAJA_HOST_DEVICE inline ZipIterator& operator=(ZipIterator&& rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator& operator=( + ZipIterator&& rhs) { m_iterators = std::move(rhs.m_iterators); return *this; } - RAJA_HOST_DEVICE inline difference_type get_stride() const { return 1; } + RAJA_HOST_DEVICE RAJA_INLINE constexpr difference_type get_stride() const + { + return 1; + } - RAJA_HOST_DEVICE inline bool operator==(const ZipIterator& rhs) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr bool operator==( + const ZipIterator& rhs) const { return RAJA::get<0>(m_iterators) == RAJA::get<0>(rhs.m_iterators); } - RAJA_HOST_DEVICE inline bool operator!=(const ZipIterator& rhs) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr bool operator!=( + const ZipIterator& rhs) const { return RAJA::get<0>(m_iterators) != RAJA::get<0>(rhs.m_iterators); } - RAJA_HOST_DEVICE inline bool operator>(const ZipIterator& rhs) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr bool operator>( + const ZipIterator& rhs) const { return RAJA::get<0>(m_iterators) > RAJA::get<0>(rhs.m_iterators); } - RAJA_HOST_DEVICE inline bool operator<(const ZipIterator& rhs) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr bool operator<( + const ZipIterator& rhs) const { return RAJA::get<0>(m_iterators) < RAJA::get<0>(rhs.m_iterators); } - RAJA_HOST_DEVICE inline bool operator>=(const ZipIterator& rhs) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr bool operator>=( + const ZipIterator& rhs) const { return RAJA::get<0>(m_iterators) >= RAJA::get<0>(rhs.m_iterators); } - RAJA_HOST_DEVICE inline bool operator<=(const ZipIterator& rhs) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr bool operator<=( + const ZipIterator& rhs) const { return RAJA::get<0>(m_iterators) <= RAJA::get<0>(rhs.m_iterators); } - RAJA_HOST_DEVICE inline ZipIterator& operator++() + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator& operator++() { detail::zip_for_each(m_iterators, detail::PreInc {}); return *this; } - RAJA_HOST_DEVICE inline ZipIterator& operator--() + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator& operator--() { detail::zip_for_each(m_iterators, detail::PreDec {}); return *this; } - RAJA_HOST_DEVICE inline ZipIterator operator++(int) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator operator++(int) { ZipIterator tmp(*this); ++(*this); return tmp; } - RAJA_HOST_DEVICE inline ZipIterator operator--(int) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator operator--(int) { ZipIterator tmp(*this); --(*this); return tmp; } - RAJA_HOST_DEVICE inline ZipIterator& operator+=(const difference_type& rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator& operator+=( + const difference_type& rhs) { detail::zip_for_each(m_iterators, detail::PlusEq {rhs}); return *this; } - RAJA_HOST_DEVICE inline ZipIterator& operator-=(const difference_type& rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator& operator-=( + const difference_type& rhs) { detail::zip_for_each(m_iterators, detail::MinusEq {rhs}); 
return *this; } - RAJA_HOST_DEVICE inline difference_type operator-( + RAJA_HOST_DEVICE RAJA_INLINE constexpr difference_type operator-( const ZipIterator& rhs) const { return RAJA::get<0>(m_iterators) - RAJA::get<0>(rhs.m_iterators); } - RAJA_HOST_DEVICE inline ZipIterator operator+( + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator operator+( const difference_type& rhs) const { ZipIterator tmp(*this); @@ -168,7 +181,7 @@ struct ZipIterator return tmp; } - RAJA_HOST_DEVICE inline ZipIterator operator-( + RAJA_HOST_DEVICE RAJA_INLINE constexpr ZipIterator operator-( const difference_type& rhs) const { ZipIterator tmp(*this); @@ -176,31 +189,34 @@ struct ZipIterator return tmp; } - RAJA_HOST_DEVICE friend ZipIterator operator+(difference_type lhs, - const ZipIterator& rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr friend ZipIterator operator+( + difference_type lhs, + const ZipIterator& rhs) { ZipIterator tmp(rhs); tmp += lhs; return tmp; } - RAJA_HOST_DEVICE inline reference operator*() const + RAJA_HOST_DEVICE RAJA_INLINE constexpr reference operator*() const { return deref_helper(camp::make_idx_seq_t {}); } // TODO:: figure out what to do with this - // RAJA_HOST_DEVICE inline reference operator->() const + // RAJA_HOST_DEVICE RAJA_INLINE constexpr reference operator->() const // { // return *(*this); // } - RAJA_HOST_DEVICE reference operator[](difference_type rhs) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr reference operator[]( + difference_type rhs) const { return *((*this) + rhs); } - RAJA_HOST_DEVICE friend inline void safe_iter_swap(ZipIterator lhs, - ZipIterator rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr friend void safe_iter_swap( + ZipIterator lhs, + ZipIterator rhs) { detail::zip_for_each(lhs.m_iterators, rhs.m_iterators, detail::IterSwap {}); } @@ -209,7 +225,8 @@ struct ZipIterator zip_val...> m_iterators; template - RAJA_HOST_DEVICE inline reference deref_helper(camp::idx_seq) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr reference deref_helper( + camp::idx_seq) const { return reference(*RAJA::get(m_iterators)...); } @@ -220,7 +237,8 @@ struct ZipIterator a single ZipIterator object. */ template -RAJA_HOST_DEVICE auto zip(Args&&... args) -> ZipIterator...> +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto zip(Args&&... args) + -> ZipIterator...> { return {std::forward(args)...}; } @@ -230,7 +248,7 @@ RAJA_HOST_DEVICE auto zip(Args&&... args) -> ZipIterator...> ZipIterator objects. */ template -RAJA_HOST_DEVICE RAJA_INLINE auto zip_span(Args&&... args) +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto zip_span(Args&&... args) -> Span>...>, typename ZipIterator< detail::ContainerIter>...>::difference_type> @@ -251,9 +269,12 @@ RAJA_HOST_DEVICE RAJA_INLINE auto zip_span(Args&&... args) template struct CompareFirst { - RAJA_HOST_DEVICE inline CompareFirst(Compare comp_) : comp(comp_) {} + RAJA_HOST_DEVICE RAJA_INLINE constexpr CompareFirst(Compare comp_) + : comp(comp_) + {} - RAJA_HOST_DEVICE inline bool operator()(T const& lhs, T const& rhs) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr bool operator()(T const& lhs, + T const& rhs) const { return comp(RAJA::get<0>(lhs), RAJA::get<0>(rhs)); } @@ -267,7 +288,8 @@ struct CompareFirst like objects of type T. 
*/ template -RAJA_HOST_DEVICE auto compare_first(Compare comp) -> CompareFirst +RAJA_HOST_DEVICE RAJA_INLINE constexpr auto compare_first(Compare comp) + -> CompareFirst { return {comp}; } diff --git a/include/RAJA/util/zip_tuple.hpp b/include/RAJA/util/zip_tuple.hpp index 03bd098d99..fb2245f978 100644 --- a/include/RAJA/util/zip_tuple.hpp +++ b/include/RAJA/util/zip_tuple.hpp @@ -49,15 +49,15 @@ using zip_tuple_element_t = typename zip_tuple_element::type; // the reference type returned by get depends on the reference type // of the zip_tuple that get is called on template -RAJA_HOST_DEVICE constexpr RAJA::zip_tuple_element_t>& -get(zip_tuple& z) noexcept +RAJA_HOST_DEVICE RAJA_INLINE constexpr RAJA:: + zip_tuple_element_t>& + get(zip_tuple& z) noexcept { return z.template get(); } template -RAJA_HOST_DEVICE constexpr RAJA:: +RAJA_HOST_DEVICE RAJA_INLINE constexpr RAJA:: zip_tuple_element_t> const& get(zip_tuple const& z) noexcept { @@ -65,7 +65,7 @@ RAJA_HOST_DEVICE constexpr RAJA:: } template -RAJA_HOST_DEVICE constexpr std::remove_reference_t< +RAJA_HOST_DEVICE RAJA_INLINE constexpr std::remove_reference_t< RAJA::zip_tuple_element_t>>&& get(zip_tuple&& z) noexcept { @@ -73,7 +73,7 @@ get(zip_tuple&& z) noexcept } template -RAJA_HOST_DEVICE constexpr std::remove_reference_t< +RAJA_HOST_DEVICE RAJA_INLINE constexpr std::remove_reference_t< RAJA::zip_tuple_element_t>> const&& get(zip_tuple const&& z) noexcept { @@ -86,7 +86,7 @@ namespace detail struct PassThrough { template - RAJA_HOST_DEVICE RAJA_INLINE auto operator()(T&& t) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr auto operator()(T&& t) const -> decltype(std::forward(t)) { return std::forward(t); @@ -96,7 +96,7 @@ struct PassThrough struct Move { template - RAJA_HOST_DEVICE RAJA_INLINE auto operator()(T&& t) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr auto operator()(T&& t) const -> decltype(std::move(t)) { return std::move(t); @@ -106,7 +106,7 @@ struct Move struct PreInc { template - RAJA_HOST_DEVICE inline auto operator()(Iter&& iter) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr auto operator()(Iter&& iter) const -> decltype(++std::forward(iter)) { return ++std::forward(iter); @@ -116,7 +116,7 @@ struct PreInc struct PreDec { template - RAJA_HOST_DEVICE inline auto operator()(Iter&& iter) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr auto operator()(Iter&& iter) const -> decltype(--std::forward(iter)) { return --std::forward(iter); @@ -129,7 +129,7 @@ struct PlusEq const difference_type& rhs; template - RAJA_HOST_DEVICE inline auto operator()(Iter&& iter) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr auto operator()(Iter&& iter) const -> decltype(std::forward(iter) += rhs) { return std::forward(iter) += rhs; @@ -142,7 +142,7 @@ struct MinusEq const difference_type& rhs; template - RAJA_HOST_DEVICE inline auto operator()(Iter&& iter) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr auto operator()(Iter&& iter) const -> decltype(std::forward(iter) -= rhs) { return std::forward(iter) -= rhs; @@ -152,7 +152,7 @@ struct MinusEq struct DeRef { template - RAJA_HOST_DEVICE inline auto operator()(Iter&& iter) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr auto operator()(Iter&& iter) const -> decltype(*std::forward(iter)) { return *std::forward(iter); @@ -162,7 +162,7 @@ struct DeRef struct Swap { template - RAJA_HOST_DEVICE inline int operator()(T0&& t0, T1&& t1) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr int operator()(T0&& t0, T1&& t1) const { using camp::safe_swap; safe_swap(std::forward(t0), std::forward(t1)); @@ -173,7 
+173,7 @@ struct Swap struct IterSwap { template - RAJA_HOST_DEVICE inline int operator()(T0&& t0, T1&& t1) const + RAJA_HOST_DEVICE RAJA_INLINE constexpr int operator()(T0&& t0, T1&& t1) const { using RAJA::safe_iter_swap; safe_iter_swap(std::forward(t0), std::forward(t1)); @@ -185,9 +185,10 @@ struct IterSwap \brief Call f on each member of t (f(t)...). */ template -RAJA_HOST_DEVICE inline void zip_for_each_impl(Tuple&& t, - F&& f, - camp::idx_seq) +RAJA_HOST_DEVICE RAJA_INLINE constexpr void zip_for_each_impl( + Tuple&& t, + F&& f, + camp::idx_seq) { camp::sink(std::forward(f)(RAJA::get(std::forward(t)))...); } @@ -196,10 +197,11 @@ RAJA_HOST_DEVICE inline void zip_for_each_impl(Tuple&& t, \brief Call f on each member of t0 and t1 (f(t0, t1)...). */ template -RAJA_HOST_DEVICE inline void zip_for_each_impl(Tuple0&& t0, - Tuple1&& t1, - F&& f, - camp::idx_seq) +RAJA_HOST_DEVICE RAJA_INLINE constexpr void zip_for_each_impl( + Tuple0&& t0, + Tuple1&& t1, + F&& f, + camp::idx_seq) { camp::sink(std::forward(f)(RAJA::get(std::forward(t0)), RAJA::get(std::forward(t1)))...); @@ -209,7 +211,7 @@ RAJA_HOST_DEVICE inline void zip_for_each_impl(Tuple0&& t0, \brief Call f on each member of t (f(t)...). */ template -RAJA_HOST_DEVICE inline void zip_for_each(Tuple&& t, F&& f) +RAJA_HOST_DEVICE RAJA_INLINE constexpr void zip_for_each(Tuple&& t, F&& f) { zip_for_each_impl(std::forward(t), std::forward(f), typename camp::decay::IdxSeq {}); @@ -219,7 +221,9 @@ RAJA_HOST_DEVICE inline void zip_for_each(Tuple&& t, F&& f) \brief Call f on each member of t0 and t1 (f(t0, t1)...). */ template -RAJA_HOST_DEVICE inline void zip_for_each(Tuple0&& t0, Tuple1&& t1, F&& f) +RAJA_HOST_DEVICE RAJA_INLINE constexpr void zip_for_each(Tuple0&& t0, + Tuple1&& t1, + F&& f) { static_assert(std::is_same::IdxSeq, typename camp::decay::IdxSeq>::value, @@ -257,7 +261,7 @@ struct zip_tuple template< typename... Os, typename = concepts::enable_if...>> - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(Os&&... os) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(Os&&... os) : m_tuple(std::forward(os)...) {} @@ -266,63 +270,67 @@ struct zip_tuple typename = concepts::enable_if::type>...>> - zip_tuple& assign(Os&&... os) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& assign(Os&&... 
os) { return assign_helper(IdxSeq {}, std::forward(os)...); } // copy and move constructors - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(zip_tuple& o) : zip_tuple(o, IdxSeq {}) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(zip_tuple& o) + : zip_tuple(o, IdxSeq {}) {} - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(zip_tuple const& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(zip_tuple const& o) : zip_tuple(o, IdxSeq {}) {} - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(zip_tuple&& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(zip_tuple&& o) : zip_tuple(std::move(o), IdxSeq {}) {} // move if is_val, pass-through otherwise // copy and move assignment operators - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& operator=(zip_tuple& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& operator=(zip_tuple& o) { return assign_helper(o, IdxSeq {}); } - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& operator=(zip_tuple const& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& operator=( + zip_tuple const& o) { return assign_helper(o, IdxSeq {}); } - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& operator=(zip_tuple&& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& operator=(zip_tuple&& o) { return assign_helper(std::move(o), IdxSeq {}); } // copy and move constructors from opp_tuple type zip_tuples - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(opp_tuple& o) : zip_tuple(o, IdxSeq {}) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(opp_tuple& o) + : zip_tuple(o, IdxSeq {}) {} - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(opp_tuple const& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(opp_tuple const& o) : zip_tuple(o, IdxSeq {}) {} - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(opp_tuple&& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(opp_tuple&& o) : zip_tuple(std::move(o), IdxSeq {}) {} // move if is_val, pass-through otherwise // copy and move assignment operators from opp_tuple type zip_tuples - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& operator=(opp_tuple& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& operator=(opp_tuple& o) { return assign_helper(o, IdxSeq {}); } - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& operator=(opp_tuple const& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& operator=( + opp_tuple const& o) { return assign_helper(o, IdxSeq {}); } - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& operator=(opp_tuple&& o) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& operator=(opp_tuple&& o) { return assign_helper(std::move(o), IdxSeq {}); } @@ -331,21 +339,22 @@ struct zip_tuple // the reference type returned by get depends on the reference type // of the zip_tuple that get is called on template - RAJA_HOST_DEVICE constexpr RAJA::tuple_element_t& + RAJA_HOST_DEVICE RAJA_INLINE constexpr RAJA::tuple_element_t& get() & noexcept { return RAJA::get(m_tuple); } template - RAJA_HOST_DEVICE constexpr RAJA::tuple_element_t const& get() - const& noexcept + RAJA_HOST_DEVICE RAJA_INLINE constexpr RAJA:: + tuple_element_t const& + get() const& noexcept { return RAJA::get(m_tuple); } template - RAJA_HOST_DEVICE constexpr std::remove_reference_t< + RAJA_HOST_DEVICE RAJA_INLINE constexpr std::remove_reference_t< RAJA::tuple_element_t>&& get() && noexcept { @@ -353,7 +362,7 @@ struct zip_tuple } template - RAJA_HOST_DEVICE constexpr std::remove_reference_t< + RAJA_HOST_DEVICE RAJA_INLINE constexpr std::remove_reference_t< RAJA::tuple_element_t> const&& get() const&& noexcept { @@ -361,16 +370,16 @@ struct zip_tuple } // safe_swap that calls swap on each pair in the tuple - RAJA_HOST_DEVICE friend 
RAJA_INLINE void safe_swap(zip_tuple& lhs, - zip_tuple& rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr friend void safe_swap(zip_tuple& lhs, + zip_tuple& rhs) { detail::zip_for_each(lhs, rhs, detail::Swap {}); } // safe_swap for swapping zip_tuples with opposite is_val // calls swap on each pair in the tuple - RAJA_HOST_DEVICE friend RAJA_INLINE void safe_swap(zip_tuple& lhs, - opp_tuple& rhs) + RAJA_HOST_DEVICE RAJA_INLINE constexpr friend void safe_swap(zip_tuple& lhs, + opp_tuple& rhs) { detail::zip_for_each(lhs, rhs, detail::Swap {}); } @@ -391,7 +400,9 @@ struct zip_tuple // assignment helper from types convertible to Ts template - zip_tuple& assign_helper(camp::idx_seq, Os&&... os) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& assign_helper( + camp::idx_seq, + Os&&... os) { camp::sink(get() = std::forward(os)...); return *this; @@ -399,25 +410,28 @@ struct zip_tuple // copy and move constructor helpers template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(zip_tuple& o, camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(zip_tuple& o, + camp::idx_seq) : zip_tuple(RAJA::get(o)...) {} template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(zip_tuple const& o, - camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(zip_tuple const& o, + camp::idx_seq) : zip_tuple(RAJA::get(o)...) {} template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(zip_tuple&& o, camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(zip_tuple&& o, + camp::idx_seq) : zip_tuple(RAJA::get(IsValMover {}(o))...) {} // move if is_val, pass-through otherwise // copy and move assignment operator helpers template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& assign_helper(zip_tuple& o, - camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& assign_helper( + zip_tuple& o, + camp::idx_seq) { if (this != &o) { @@ -427,8 +441,9 @@ struct zip_tuple } template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& assign_helper(zip_tuple const& o, - camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& assign_helper( + zip_tuple const& o, + camp::idx_seq) { if (this != &o) { @@ -438,8 +453,9 @@ struct zip_tuple } template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& assign_helper(zip_tuple&& o, - camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& assign_helper( + zip_tuple&& o, + camp::idx_seq) { if (this != &o) { @@ -450,41 +466,46 @@ struct zip_tuple // copy and move constructor helpers from opp_tuple type zip_tuples template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(opp_tuple& o, camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(opp_tuple& o, + camp::idx_seq) : zip_tuple(RAJA::get(o)...) {} template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(opp_tuple const& o, - camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(opp_tuple const& o, + camp::idx_seq) : zip_tuple(RAJA::get(o)...) {} template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple(opp_tuple&& o, camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple(opp_tuple&& o, + camp::idx_seq) : zip_tuple(RAJA::get(IsValMover {}(o))...) 
{} // move if is_val, pass-through otherwise // copy and move assignment operator helpers from opp_tuple type zip_tuples template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& assign_helper(opp_tuple& o, - camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& assign_helper( + opp_tuple& o, + camp::idx_seq) { camp::sink(get() = RAJA::get(o)...); return *this; } template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& assign_helper(opp_tuple const& o, - camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& assign_helper( + opp_tuple const& o, + camp::idx_seq) { camp::sink(get() = RAJA::get(o)...); return *this; } template - RAJA_HOST_DEVICE RAJA_INLINE zip_tuple& assign_helper(opp_tuple&& o, - camp::idx_seq) + RAJA_HOST_DEVICE RAJA_INLINE constexpr zip_tuple& assign_helper( + opp_tuple&& o, + camp::idx_seq) { camp::sink(get() = RAJA::get(std::move(o))...); return *this; diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index 3297214ff3..6bb684e9ec 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -190,7 +190,7 @@ then timed_message "Cleaning working directory" # Map CPU core allocations - declare -A core_counts=(["lassen"]=40 ["poodle"]=28 ["dane"]=28 ["corona"]=32 ["rzansel"]=48 ["tioga"]=32 ["tuolumne"]=48) + declare -A core_counts=(["lassen"]=40 ["poodle"]=28 ["dane"]=28 ["matrix"]=28 ["corona"]=32 ["rzansel"]=48 ["tioga"]=32 ["tuolumne"]=48) # If building, then delete everything first # NOTE: 'cmake --build . -j core_counts' attempts to reduce individual build resources. @@ -204,7 +204,7 @@ then # Shared allocation: Allows build_and_test.sh to run within a sub-allocation (see CI config). # Use /dev/shm: Prevent MPI tests from running on a node where the build dir doesn't exist. cmake_options="" - if [[ "${truehostname}" == "poodle" || "${truehostname}" == "dane" ]] + if [[ "${truehostname}" == "poodle" || "${truehostname}" == "dane" || "${truehostname}" == "matrix" ]] then cmake_options="-DBLT_MPI_COMMAND_APPEND:STRING=--overlap" fi @@ -257,24 +257,19 @@ then echo "[Error]: Failure(s) while running CTest" && exit 1 fi - if grep -q -i "ENABLE_HIP.*ON" ${hostconfig_path} + if [[ ! -d ${install_dir} ]] then - echo "[Warning]: Not testing install with HIP" - else - if [[ ! -d ${install_dir} ]] - then - echo "[Error]: Install directory not found : ${install_dir}" && exit 1 - fi - - cd ${install_dir}/examples/RAJA/using-with-cmake - mkdir build && cd build - if ! $cmake_exe -C ../host-config.cmake ..; then - echo "[Error]: Running $cmake_exe for using-with-cmake test" && exit 1 - fi - - if ! make; then - echo "[Error]: Running make for using-with-cmake test" && exit 1 - fi + echo "[Error]: Install directory not found : ${install_dir}" && exit 1 + fi + + cd ${install_dir}/examples/RAJA/using-with-cmake + mkdir build && cd build + if ! $cmake_exe -C ../host-config.cmake ..; then + echo "[Error]: Running $cmake_exe for using-with-cmake test" && exit 1 + fi + + if ! 
make; then
+    echo "[Error]: Running make for using-with-cmake test" && exit 1
   fi
 
   timed_message "RAJA tests completed"
diff --git a/scripts/lc-builds/corona_sycl.sh b/scripts/lc-builds/corona_sycl.sh
index b84d594b1a..b375b7ef45 100755
--- a/scripts/lc-builds/corona_sycl.sh
+++ b/scripts/lc-builds/corona_sycl.sh
@@ -13,7 +13,7 @@ if [[ $# -lt 1 ]]; then
   echo "  1) SYCL compiler installation path"
   echo
   echo "For example: "
-  echo "    corona_sycl.sh /usr/workspace/raja-dev/clang_sycl_730cd3a5275f_hip_gcc10.3.1_rocm6.0.2"
+  echo "    corona_sycl.sh /usr/workspace/raja-dev/clang_sycl_16b7bcb09915_hip_gcc10.3.1_rocm6.4.2"
   exit
 fi
 
diff --git a/scripts/lc-builds/toss4_cce_omptarget.sh b/scripts/lc-builds/toss4_cce_omptarget.sh
new file mode 100755
index 0000000000..96a9712f85
--- /dev/null
+++ b/scripts/lc-builds/toss4_cce_omptarget.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+###############################################################################
+# Copyright (c) 2016-25, Lawrence Livermore National Security, LLC
+# and RAJA project contributors. See the RAJA/LICENSE file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+###############################################################################
+
+if [[ $# -lt 2 ]]; then
+  echo
+  echo "You must pass 2 or more arguments to the script (in this order): "
+  echo "  1) compiler version number"
+  echo "  2) HIP compute architecture"
+  echo "  3...) optional arguments to cmake"
+  echo
+  echo "For example: "
+  echo "    toss4_cce_omptarget.sh 20.0.0-magic gfx942"
+  exit
+fi
+
+COMP_VER=$1
+HIP_ARCH=$2
+shift 2
+
+HOSTCONFIG="cce_omptarget_X"
+
+BUILD_SUFFIX=lc_toss4-cce-${COMP_VER}-${HIP_ARCH}-omptarget
+
+echo
+echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it"
+echo "Configuration extra arguments:"
+echo "   $@"
+echo
+
+rm -rf build_${BUILD_SUFFIX} >/dev/null
+mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX}
+
+
+module load cmake/3.24.2
+
+cmake \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DHIP_ARCH=${HIP_ARCH} \
+  -DCMAKE_C_COMPILER="/usr/tce/packages/cce/cce-${COMP_VER}/bin/craycc" \
+  -DCMAKE_CXX_COMPILER="/usr/tce/packages/cce/cce-${COMP_VER}/bin/crayCC" \
+  -DBLT_CXX_STD=c++17 \
+  -DENABLE_CLANGFORMAT=Off \
+  -C "../host-configs/lc-builds/toss4/${HOSTCONFIG}.cmake" \
+  -DENABLE_HIP=OFF \
+  -DENABLE_OPENMP=ON \
+  -DRAJA_ENABLE_TARGET_OPENMP=ON \
+  -DENABLE_CUDA=OFF \
+  -DENABLE_BENCHMARKS=On \
+  -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
+  "$@" \
+  ..
+
+echo
+echo "***********************************************************************"
+echo
+echo "cd into directory build_${BUILD_SUFFIX} and run make to build RAJA"
+echo
+echo "  srun -n1 make"
+echo
+echo "***********************************************************************"
diff --git a/scripts/lc-builds/toss4_nvcc_clang.sh b/scripts/lc-builds/toss4_nvcc_clang.sh
new file mode 100755
index 0000000000..dac9fef11b
--- /dev/null
+++ b/scripts/lc-builds/toss4_nvcc_clang.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+
+###############################################################################
+# Copyright (c) 2016-25, Lawrence Livermore National Security, LLC
+# and RAJA project contributors. See the RAJA/LICENSE file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+###############################################################################
+
+if [[ $# -lt 3 ]]; then
+  echo
+  echo "You must pass 3 arguments to the script (in this order): "
+  echo "  1) compiler version number for nvcc"
+  echo "  2) CUDA compute architecture (number only, e.g., '90' not 'sm_90')"
+  echo "  3) compiler version number for clang"
+  echo
+  echo "For example: "
+  echo "    toss4_nvcc_clang.sh 12.6.0 90 14.0.6"
+  echo
+  echo "    toss4_nvcc_clang.sh 12.9.1 90 19.1.3-magic"
+  echo "    (note: a compilation issue with one RAJA benchmark code)"
+  exit
+fi
+
+COMP_NVCC_VER=$1
+COMP_ARCH=$2
+COMP_CLANG_VER=$3
+shift 3
+
+BUILD_SUFFIX=lc_toss4-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-clang${COMP_CLANG_VER}
+
+echo
+echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it"
+echo "Configuration extra arguments:"
+echo "   $@"
+echo
+
+rm -rf build_${BUILD_SUFFIX} >/dev/null
+mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX}
+
+module load cmake/3.25.2
+
+cmake \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_CXX_COMPILER=/usr/tce/packages/clang/clang-${COMP_CLANG_VER}/bin/clang++ \
+  -DBLT_CXX_STD=c++17 \
+  -C ../host-configs/lc-builds/toss4/nvcc_clang_X.cmake \
+  -DENABLE_CLANGFORMAT=Off \
+  -DCLANGFORMAT_EXECUTABLE=/usr/tce/packages/clang/clang-14.0.6/bin/clang-format \
+  -DENABLE_OPENMP=On \
+  -DENABLE_CUDA=On \
+  -DRAJA_ENABLE_NVTX=On \
+  -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER} \
+  -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc \
+  -DCMAKE_CUDA_ARCHITECTURES=${COMP_ARCH} \
+  -DENABLE_BENCHMARKS=On \
+  -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
+  "$@" \
+  ..
+
+echo
+echo "***********************************************************************"
+echo
+echo "cd into directory build_${BUILD_SUFFIX} and run make to build RAJA"
+echo
+echo "On LC machines, you are likely to hit cudafe++ signal errors due to"
+echo "insufficient virtual memory if you try to build in parallel using all"
+echo "available cores; i.e., make -j. Try backing off to make -j 16."
+echo
+echo "***********************************************************************"
diff --git a/scripts/lc-builds/toss4_nvcc_gcc.sh b/scripts/lc-builds/toss4_nvcc_gcc.sh
new file mode 100755
index 0000000000..d5b78b5da3
--- /dev/null
+++ b/scripts/lc-builds/toss4_nvcc_gcc.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+###############################################################################
+# Copyright (c) 2016-25, Lawrence Livermore National Security, LLC
+# and RAJA project contributors. See the RAJA/LICENSE file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+###############################################################################
+
+if [[ $# -lt 3 ]]; then
+  echo
+  echo "You must pass 3 arguments to the script (in this order): "
+  echo "  1) compiler version number for nvcc"
+  echo "  2) CUDA compute architecture (number only, e.g., '90' not 'sm_90')"
+  echo "  3) compiler version number for gcc"
+  echo
+  echo "For example: "
+  echo "    toss4_nvcc_gcc.sh 12.6.0 90 13.3.1-magic"
+  echo
+  echo "    toss4_nvcc_gcc.sh 12.9.1 90 13.3.1-magic"
+  echo "    (note: a compilation issue with one RAJA benchmark code)"
+  exit
+fi
+
+COMP_NVCC_VER=$1
+COMP_ARCH=$2
+COMP_GCC_VER=$3
+shift 3
+
+BUILD_SUFFIX=lc_toss4-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-gcc${COMP_GCC_VER}
+
+echo
+echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it"
+echo "Configuration extra arguments:"
+echo "   $@"
+echo
+
+rm -rf build_${BUILD_SUFFIX} >/dev/null
+mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX}
+
+module load cmake/3.25.2
+
+cmake \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_CXX_COMPILER=/usr/tce/packages/gcc/gcc-${COMP_GCC_VER}/bin/g++ \
+  -DBLT_CXX_STD=c++17 \
+  -C ../host-configs/lc-builds/toss4/nvcc_gcc_X.cmake \
+  -DENABLE_CLANGFORMAT=Off \
+  -DENABLE_OPENMP=On \
+  -DENABLE_CUDA=On \
+  -DRAJA_ENABLE_NVTX=On \
+  -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER} \
+  -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc \
+  -DCMAKE_CUDA_ARCHITECTURES=${COMP_ARCH} \
+  -DENABLE_BENCHMARKS=On \
+  -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
+  "$@" \
+  ..
+
+echo
+echo "***********************************************************************"
+echo
+echo "cd into directory build_${BUILD_SUFFIX} and run make to build RAJA"
+echo
+echo "On LC machines, you are likely to hit cudafe++ signal errors due to"
+echo "insufficient virtual memory if you try to build in parallel using all"
+echo "available cores; i.e., make -j. Try backing off to make -j 16."
+echo
+echo "***********************************************************************"
diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs
index b7b88725bc..fddc4f16ee 160000
--- a/scripts/radiuss-spack-configs
+++ b/scripts/radiuss-spack-configs
@@ -1 +1 @@
-Subproject commit b7b88725bc2e5045fd3adef2b971a0f25f3b96e1
+Subproject commit fddc4f16ee987abc9c1c61879eaf8a2d6a8253d9
diff --git a/scripts/uberenv b/scripts/uberenv
index 6ba67dcbd7..bec05e20bf 160000
--- a/scripts/uberenv
+++ b/scripts/uberenv
@@ -1 +1 @@
-Subproject commit 6ba67dcbd7ccbb9c03920b89de19ac959e2c3bdd
+Subproject commit bec05e20bf2a1634d97ead358a9072c36f1fdcac
diff --git a/share/raja/cmake/RAJA-config.cmake.in b/share/raja/cmake/RAJA-config.cmake.in
index ca30ea323e..54531c8c60 100644
--- a/share/raja/cmake/RAJA-config.cmake.in
+++ b/share/raja/cmake/RAJA-config.cmake.in
@@ -39,25 +39,39 @@ include(CMakeFindDependencyMacro)
 
 if (NOT TARGET camp)
   set(RAJA_CAMP_DIR "@camp_DIR@")
-  if(NOT camp_DIR) 
-    set(camp_DIR ${RAJA_CAMP_DIR}) 
+  if(NOT camp_DIR)
+    set(camp_DIR ${RAJA_CAMP_DIR})
   endif()
-  find_dependency(camp CONFIG NO_DEFAULT_PATH PATHS 
+  find_dependency(camp CONFIG NO_DEFAULT_PATH PATHS
     ${camp_DIR}
     ${camp_DIR}/lib/cmake/camp
    ${RAJA_PACKAGE_PREFIX_DIR}
    ${RAJA_PACKAGE_PREFIX_DIR}/lib/cmake/camp)
 endif ()
 
+if (@RAJA_ENABLE_CALIPER@)
+  if (NOT TARGET caliper)
+    set(RAJA_CALIPER_DIR "@caliper_DIR@")
+    if(NOT caliper_DIR)
+      set(caliper_DIR ${RAJA_CALIPER_DIR})
+    endif()
+    find_dependency(caliper CONFIG NO_DEFAULT_PATH PATHS
+      ${caliper_DIR}
+      ${caliper_DIR}/share/cmake/caliper
+      ${RAJA_PACKAGE_PREFIX_DIR}
+      ${RAJA_PACKAGE_PREFIX_DIR}/share/cmake/caliper)
+  endif ()
+endif ()
+
 if (@RAJA_ENABLE_DESUL_ATOMICS@)
   if (NOT TARGET desul_atomics)
     set(RAJA_DESUL_DIR "@desul_DIR@")
-    if(NOT desul_DIR) 
-      set(desul_DIR ${RAJA_DESUL_DIR}) 
+    if(NOT desul_DIR)
+      set(desul_DIR ${RAJA_DESUL_DIR})
     endif()
-    find_dependency(desul_atomics CONFIG NO_DEFAULT_PATH PATHS 
+    find_dependency(desul_atomics CONFIG NO_DEFAULT_PATH PATHS
       ${desul_DIR}
       ${desul_DIR}/lib/cmake/desul
       ${RAJA_PACKAGE_PREFIX_DIR}
diff --git a/src/MemUtils_CUDA.cpp b/src/MemUtils_CUDA.cpp
index f327d5b22d..de857b2fe4 100644
--- a/src/MemUtils_CUDA.cpp
+++ b/src/MemUtils_CUDA.cpp
@@ -44,10 +44,7 @@ namespace detail
 cudaStatusInfo g_status;
 
 //! State of the host code in this thread
-cudaStatusInfo tl_status;
-#if defined(RAJA_ENABLE_OPENMP)
-#pragma omp threadprivate(tl_status)
-#endif
+thread_local cudaStatusInfo tl_status;
 
 //! State of raja cuda stream synchronization for cuda reducer objects
 std::unordered_map g_stream_info_map;
diff --git a/src/MemUtils_HIP.cpp b/src/MemUtils_HIP.cpp
index ccb02126dc..d9e72bef05 100644
--- a/src/MemUtils_HIP.cpp
+++ b/src/MemUtils_HIP.cpp
@@ -44,10 +44,7 @@ namespace detail
 hipStatusInfo g_status;
 
 //! State of the host code in this thread
-hipStatusInfo tl_status;
-#if defined(RAJA_ENABLE_OPENMP)
-#pragma omp threadprivate(tl_status)
-#endif
+thread_local hipStatusInfo tl_status;
 
 //! State of raja hip stream synchronization for hip reducer objects
 std::unordered_map g_stream_info_map;
diff --git a/src/MemUtils_SYCL.cpp b/src/MemUtils_SYCL.cpp
index 568e86e528..d7a5628456 100644
--- a/src/MemUtils_SYCL.cpp
+++ b/src/MemUtils_SYCL.cpp
@@ -42,10 +42,7 @@ namespace detail
 syclInfo g_status;
 
 //! State of the host code in this thread
-syclInfo tl_status;
-#if defined(RAJA_ENABLE_OPENMP)
-#pragma omp threadprivate(tl_status)
-#endif
+thread_local syclInfo tl_status;
 
 //! State of raja sycl queue synchronization for sycl reducer objects
 std::unordered_map<::sycl::queue, bool> g_queue_info_map {
diff --git a/test/install/using-with-cmake/CMakeLists.txt b/test/install/using-with-cmake/CMakeLists.txt
index bc07743edf..5c42c01632 100644
--- a/test/install/using-with-cmake/CMakeLists.txt
+++ b/test/install/using-with-cmake/CMakeLists.txt
@@ -5,22 +5,18 @@
 # SPDX-License-Identifier: (BSD-3-Clause)
 ###############################################################################
 
-if (ENABLE_HIP)
-  cmake_minimum_required(VERSION 3.23)
-else()
-  cmake_minimum_required(VERSION 3.20)
-endif()
+cmake_minimum_required(VERSION 3.23)
 
-  project(using_with_cmake)
+project(using_with_cmake)
 
-  if(NOT DEFINED RAJA_DIR OR NOT EXISTS ${RAJA_DIR}/lib/cmake/raja/raja-config.cmake)
-    message(FATAL_ERROR "Missing required 'RAJA_DIR' variable pointing to an installed RAJA")
-  endif()
+if(NOT DEFINED RAJA_DIR OR NOT EXISTS ${RAJA_DIR}/lib/cmake/raja/raja-config.cmake)
+  message(FATAL_ERROR "Missing required 'RAJA_DIR' variable pointing to an installed RAJA")
+endif()
 
-  find_package(RAJA REQUIRED
-               NO_DEFAULT_PATH
-               PATHS ${RAJA_DIR}/lib/cmake/raja)
+find_package(RAJA REQUIRED
+             NO_DEFAULT_PATH
+             PATHS ${RAJA_DIR}/lib/cmake/raja)
 
-  add_executable(using-with-cmake using-with-cmake.cpp)
-  target_link_libraries(using-with-cmake RAJA)
+add_executable(using-with-cmake using-with-cmake.cpp)
+target_link_libraries(using-with-cmake RAJA)
 
diff --git a/test/install/using-with-cmake/host-config.cmake.in b/test/install/using-with-cmake/host-config.cmake.in
index 6a939d69dc..c2e3c42fab 100644
--- a/test/install/using-with-cmake/host-config.cmake.in
+++ b/test/install/using-with-cmake/host-config.cmake.in
@@ -16,6 +16,17 @@ set(CMAKE_CXX_FLAGS "@CMAKE_CXX_FLAGS@" CACHE STRING "")
 set(CMAKE_Fortran_FLAGS "@CMAKE_Fortran_FLAGS@" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "@CMAKE_EXE_LINKER_FLAGS@" CACHE STRING "")
 
+# CUDA
+set(CMAKE_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@" CACHE PATH "")
+set(CMAKE_CUDA_HOST_COMPILER "@CMAKE_CXX_COMPILER@" CACHE PATH "")
+set(CUDA_TOOLKIT_ROOT_DIR "@CUDAToolkit_ROOT@" CACHE PATH "")
+set(CMAKE_CUDA_ARCHITECTURES "@CMAKE_CUDA_ARCHITECTURES@" CACHE STRING "")
+
+# HIP
+set(ROCM_PATH "@ROCM_PATH@" CACHE PATH "")
+set(CMAKE_HIP_COMPILER "@CMAKE_HIP_COMPILER@" CACHE PATH "")
+set(CMAKE_HIP_ARCHITECTURES "@CMAKE_HIP_ARCHITECTURES@" CACHE STRING "")
+
 # MPI
 set(ENABLE_MPI @ENABLE_MPI@ CACHE BOOL "")
 set(MPI_C_COMPILER "@MPI_C_COMPILER@" CACHE PATH "")
diff --git a/tpl/camp b/tpl/camp
index 4070ce93a8..a8caefa9f4 160000
--- a/tpl/camp
+++ b/tpl/camp
@@ -1 +1 @@
-Subproject commit 4070ce93a802849d61037310a87c50cc24c9e498
+Subproject commit a8caefa9f4c811b1a114b4ed2c9b681d40f12325