Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion checkout-versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
versions:
ramble: 6b4ecac # develop on 5/06/2025 (newer than 0.6.0 release)
spack: a85ec51 # Dec. 1 2025 v1.1.0
spack-packages: d13c881 # Dec. 3 2025
spack-packages: 7a3a7ad # Jan. 5 2026
368 changes: 2 additions & 366 deletions repo/raja-perf/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,379 +3,15 @@
#
# SPDX-License-Identifier: Apache-2.0

import glob
import os
import re
import socket
from os import environ as env
from os.path import join as pjoin

from spack.package import *
from spack_repo.builtin.packages.raja_perf.package import RajaPerf as BuiltinRajaPerf


def spec_uses_toolchain(spec):
gcc_toolchain_regex = re.compile(".*gcc-toolchain.*")
using_toolchain = list(filter(gcc_toolchain_regex.match, spec.compiler_flags["cxxflags"]))
return using_toolchain

def spec_uses_gccname(spec):
gcc_name_regex = re.compile(".*gcc-name.*")
using_gcc_name = list(filter(gcc_name_regex.match, spec.compiler_flags["cxxflags"]))
return using_gcc_name

def hip_repair_options(options, spec):
# there is only one dir like this, but the version component is unknown
options.append(
"-DHIP_CLANG_INCLUDE_PATH="
+ glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0]
)

def hip_repair_cache(options, spec):
# there is only one dir like this, but the version component is unknown
options.append(
cmake_cache_path(
"HIP_CLANG_INCLUDE_PATH",
glob.glob("{}/llvm/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0],
)
)

def hip_for_radiuss_projects(options, spec, spec_compiler):
# Here is what is typically needed for radiuss projects when building with rocm
hip_root = spec["hip"].prefix
rocm_root = hip_root + "/.."
options.append(cmake_cache_path("HIP_ROOT_DIR", hip_root))
options.append(cmake_cache_path("ROCM_ROOT_DIR", rocm_root))

hip_repair_cache(options, spec)

archs = spec.variants["amdgpu_target"].value
if archs != "none":
arch_str = ",".join(archs)
options.append(
cmake_cache_string("AMDGPU_TARGETS", "{0}".format(arch_str))
)
options.append(
cmake_cache_string("HIP_HIPCC_FLAGS", "--amdgpu-target={0}".format(arch_str))
)
options.append(
cmake_cache_string("CMAKE_HIP_ARCHITECTURES", arch_str)
)

# adrienbernede-22-11:
# Specific to Umpire, attempt port to RAJA and CHAI
hip_link_flags = ""
if "%gcc" in spec or spec_uses_toolchain(spec):
if "%gcc" in spec:
gcc_bin = os.path.dirname(spec_compiler.cxx)
gcc_prefix = join_path(gcc_bin, "..")
else:
gcc_prefix = spec_uses_toolchain(spec)[0]
options.append(cmake_cache_string("HIP_CLANG_FLAGS", "--gcc-toolchain={0}".format(gcc_prefix)))
options.append(cmake_cache_string("CMAKE_EXE_LINKER_FLAGS", hip_link_flags + " -Wl,-rpath {}/lib64".format(gcc_prefix)))
else:
options.append(cmake_cache_string("CMAKE_EXE_LINKER_FLAGS", "-Wl,-rpath={0}/llvm/lib/".format(rocm_root)))

def cuda_for_radiuss_projects(options, spec):
# Here is what is typically needed for radiuss projects when building with cuda

cuda_flags = []
if not spec.satisfies("cuda_arch=none"):
cuda_arch = spec.variants["cuda_arch"].value
cuda_flags.append("-arch sm_{0}".format(cuda_arch[0]))
options.append(
cmake_cache_string("CUDA_ARCH", "sm_{0}".format(cuda_arch[0])))
options.append(
cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", "{0}".format(cuda_arch[0])))
if spec_uses_toolchain(spec):
cuda_flags.append("-Xcompiler {}".format(spec_uses_toolchain(spec)[0]))
if (spec.satisfies("target=ppc64le %[email protected]:")):
cuda_flags.append("-Xcompiler -mno-float128")
options.append(cmake_cache_string("CMAKE_CUDA_FLAGS", " ".join(cuda_flags)))

def blt_link_helpers(options, spec, spec_compiler):

spec_cxx = getattr(spec_compiler, "cxx", None) or ""
spec_fc = getattr(spec_compiler, "fc", None) or ""

### From local package:
fortran_compilers = ["gfortran", "xlf"]
if any(compiler in spec_fc for compiler in fortran_compilers) and ("clang" in spec_cxx):
# Pass fortran compiler lib as rpath to find missing libstdc++
libdir = os.path.join(os.path.dirname(
os.path.dirname(spec_compiler.fc)), "lib")
flags = ""
for _libpath in [libdir, libdir + "64"]:
if os.path.exists(_libpath):
flags += " -Wl,-rpath,{0}".format(_libpath)
description = ("Adds a missing libstdc++ rpath")
if flags:
options.append(cmake_cache_string("BLT_EXE_LINKER_FLAGS", flags, description))

# Ignore conflicting default gcc toolchain
options.append(cmake_cache_string("BLT_CMAKE_IMPLICIT_LINK_DIRECTORIES_EXCLUDE",
"/usr/tce/packages/gcc/gcc-4.9.3/lib64;/usr/tce/packages/gcc/gcc-4.9.3/gnu/lib64/gcc/powerpc64le-unknown-linux-gnu/4.9.3;/usr/tce/packages/gcc/gcc-4.9.3/gnu/lib64;/usr/tce/packages/gcc/gcc-4.9.3/lib64/gcc/x86_64-unknown-linux-gnu/4.9.3"))

compilers_using_toolchain = ["pgi", "xl", "icpc"]
if any(compiler in spec_cxx for compiler in compilers_using_toolchain):
if spec_uses_toolchain(spec) or spec_uses_gccname(spec):

# Ignore conflicting default gcc toolchain
options.append(cmake_cache_string("BLT_CMAKE_IMPLICIT_LINK_DIRECTORIES_EXCLUDE",
"/usr/tce/packages/gcc/gcc-4.9.3/lib64;/usr/tce/packages/gcc/gcc-4.9.3/gnu/lib64/gcc/powerpc64le-unknown-linux-gnu/4.9.3;/usr/tce/packages/gcc/gcc-4.9.3/gnu/lib64;/usr/tce/packages/gcc/gcc-4.9.3/lib64/gcc/x86_64-unknown-linux-gnu/4.9.3"))

class RajaPerf(CachedCMakePackage, CudaPackage, ROCmPackage):
class RajaPerf(BuiltinRajaPerf, CachedCMakePackage, CudaPackage, ROCmPackage):
"""RAJA Performance Suite."""

homepage = "http://software.llnl.gov/RAJAPerf/"
git = "https://github.com/LLNL/RAJAPerf.git"

version("develop", branch="develop", submodules="True")
version("main", branch="main", submodules="True")
version("2025.03.0", tag="v2025.03.0", submodules="True")
version("2024.07.0", tag="v2024.07.0", submodules="True")
version("2022.10.0", tag="v2022.10.0", submodules="True")
version("0.12.0", tag="v0.12.0", submodules="True")
version("0.11.0", tag="v0.11.0", submodules="True")
version("0.10.0", tag="v0.10.0", submodules="True")
version("0.9.0", tag="v0.9.0", submodules="True")
version("0.8.0", tag="v0.8.0", submodules="True")
version("0.7.0", tag="v0.7.0", submodules="True")
version("0.6.0", tag="v0.6.0", submodules="True")
version("0.5.2", tag="v0.5.2", submodules="True")
version("0.5.1", tag="v0.5.1", submodules="True")
version("0.5.0", tag="v0.5.0", submodules="True")
version("0.4.0", tag="v0.4.0", submodules="True")

variant("mpi", default=True, description="Enable MPI support")
variant("openmp", default=True, description="Build OpenMP backend")
variant("openmp_target", default=False, description="Build with OpenMP target support")
variant("shared", default=False, description="Build Shared Libs")
variant("libcpp", default=False, description="Uses libc++ instead of libstdc++")
variant("tests", default="basic", values=("none", "basic", "benchmarks"),
multi=False, description="Tests to run")
variant("caliper",default=False, description="Build with support for Caliper based profiling")
variant("kokkos", default=False, description="Include Kokkos implementations of the kernels in RAJAPerf")

depends_on("c", type="build")
depends_on("cxx", type="build")
depends_on("fortran", type="build")

depends_on("blt")
depends_on("[email protected]:", type="build", when="@2025.03.0:")
depends_on("[email protected]:", type="build", when="@2022.10.0:")
depends_on("[email protected]:", type="build", when="@0.12.0:")
depends_on("[email protected]:", type="build", when="@0.11.0:")
depends_on("[email protected]:", type="build", when="@0.8.0:")
depends_on("[email protected]:", type="build", when="@:0.7.0")

depends_on("[email protected]:", when="@0.12.0:", type="build")
depends_on("[email protected]:", when="@0.12.0: +rocm", type="build")
depends_on("[email protected]:", when="@0.12.0: +cuda")
depends_on("[email protected]:", when="@:0.12.0", type="build")

depends_on("llvm-openmp", when="+openmp %apple-clang")

depends_on("rocprim", when="+rocm")

conflicts("~openmp", when="+openmp_target", msg="OpenMP target requires OpenMP")
conflicts("+cuda", when="+openmp_target", msg="Cuda may not be activated when openmp_target is ON")

depends_on("caliper", when="+caliper")
depends_on("[email protected]:", when="+caliper")

depends_on("mpi", when="+mpi")

def _get_sys_type(self, spec):
sys_type = str(spec.architecture)
# if on llnl systems, we can use the SYS_TYPE
if "SYS_TYPE" in env:
sys_type = env["SYS_TYPE"]
return sys_type

@property
# TODO: name cache file conditionally to cuda and libcpp variants
def cache_name(self):
hostname = socket.gethostname()
if "SYS_TYPE" in env:
hostname = hostname.rstrip("1234567890")
var=""
if "+cuda" in self.spec:
var= "-".join([var,"cuda"])
if "+libcpp" in self.spec:
var="-".join([var,"libcpp"])

return "{0}-{1}{2}-{3}@{4}-{5}.cmake".format(
hostname,
self._get_sys_type(self.spec),
var,
self.spec.compiler.name,
self.spec.compiler.version,
self.spec.dag_hash(8)
)

def initconfig_compiler_entries(self):
spec = self.spec
compiler = self.compiler
# Default entries are already defined in CachedCMakePackage, inherit them:
entries = super(RajaPerf, self).initconfig_compiler_entries()

# Switch to hip as a CPP compiler.
# adrienbernede-22-11:
# This was only done in upstream Spack raja package.
# I could not find the equivalent logic in Spack source, so keeping it.
#if "+rocm" in spec:
# entries.insert(0, cmake_cache_path("CMAKE_CXX_COMPILER", spec["hip"].hipcc))

# Override CachedCMakePackage CMAKE_C_FLAGS and CMAKE_CXX_FLAGS add
# +libcpp specific flags
flags = spec.compiler_flags

# use global spack compiler flags
cppflags = " ".join(flags["cppflags"])
if cppflags:
# avoid always ending up with " " with no flags defined
cppflags += " "

cflags = cppflags + " ".join(flags["cflags"])
if "+libcpp" in spec:
cflags += " ".join([cflags,"-DGTEST_HAS_CXXABI_H_=0"])
if cflags:
entries.append(cmake_cache_string("CMAKE_C_FLAGS", cflags))

cxxflags = cppflags + " ".join(flags["cxxflags"])
if "+libcpp" in spec:
cxxflags += " ".join([cxxflags,"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0"])
if cxxflags:
entries.append(cmake_cache_string("CMAKE_CXX_FLAGS", cxxflags))

blt_link_helpers(entries, spec, compiler)

# adrienbernede-23-01
# Maybe we want to share this in the above blt_link_helpers function.
compilers_using_cxx14 = ["intel-17", "intel-18", "xl"]
cxx_name = getattr(self.compiler, "cxx", None) or ""
if any(compiler in cxx_name for compiler in compilers_using_cxx14):
entries.append(cmake_cache_string("BLT_CXX_STD", "c++14"))

return entries

def initconfig_hardware_entries(self):
spec = self.spec
compiler = self.compiler
entries = super(RajaPerf, self).initconfig_hardware_entries()

entries.append(cmake_cache_option("ENABLE_OPENMP", "+openmp" in spec))

entries.append(cmake_cache_option("ENABLE_MPI", "+mpi" in spec))
if "+mpi" in spec:
entries.append(cmake_cache_string("MPI_CXX_COMPILER", spec["mpi"].mpicxx))

# T benefit from the shared function "cuda_for_radiuss_projects",
# we do not modify CMAKE_CUDA_FLAGS: it is already appended by the
# shared function.
if "+cuda" in spec:
entries.append(cmake_cache_option("ENABLE_CUDA", True))
# Shared handling of cuda.
cuda_for_radiuss_projects(entries, spec)

# Custom options. We place everything in CMAKE_CUDA_FLAGS_(RELEASE|RELWITHDEBINFO|DEBUG) which are not set by cuda_for_radiuss_projects
if ("xl" in self.compiler.cxx):
all_targets_flags = "-Xcompiler -qstrict -Xcompiler -qxlcompatmacros -Xcompiler -qalias=noansi" \
+ "-Xcompiler -qsmp=omp -Xcompiler -qhot -Xcompiler -qnoeh" \
+ "-Xcompiler -qsuppress=1500-029 -Xcompiler -qsuppress=1500-036" \
+ "-Xcompiler -qsuppress=1500-030" \

cuda_release_flags = "-O3 -Xcompiler -O2 " + all_targets_flags
cuda_reldebinf_flags = "-O3 -g -Xcompiler -O2 " + all_targets_flags
cuda_debug_flags = "-O0 -g -Xcompiler -O2 " + all_targets_flags

elif ("gcc" in self.compiler.cxx):
all_targets_flags = "-Xcompiler -finline-functions -Xcompiler -finline-limit=20000"

cuda_release_flags = "-O3 -Xcompiler -Ofast " + all_targets_flags
cuda_reldebinf_flags = "-O3 -g -Xcompiler -Ofast " + all_targets_flags
cuda_debug_flags = "-O0 -g -Xcompiler -O0 " + all_targets_flags

else:
all_targets_flags = "-Xcompiler -finline-functions"

cuda_release_flags = "-O3 -Xcompiler -Ofast " + all_targets_flags
cuda_reldebinf_flags = "-O3 -g -Xcompiler -Ofast " + all_targets_flags
cuda_debug_flags = "-O0 -g -Xcompiler -O0 " + all_targets_flags

entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS_RELEASE", cuda_release_flags))
entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS_RELWITHDEBINFO", cuda_reldebinf_flags))
entries.append(cmake_cache_string("CMAKE_CUDA_FLAGS_DEBUG", cuda_debug_flags))

else:
entries.append(cmake_cache_option("ENABLE_CUDA", False))

if "+rocm" in spec:
entries.append(cmake_cache_option("ENABLE_HIP", True))
hip_for_radiuss_projects(entries, spec, compiler)
else:
entries.append(cmake_cache_option("ENABLE_HIP", False))

if "+cuda" in spec or "+rocm" in spec:
entries.append(cmake_cache_string("RAJA_PERFSUITE_GPU_BLOCKSIZES", "64,128,256,512,1024"))

entries.append(cmake_cache_option("ENABLE_OPENMP_TARGET", "+openmp_target" in spec))
if "+openmp_target" in spec:
if ("%xl" in spec):
entries.append(cmake_cache_string("BLT_OPENMP_COMPILE_FLAGS", "-qoffload;-qsmp=omp;-qnoeh;-qalias=noansi"))
entries.append(cmake_cache_string("BLT_OPENMP_LINK_FLAGS", "-qoffload;-qsmp=omp;-qnoeh;-qalias=noansi"))
if ("%clang" in spec):
entries.append(cmake_cache_string("BLT_OPENMP_COMPILE_FLAGS", "-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda"))
entries.append(cmake_cache_string("BLT_OPENMP_LINK_FLAGS", "-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda"))

return entries

def initconfig_package_entries(self):
spec = self.spec
entries = []

option_prefix = "RAJA_" if spec.satisfies("@0.14.0:") else ""

# TPL locations
entries.append("#------------------{0}".format("-" * 60))
entries.append("# TPLs")
entries.append("#------------------{0}\n".format("-" * 60))

entries.append(cmake_cache_path("BLT_SOURCE_DIR", spec["blt"].prefix))

# Build options
entries.append("#------------------{0}".format("-" * 60))
entries.append("# Build Options")
entries.append("#------------------{0}\n".format("-" * 60))

entries.append(cmake_cache_string(
"CMAKE_BUILD_TYPE", spec.variants["build_type"].value))

entries.append(cmake_cache_string("RAJA_RANGE_ALIGN", "4"))
entries.append(cmake_cache_string("RAJA_RANGE_MIN_LENGTH", "32"))
entries.append(cmake_cache_string("RAJA_DATA_ALIGN", "64"))

entries.append(cmake_cache_option("RAJA_HOST_CONFIG_LOADED", True))

entries.append(cmake_cache_option("BUILD_SHARED_LIBS","+shared" in spec))
entries.append(cmake_cache_option("ENABLE_OPENMP","+openmp" in spec))

entries.append(cmake_cache_option("ENABLE_BENCHMARKS", "tests=benchmarks" in spec))
entries.append(cmake_cache_option("ENABLE_TESTS", not "tests=none" in spec or self.run_tests))

entries.append(cmake_cache_option("RAJA_PERFSUITE_USE_CALIPER","+caliper" in spec))
if "+caliper" in self.spec:
entries.append(cmake_cache_path("caliper_DIR", spec["caliper"].prefix+"/share/cmake/caliper/"))
entries.append(cmake_cache_path("adiak_DIR", spec["adiak"].prefix+"/lib/cmake/adiak/"))

entries.append(cmake_cache_option("ENABLE_KOKKOS", "+kokkos" in spec))

return entries

def cmake_args(self):
options = [f"-DMPI_CXX_LINK_FLAGS='{self.spec['mpi'].libs.ld_flags}'"]
return options

def setup_build_environment(self, env):
super().setup_build_environment(env)
if "+cuda" in self.spec:
Expand Down
Loading