class PyScaffold(
    Experiment,
    CudaExperiment,
    ROCmExperiment,
    Scaling(ScalingMode.Strong, ScalingMode.Weak),
    Caliper,
):
    """Benchpark experiment definition for the ScaFFold (py-scaffold) benchmark.

    The application is registered with two package managers: a spack spec
    (``py-scaffold@<version>``) and a pip requirements fragment that installs
    ScaFFold and distconv from local checkouts given by the ``scaffold_path``
    and ``distconv_path`` variants.
    """

    maintainers("michaelmckinsey1")

    variant(
        "workload",
        default="sweep",
        values=("sweep",),
    )

    # NOTE: a single space is used as the "unset" sentinel for both paths.
    variant(
        "scaffold_path",
        default=" ",
        description="Path to local repository of ScaFFold (i.e. git clone), since it is private.",
    )

    variant(
        "distconv_path",
        default=" ",
        description="Path to private distconv repository (required package)",
    )

    variant(
        "version",
        default="main",
        values=("main", "sharedmem", "procruns"),
        description="app version",
    )

    def compute_applications_section(self):
        """Register experiment variables, scaling rules and required variables.

        ``package_path`` is exported so the application definition can locate
        the ScaFFold checkout. ``n_gpus`` and ``problem_scale`` start from
        mode-specific values and are registered as scalable variables.
        """
        self.add_experiment_variable(
            "package_path", self.spec.variants["scaffold_path"][0], False
        )

        # (n_gpus, problem_scale) starting point per scaling mode.
        if self.spec.satisfies("+strong"):
            n_gpus, problem_scale = 4, 6
        elif self.spec.satisfies("+weak"):
            n_gpus, problem_scale = 1, 5
        else:
            n_gpus, problem_scale = 1, 6

        self.add_experiment_variable("n_gpus", n_gpus, True)
        self.add_experiment_variable("problem_scale", problem_scale, True)

        self.register_scaling_config(
            {
                # Strong scaling: more GPUs, same problem.
                ScalingMode.Strong: {
                    "n_gpus": lambda var, itr, dim, scaling_factor: var.val(dim)
                    * scaling_factor,
                    "problem_scale": lambda var, itr, dim, scaling_factor: var.val(
                        dim
                    ),
                },
                # Weak scaling: each step multiplies the GPU count by a fixed
                # 2**3 and increments problem_scale by one; scaling_factor is
                # deliberately unused here.
                # NOTE(review): presumably +1 in problem_scale doubles each of
                # the three spatial dimensions (8x volume) - confirm.
                ScalingMode.Weak: {
                    "n_gpus": lambda var, itr, dim, scaling_factor: var.val(dim)
                    * 2**3,
                    "problem_scale": lambda var, itr, dim, scaling_factor: var.val(
                        dim
                    )
                    + 1,
                },
            }
        )

        self.set_required_variables(
            n_resources="{n_gpus}",
            process_problem_size="({problem_scale}-4)/({n_gpus}/({problem_scale}-4)**3)",
            total_problem_size="{problem_scale}",
        )

    def compute_package_section(self):
        """Register the spack spec and the pip requirements for the application.

        Raises:
            ValueError: if neither ``+cuda`` nor ``+rocm`` is enabled (no pip
                extra can be selected), or if ``distconv_path`` was left unset.
        """
        variants = self.spec.variants
        scaffold_path = variants["scaffold_path"][0]
        distconv_path = variants["distconv_path"][0]

        # Pick the pip "extra" that selects the GPU flavour of the wheel.
        if self.spec.satisfies("+rocm"):
            # LLNL systems use a site-specific wheel for rocm.
            model = "rocmwci" if "llnl" in self.system_spec._name else "rocm"
        elif self.spec.satisfies("+cuda"):
            model = "cuda"
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError while formatting the requirement string.
            raise ValueError(
                "py-scaffold requires a GPU programming model: enable +cuda or +rocm"
            )

        # Validate before registering anything so a failure leaves no
        # half-populated package section behind.
        if not distconv_path.strip():
            raise ValueError("Must set distconv_path variant to valid repository path")

        self.add_package_spec(
            self.name,
            [f"py-scaffold@{variants['version'][0]}"],
            package_manager="spack",
        )

        # Spec that will be written into requirements.txt for pip install.
        # Extra index for the torch wheel, plus the PyPI index for packages
        # that won't be found on WCI.
        requirements = "\n".join(
            (
                "--extra-index-url https://download.pytorch.org/whl/",
                "--extra-index-url https://pypi.org/simple",
                f"{scaffold_path}[{model}]",
                f"{distconv_path}",
            )
        )
        self.add_package_spec(self.name, [requirements], package_manager="pip")
package_specs["caliper"][ - "pkg_spec" + "spack_pkg_spec" ] += "~papi+rocm amdgpu_target={}".format( system_specs["rocm_arch"] ) @@ -99,7 +109,7 @@ def compute_package_section(self): elif self.spec.satisfies("caliper=time") or self.spec.satisfies( "caliper=mpi" ): - package_specs["caliper"]["pkg_spec"] += "~papi" + package_specs["caliper"]["spack_pkg_spec"] += "~papi" return { "packages": {k: v for k, v in package_specs.items() if v}, diff --git a/lib/benchpark/cmd/setup.py b/lib/benchpark/cmd/setup.py index 4c364e826..dcf362e5d 100644 --- a/lib/benchpark/cmd/setup.py +++ b/lib/benchpark/cmd/setup.py @@ -204,7 +204,7 @@ def include_fn(fname): ) pkg_str = "" - if pkg_manager == "spack": + if "spack" in pkg_manager: spack_build_stage = experiments_root / "builds" spack_user_cache_path = experiments_root / "spack-cache" spack, first_time_spack = per_workspace_setup.spack_first_time_setup() diff --git a/lib/benchpark/experiment.py b/lib/benchpark/experiment.py index ab2d299a7..5f23a48f2 100644 --- a/lib/benchpark/experiment.py +++ b/lib/benchpark/experiment.py @@ -129,14 +129,14 @@ def compute_package_section(self): if not self.spec.satisfies("affinity=none"): package_specs["affinity"] = { - "pkg_spec": f"affinity@{affinity_version}+mpi", + "spack_pkg_spec": f"affinity@{affinity_version}+mpi", "compiler": system_specs["compiler"], } if self.spec.satisfies("+cuda"): - package_specs["affinity"]["pkg_spec"] += "+cuda" + package_specs["affinity"]["spack_pkg_spec"] += "+cuda" elif self.spec.satisfies("+rocm"): package_specs["affinity"][ - "pkg_spec" + "spack_pkg_spec" ] += "+rocm amdgpu_target={rocm_arch}" return { @@ -201,7 +201,7 @@ class Experiment(ExperimentSystemBase, ExecMode, Affinity, Hwloc): variant( "package_manager", default="spack", - values=("spack", "environment-modules", "user-managed"), + values=("spack", "environment-modules", "user-managed", "pip", "spack-pip"), description="package manager to use", ) @@ -360,7 +360,7 @@ def compute_config_section(self): 
def add_package_spec(self, package_name, spec=None, package_manager="spack"):
    """Register a package spec for ``package_name`` under one package manager.

    Specs are stored in ``self.package_specs[package_name]`` under the key
    ``"<package_manager>_pkg_spec"``, so one package can carry a spec for
    several package managers (e.g. both ``spack`` and ``pip``).

    Args:
        package_name: Name of the package entry to create or update.
        spec: Sequence whose first element is the spec string. When empty or
            ``None`` the entry is only created (as ``{}``) if it is missing.
        package_manager: Prefix of the key the spec is stored under.
    """
    # Create the entry on first use but never discard what is already there:
    # an empty-spec call used to reset the entry and silently drop specs that
    # had been registered for another package manager.
    entry = self.package_specs.setdefault(package_name, {})
    if spec:
        entry[f"{package_manager}_pkg_spec"] = spec[0]
class Caliper(CachedCMakePackage, CudaPackage, ROCmPackage):
    """Caliper is a program instrumentation and performance measurement
    framework. It is designed as a performance analysis toolbox in a
    library, allowing one to bake performance analysis capabilities
    directly into applications and activate them at runtime.
    """

    homepage = "https://github.com/LLNL/Caliper"
    git = "https://github.com/LLNL/Caliper.git"
    url = "https://github.com/LLNL/Caliper/archive/v2.12.1.tar.gz"
    tags = ["e4s", "radiuss"]

    maintainers("daboehme", "adrienbernede")

    test_requires_compiler = True

    license("BSD-3-Clause")

    version("master", branch="master")
    version("2.13.1", tag="v2.13.1")
    version("2.12.1", sha256="2b5a8f98382c94dc75cc3f4517c758eaf9a3f9cea0a8dbdc7b38506060d6955c")
    version("2.11.0", sha256="b86b733cbb73495d5f3fe06e6a9885ec77365c8aa9195e7654581180adc2217c")
    version("2.10.0", sha256="14c4fb5edd5e67808d581523b4f8f05ace8549698c0e90d84b53171a77f58565")
    version("2.9.1", sha256="4771d630de505eff9227e0ec498d0da33ae6f9c34df23cb201b56181b8759e9e")
    version("2.9.0", sha256="507ea74be64a2dfd111b292c24c4f55f459257528ba51a5242313fa50978371f")
    version(
        "2.8.0",
        sha256="17807b364b5ac4b05997ead41bd173e773f9a26ff573ff2fe61e0e70eab496e4",
        deprecated=True,
    )
    version(
        "2.7.0",
        sha256="b3bf290ec2692284c6b4f54cc0c507b5700c536571d3e1a66e56626618024b2b",
        deprecated=True,
    )
    version(
        "2.6.0",
        sha256="6efcd3e4845cc9a6169e0d934840766b12182c6d09aa3ceca4ae776e23b6360f",
        deprecated=True,
    )
    version(
        "2.5.0",
        sha256="d553e60697d61c53de369b9ca464eb30710bda90fba9671201543b64eeac943c",
        deprecated=True,
    )
    version(
        "2.4.0", tag="v2.4.0", commit="30577b4b8beae104b2b35ed487fec52590a99b3d", deprecated=True
    )
    version(
        "2.3.0", tag="v2.3.0", commit="9fd89bb0120750d1f9dfe37bd963e24e478a2a20", deprecated=True
    )
    version(
        "2.2.0", tag="v2.2.0", commit="c408e9b3642c7aa80eff37b0826d819c57e7bc04", deprecated=True
    )
    version(
        "2.1.1", tag="v2.1.1", commit="0593b0e01c1d8d3e50c990399cc0fee403485599", deprecated=True
    )
    version(
        "2.0.1", tag="v2.0.1", commit="4d7ff46381c53a461e62edd949e2d9dea9db7b08", deprecated=True
    )
    version(
        "1.9.1", tag="v1.9.1", commit="cfc1defbbee20b50dd3e3477badd09a92b1df970", deprecated=True
    )
    version(
        "1.9.0", tag="v1.9.0", commit="8356e747349b285aa621c5b74e71559f0babc4a1", deprecated=True
    )
    version(
        "1.8.0", tag="v1.8.0", commit="117c1ef596b617dc71407b8b67eebef094a654f8", deprecated=True
    )
    version(
        "1.7.0", tag="v1.7.0", commit="898277c93d884d4e7ca1ffcf3bbea81d22364f26", deprecated=True
    )

    is_linux = sys.platform.startswith("linux")
    variant("shared", default=True, description="Build shared libraries")
    variant("adiak", default=True, description="Enable Adiak support")
    variant("mpi", default=True, description="Enable MPI support")
    # libunwind has some issues on Mac
    variant(
        "libunwind", default=sys.platform != "darwin", description="Enable stack unwind support"
    )
    variant("libdw", default=is_linux, description="Enable DWARF symbol lookup")
    # pthread_self() signature is incompatible with PAPI_thread_init() on Mac
    variant("papi", default=sys.platform != "darwin", description="Enable PAPI service")
    variant("libpfm", default=False, description="Enable libpfm (perf_events) service")
    # Gotcha is Linux-only
    variant("gotcha", default=is_linux, description="Enable GOTCHA support")
    variant("sampler", default=is_linux, description="Enable sampling support on Linux")
    variant("sosflow", default=False, description="Enable SOSflow support")
    variant("fortran", default=False, description="Enable Fortran support")
    variant("variorum", default=False, description="Enable Variorum support")
    variant("vtune", default=False, description="Enable Intel Vtune support")
    variant("kokkos", default=True, when="@2.3.0:", description="Enable Kokkos profiling support")
    variant("tests", default=False, description="Enable tests")
    variant("tools", default=True, description="Enable tools")
    variant("python", default=False, when="@v2.12:", description="Build Python bindings")

    depends_on("c", type="build")
    depends_on("cxx", type="build")
    depends_on("fortran", type="build")

    depends_on("adiak@0.1:0", when="@2.2:2.10 +adiak~python")
    depends_on("adiak@0.4:0", when="@2.11: +adiak~python")

    depends_on("papi@5.3:5", when="@:2.2 +papi")
    depends_on("papi@5.3:", when="@2.3: +papi")

    depends_on("libpfm4@4.8:4", when="+libpfm")

    depends_on("mpi", when="+mpi")
    depends_on("unwind@1.2:1", when="+libunwind")
    depends_on("elfutils", when="+libdw")
    depends_on("variorum", when="+variorum")
    depends_on("intel-oneapi-vtune", when="+vtune")

    depends_on("sosflow@spack", when="@1.0:1+sosflow")

    depends_on("cmake", type="build")
    depends_on("python", type="build")

    depends_on("python@3", when="+python", type=("build", "link", "run"))
    depends_on("adiak+python", when="+python", type=("build", "link", "run"))
    depends_on("py-pybind11@3.0.0:", when="+python", type=("build", "link", "run"))

    # sosflow support not yet in 2.0
    conflicts("+sosflow", "@2:")
    conflicts("+adiak", "@:2.1")
    conflicts("+libdw", "@:2.4")
    conflicts("+rocm", "@:2.7")
    conflicts("+rocm+cuda")

    patch("for_aarch64.patch", when="@:2.11 target=aarch64:")
    patch(
        "sampler-service-missing-libunwind-include-dir.patch",
        when="@2.9.0:2.9.1 +libunwind +sampler",
    )

    def _get_sys_type(self, spec):
        """Return ``$SYS_TYPE`` when it is set, otherwise ``spec.architecture``."""
        sys_type = spec.architecture
        if "SYS_TYPE" in env:
            sys_type = env["SYS_TYPE"]
        return sys_type

    def initconfig_compiler_entries(self):
        """Extend the inherited compiler cache entries with Caliper's options."""
        spec = self.spec
        entries = super().initconfig_compiler_entries()

        if spec.satisfies("+rocm"):
            # Inserted at the front of the list, ahead of the inherited entries.
            entries.insert(0, cmake_cache_path("CMAKE_CXX_COMPILER", spec["hip"].hipcc))

        entries.append(cmake_cache_option("WITH_FORTRAN", spec.satisfies("+fortran")))

        entries.append(cmake_cache_option("BUILD_SHARED_LIBS", spec.satisfies("+shared")))
        entries.append(cmake_cache_option("BUILD_TESTING", spec.satisfies("+tests")))
        entries.append(cmake_cache_option("WITH_TOOLS", spec.satisfies("+tools")))
        entries.append(cmake_cache_option("BUILD_DOCS", False))
        entries.append(cmake_cache_path("PYTHON_EXECUTABLE", spec["python"].command.path))

        return entries

    def initconfig_hardware_entries(self):
        """Extend the inherited hardware cache entries with CUDA/ROCm toggles."""
        spec = self.spec
        entries = super().initconfig_hardware_entries()

        with_cuda = spec.satisfies("+cuda")
        entries.append(cmake_cache_option("WITH_CUPTI", with_cuda))
        entries.append(cmake_cache_option("WITH_NVTX", with_cuda))
        if with_cuda:
            entries.append(cmake_cache_path("CUDA_TOOLKIT_ROOT_DIR", spec["cuda"].prefix))
            entries.append(cmake_cache_path("CUPTI_PREFIX", spec["cuda"].prefix))

        with_rocm = spec.satisfies("+rocm")
        entries.append(cmake_cache_option("WITH_ROCTRACER", with_rocm))
        entries.append(cmake_cache_option("WITH_ROCTX", with_rocm))

        return entries

    def initconfig_mpi_entries(self):
        """Extend the inherited MPI cache entries with the ``WITH_MPI`` toggle."""
        spec = self.spec
        entries = super().initconfig_mpi_entries()

        entries.append(cmake_cache_option("WITH_MPI", spec.satisfies("+mpi")))

        return entries

    def initconfig_package_entries(self):
        """Return cache entries for third-party library prefixes and build options."""
        spec = self.spec
        entries = []

        # TPL locations
        entries.append("#------------------{0}".format("-" * 60))
        entries.append("# TPLs")
        entries.append("#------------------{0}\n".format("-" * 60))

        if spec.satisfies("+adiak"):
            entries.append(cmake_cache_path("adiak_DIR", spec["adiak"].prefix))
        if spec.satisfies("+papi"):
            entries.append(cmake_cache_path("PAPI_PREFIX", spec["papi"].prefix))
        if spec.satisfies("+libdw"):
            entries.append(cmake_cache_path("LIBDW_PREFIX", spec["elfutils"].prefix))
        if spec.satisfies("+libpfm"):
            entries.append(cmake_cache_path("LIBPFM_INSTALL", spec["libpfm4"].prefix))
        if spec.satisfies("+sosflow"):
            entries.append(cmake_cache_path("SOS_PREFIX", spec["sosflow"].prefix))
        if spec.satisfies("+variorum"):
            entries.append(cmake_cache_path("VARIORUM_PREFIX", spec["variorum"].prefix))
        if spec.satisfies("+vtune"):
            itt_dir = join_path(spec["intel-oneapi-vtune"].prefix, "vtune", "latest")
            entries.append(cmake_cache_path("ITT_PREFIX", itt_dir))
        if spec.satisfies("+libunwind"):
            entries.append(cmake_cache_path("LIBUNWIND_PREFIX", spec["unwind"].prefix))

        # Build options
        entries.append("#------------------{0}".format("-" * 60))
        entries.append("# Build Options")
        entries.append("#------------------{0}\n".format("-" * 60))

        entries.append(cmake_cache_option("WITH_ADIAK", spec.satisfies("+adiak")))
        entries.append(cmake_cache_option("WITH_GOTCHA", spec.satisfies("+gotcha")))
        entries.append(cmake_cache_option("WITH_SAMPLER", spec.satisfies("+sampler")))
        entries.append(cmake_cache_option("WITH_PAPI", spec.satisfies("+papi")))
        entries.append(cmake_cache_option("WITH_LIBDW", spec.satisfies("+libdw")))
        entries.append(cmake_cache_option("WITH_LIBPFM", spec.satisfies("+libpfm")))
        entries.append(cmake_cache_option("WITH_SOSFLOW", spec.satisfies("+sosflow")))
        entries.append(cmake_cache_option("WITH_KOKKOS", spec.satisfies("+kokkos")))
        entries.append(cmake_cache_option("WITH_VARIORUM", spec.satisfies("+variorum")))
        entries.append(cmake_cache_option("WITH_VTUNE", spec.satisfies("+vtune")))
        entries.append(cmake_cache_option("WITH_PYTHON_BINDINGS", spec.satisfies("+python")))

        # -DWITH_CALLPATH was renamed -DWITH_LIBUNWIND in 2.5
        callpath_flag = "LIBUNWIND" if spec.satisfies("@2.5:") else "CALLPATH"
        entries.append(cmake_cache_option("WITH_%s" % callpath_flag, spec.satisfies("+libunwind")))

        return entries

    def cmake_args(self):
        """Return extra command-line CMake arguments.

        MPI link flags are only added for ``+mpi`` builds: ``mpi`` is a
        dependency only ``when="+mpi"``, so looking it up on a ``~mpi`` spec
        would raise ``KeyError``.
        """
        spec = self.spec
        args = []

        if spec.satisfies("+mpi"):
            mpi_ld_flags = spec["mpi"].libs.ld_flags
            args.append(self.define("CMAKE_EXE_LINKER_FLAGS", mpi_ld_flags))
            args.append(self.define("MPI_CXX_LINK_FLAGS", mpi_ld_flags))

        if spec.satisfies("+python"):
            pybind11_dir = os.path.join(
                spec["py-pybind11"].prefix, "pybind11", "share", "cmake", "pybind11"
            )
            args.append(f"-Dpybind11_DIR={pybind11_dir}")

        return args

    def setup_build_environment(self, env):
        """Append the MPI external's ``ldflags`` extra attribute to ``LDFLAGS``."""
        super().setup_build_environment(env)

        if "+mpi" in self.spec:
            if self.spec["mpi"].extra_attributes and "ldflags" in self.spec["mpi"].extra_attributes:
                env.append_flags("LDFLAGS", self.spec["mpi"].extra_attributes["ldflags"])

    def setup_run_environment(self, env):
        """Put the Python bindings on ``PYTHONPATH`` for ``+python`` builds."""
        if self.spec.satisfies("+python"):
            env.prepend_path("PYTHONPATH", self.spec.prefix.join(python_platlib))
            env.prepend_path("PYTHONPATH", self.spec.prefix.join(python_purelib))

    @run_after("install")
    def cache_test_sources(self):
        """Copy the example source files after the package is installed to an
        install test subdirectory for use during `spack test run`."""
        cache_extra_test_sources(self, [join_path("examples", "apps")])

    def test_cxx_example(self):
        """build and run cxx-example"""

        exe = "cxx-example"
        source_file = "{0}.cpp".format(exe)

        source_path = find_required_file(
            self.test_suite.current_test_cache_dir, source_file, expected=1, recursive=True
        )

        # Use lib/ when it exists in the install prefix, otherwise lib64/.
        lib_dir = self.prefix.lib if os.path.exists(self.prefix.lib) else self.prefix.lib64

        cxx = which(os.environ["CXX"])
        test_dir = os.path.dirname(source_path)
        with working_dir(test_dir):
            cxx(
                "-L{0}".format(lib_dir),
                "-I{0}".format(self.prefix.include),
                source_path,
                "-o",
                exe,
                "-std=c++11",
                "-lcaliper",
                "-lstdc++",
            )

            cxx_example = which(exe)
            cxx_example()
class PyScaffold(ExecutableApplication):
    """Scale-Free Fractal benchmark - A scalable deep learning benchmark: UNet trained on procedurally-generated, 3D fractal data"""

    name = "scaffold"

    tags = ["python"]

    # Shell lines emitted ahead of the LD_LIBRARY_PATH export on ROCm systems.
    # NOTE(review): module versions and paths are hard-coded for one site and
    # ROCm release - confirm they match the target system definition.
    _ROCM_ENV_SETUP = (
        "module load rocm/6.4.2 rccl/fast-env-slows-mpi libfabric",
        "export SPINDLE_FLUXOPT=off",
        "export LD_PRELOAD=/opt/rocm-6.4.2/llvm/lib/libomp.so",
        "export MPICH_GPU_SUPPORT_ENABLED=0",
        "export LD_LIBRARY_PATH=/collab/usr/global/tools/rccl/toss_4_x86_64_ib_cray/rocm-6.4.1/install/lib/:$LD_LIBRARY_PATH",
        "export LD_LIBRARY_PATH=/opt/cray/pe/cce/20.0.0/cce/x86_64/lib:$LD_LIBRARY_PATH",
    )

    # Benchmark configuration inside the ScaFFold checkout. The explicit "/"
    # makes the path valid whether or not the user-supplied package_path ends
    # with a slash (a doubled slash is harmless on POSIX).
    _BENCHMARK_CONFIG = "{package_path}/ScaFFold/configs/benchmark_default.yml"

    register_phase("prepend_library_path", pipeline="setup", run_before=["make_experiments"])

    def _prepend_library_path(self, workspace, app_inst=None):
        """Populate the ``rocm_mods`` and ``ld_paths`` experiment variables.

        This is done here rather than in spack because python_platlib points
        to the wrong site-packages dir. ``ld_paths`` is a ``:``-joined list of
        library directories inside the pip site-packages tree; ``rocm_mods``
        is a shell prelude that is empty unless ``rocm_arch`` is defined.
        """
        variables = app_inst.variables
        paths = []

        if "cuda_arch" in variables:
            # Avoid libcudnn_graph.so error (unnecessary if cuX_full, necessary if cuX wheel)
            paths.append("{pip_site_packages_path}/nvidia/cudnn/lib")

        variables["rocm_mods"] = ""
        if "rocm_arch" in variables:
            variables["rocm_mods"] = "".join(
                f"{line}\n" for line in self._ROCM_ENV_SETUP
            )

        # Always added. Originally motivated by Caliper runs, to avoid a
        # libcaffe2_nvrtc.so error.
        paths.append("{pip_site_packages_path}/torch/lib")

        variables["ld_paths"] = ":".join(paths)

    software_spec("scaffold", None)

    # TODO: Figure out MPICH_GPU_SUPPORT_ENABLED=0, disabling GTL otherwise linker error.
    executable(
        "modules",
        "{rocm_mods}export LD_LIBRARY_PATH={ld_paths}:$LD_LIBRARY_PATH",
    )
    executable(
        "generate",
        "scaffold generate_fractals -c "
        + _BENCHMARK_CONFIG
        + " --problem-scale {problem_scale}",
        use_mpi=True,
    )
    executable(
        "run",
        "scaffold benchmark -c "
        + _BENCHMARK_CONFIG
        + " --problem-scale {problem_scale}",
        use_mpi=True,
    )

    workload("sweep", executables=["modules", "generate", "run"])
class PyScaffold(PythonPackage, CudaPackage, ROCmPackage):
    """Scale-Free Fractal benchmark"""

    # NOTE(review): this points at a user-specific local checkout; confirm it
    # is reachable from the systems this package is meant to build on.
    git = "file:///usr/workspace/mckinsey/ScaFFold-bp-docsdatagen/ScaFFold"

    version("main", branch="main")
    version("sharedmem", branch="sharedmem")
    version("procruns", branch="procruns")

    # Same handle as the py-scaffold experiment definition uses.
    maintainers("michaelmckinsey1")
    license("Apache-2.0")

    variant("mpi", default=True, description="MPI support")
    variant("caliper", default=False, description="Build with Caliper support enabled.")

    depends_on("python@3.11", type=("build", "run"))
    # TODO: Get pip._vendor.pyproject_hooks._impl.BackendUnavailable: Cannot import 'setuptools.build_meta' from pip otherwise
    depends_on("py-setuptools", type="build")

    # NOTE(review): mpi is required unconditionally even though an "mpi"
    # variant exists - confirm whether this should be when="+mpi".
    depends_on("mpi")

    depends_on("c", type="build")
    depends_on("cxx", type="build")

    depends_on("caliper+python", when="+caliper", type=("build", "run"))

    def cmake_args(self):
        """Return CMake arguments carrying the MPI link flags.

        Starts from the parent class's arguments when a parent defines
        ``cmake_args`` and from an empty list otherwise; the previous
        ``super().cmake_args(self)`` call passed ``self`` twice and could not
        succeed.
        """
        parent_args = getattr(super(), "cmake_args", None)
        args = list(parent_args()) if callable(parent_args) else []

        mpi_ld_flags = self.spec["mpi"].libs.ld_flags
        args.append(self.define("CMAKE_EXE_LINKER_FLAGS", mpi_ld_flags))
        args.append(self.define("MPI_CXX_LINK_FLAGS", mpi_ld_flags))

        return args

    def _mpi_extra_attributes(self):
        """Return the MPI external's extra attributes, or ``{}`` if unavailable."""
        if "+mpi" not in self.spec:
            return {}
        return self.spec["mpi"].extra_attributes or {}

    def _prepend_compiler_rpaths(self, env):
        """Prepend each of the compiler's extra rpaths to ``LD_LIBRARY_PATH``."""
        for rpath in self.compiler.extra_rpaths or ():
            env.prepend_path("LD_LIBRARY_PATH", rpath)

    def _prepend_gtl_lib_path(self, env):
        """Prepend the MPI ``gtl_lib_path`` extra attribute, when present."""
        mpi_attrs = self._mpi_extra_attributes()
        if "gtl_lib_path" in mpi_attrs:
            # Avoid gtl error
            env.prepend_path("LD_LIBRARY_PATH", mpi_attrs["gtl_lib_path"])

    def setup_build_environment(self, env):
        """Expose compiler rpaths and MPI link settings to the build."""
        super().setup_build_environment(env)

        # Order matters for the resulting LD_LIBRARY_PATH: rpaths first, then
        # the GTL directory is prepended in front of them.
        self._prepend_compiler_rpaths(env)

        mpi_attrs = self._mpi_extra_attributes()
        if "ldflags" in mpi_attrs:
            env.append_flags("LDFLAGS", mpi_attrs["ldflags"])
        self._prepend_gtl_lib_path(env)

    def setup_run_environment(self, env):
        """Expose the GTL directory and compiler rpaths at run time."""
        super().setup_run_environment(env)

        # Here the GTL directory goes first and the rpaths are prepended in
        # front of it.
        self._prepend_gtl_lib_path(env)

        # NOTE: for +caliper +rocm, setting ROCP_TOOL_LIBRARIES to
        # <caliper prefix>/lib64/libcaliper.so was tried as a workaround for a
        # rocprofiler context error; it is intentionally not enabled.

        self._prepend_compiler_rpaths(env)
}, "unzip": { "buildable": False, @@ -377,6 +391,22 @@ def compute_packages_section(self): ], "buildable": False, }, + "ncurses": { + "externals": [{"spec": "ncurses@6.1.20180224", "prefix": "/usr"}], + "buildable": False, + }, + "libxcrypt": { + "externals": [{"spec": "libxcrypt@4.1.1", "prefix": "/usr"}], + "buildable": False, + }, + "opengl": { + "externals": [{"spec": "opengl@4.5", "prefix": "/usr"}], + "buildable": False, + }, + "git": { + "externals": [{"spec": "git@2.43.7", "prefix": "/usr"}], + "buildable": False, + }, } } @@ -701,7 +731,7 @@ def rocm_config(self): { "spec": f"llvm@{self.llvm_version}", "prefix": f"/opt/rocm-{self.rocm_version}/llvm", - } + }, ], "buildable": False, }, @@ -740,6 +770,7 @@ def rocm_cce_compiler_cfg(self): f"/opt/rocm-{self.rocm_version}/lib", "/opt/cray/pe/gcc-libs", f"/opt/cray/pe/cce/{self.cce_version}/cce/x86_64/lib", + f"/collab/usr/global/tools/rccl/toss_4_x86_64_ib_cray/rocm-{self.rccl_version}/install/lib", ] # Avoid libunwind.so.1 error on tioga if self.spec.variants["cluster"][0] in ["tioga", "tuolumne"]: diff --git a/systems/llnl-matrix/system.py b/systems/llnl-matrix/system.py index a6e9954f9..e43be3796 100644 --- a/systems/llnl-matrix/system.py +++ b/systems/llnl-matrix/system.py @@ -64,6 +64,14 @@ class LlnlMatrix(System): description="Submit a job to a specific named bank", ) + variant( + "queue", + default="none", + values=("none", "pbatch", "pdebug"), + multi=False, + description="Submit to named queue" + ) + def __init__(self, spec): super().__init__(spec) self.programming_models = [CudaSystem(), OpenMPCPUOnlySystem()] @@ -151,13 +159,21 @@ def compute_packages_section(self): "buildable": False, }, "python": { + "buildable": False, "externals": [ { - "spec": "python@3.9.12+bz2+crypt+ctypes+dbm+lzma+pyexpat~pythoncmd+readline+sqlite3+ssl+tix+tkinter+uuid+zlib", - "prefix": "/usr/tce", + "spec": "python@3.9.12", + "prefix": "/usr/tce/packages/python/python-3.9.12", }, - ], - "buildable": False, + { + "spec": 
"python@3.11.9", + "prefix": "/usr/tce/packages/python/python-3.11.9", + }, + { + "spec": "python@3.12.4", + "prefix": "/usr/tce/packages/python/python-3.12.4", + }, + ] }, "hwloc": { "externals": [{"spec": "hwloc@2.11.2", "prefix": "/usr"}], @@ -171,6 +187,10 @@ def compute_packages_section(self): "externals": [{"spec": "curl@7.61.1", "prefix": "/usr"}], "buildable": False, }, + "git": { + "externals": [{"spec": "git@2.43.7", "prefix": "/usr"}], + "buildable": False, + }, "mpi": {"buildable": False}, } }