diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ec4d08f13..41a976a21 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -24,6 +24,7 @@ # We define the following GitLab pipeline variables: variables: +# Required information about GitHub repository GITHUB_PROJECT_NAME: "RAJAPerf" GITHUB_PROJECT_ORG: "LLNL" # Use the umdev service user to run CI. This prevents from running pipelines as diff --git a/.gitlab/corona-build-and-test-extra.yml b/.gitlab/corona-build-and-test-extra.yml index 2f48f46c9..593efd25d 100644 --- a/.gitlab/corona-build-and-test-extra.yml +++ b/.gitlab/corona-build-and-test-extra.yml @@ -29,5 +29,12 @@ variables: # INFO: This job is activated in RAJA CI, but we don't use desul atomics here #rocmcc_5_1_1_hip_desul_atomics: # variables: -# SPEC: "+rocm~openmp +desul amdgpu_target=gfx906 %rocmcc@5.1.1 ^blt@develop ^hip@5.1.1" +# SPEC: "~openmp +rocm +desul amdgpu_target=gfx906 %rocmcc@5.1.1 ^hip@5.1.1 ^blt@develop" # extends: .build_and_test_on_corona + +# EXTRA +rocmcc_5_1_1_hip_caliper: + variables: + SPEC: "~openmp +caliper +rocm amdgpu_target=gfx906 %rocmcc@5.1.1 ^hip@5.1.1 ^blt@develop" + extends: .build_and_test_on_corona + diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml index f299e16ff..e69d6cd0f 100644 --- a/.gitlab/custom-jobs-and-variables.yml +++ b/.gitlab/custom-jobs-and-variables.yml @@ -16,9 +16,9 @@ variables: # Ruby # Arguments for top level allocation - RUBY_BUILD_AND_TEST_SHARED_ALLOC: "--exclusive --partition=pdebug --time=45 --nodes=1" + RUBY_BUILD_AND_TEST_SHARED_ALLOC: "--exclusive --reservation=ci --qos=ci_ruby --time=45 --nodes=1" # Arguments for job level allocation - RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=30 --nodes=1" + RUBY_BUILD_AND_TEST_JOB_ALLOC: "--reservation=ci --qos=ci_ruby --time=30 --nodes=1" # Project specific variants for ruby PROJECT_RUBY_VARIANTS: "+openmp " # Project specific deps for ruby diff --git a/.gitlab/lassen-build-and-test-extra.yml b/.gitlab/lassen-build-and-test-extra.yml index f76cbe14d..c2e912621 100644 --- a/.gitlab/lassen-build-and-test-extra.yml +++ b/.gitlab/lassen-build-and-test-extra.yml @@ -23,7 +23,15 @@ ibm_clang_9_0_0: # Overriding shared spec: Allow failures ibm_clang_9_0_0_gcc_8_3_1: variables: - SPEC: " ${PROJECT_LASSEN_VARIANTS} %clang@ibm.9.0.0 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ${PROJECT_LASSEN_DEPS}" + SPEC: " ${PROJECT_LASSEN_VARIANTS} %clang@ibm.9.0.0 cxxflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ${PROJECT_LASSEN_DEPS}" + extends: .build_and_test_on_lassen + allow_failure: true + +# Overriding shared spec: Longer allocation + Allow failures +ibm_clang_9_0_0_gcc_8_3_1_cuda_10_1_168: + variables: + SPEC: " ${PROJECT_LASSEN_VARIANTS} +cuda %clang@ibm.9.0.0 cxxflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cuda_arch=70 ^cuda@10.1.168 ${PROJECT_LASSEN_DEPS}" + LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60" extends: .build_and_test_on_lassen allow_failure: true @@ -38,28 +46,28 @@ ibm_clang_9_0_0_gcc_8_3_1_cuda_10_1_168: # Overriding shared spec: Extra flags gcc_8_3_1: variables: - SPEC: " ${PROJECT_LASSEN_VARIANTS} %gcc@8.3.1 cxxflags=\"-finline-functions -finline-limit=20000\" cflags=\"-finline-functions -finline-limit=20000\" ${PROJECT_LASSEN_DEPS}" + SPEC: " ${PROJECT_LASSEN_VARIANTS} %gcc@8.3.1 cxxflags==\"-finline-functions 
-finline-limit=20000\" cflags==\"-finline-functions -finline-limit=20000\" ${PROJECT_LASSEN_DEPS}" extends: .build_and_test_on_lassen # Overriding shared spec: Longer allocation + Allow failures pgi_20_4_gcc_8_3_1: - extends: .build_and_test_on_lassen variables: - SPEC: " ${PROJECT_LASSEN_VARIANTS} %pgi@20.4 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ${PROJECT_LASSEN_DEPS}" + SPEC: " ${PROJECT_LASSEN_VARIANTS} %pgi@20.4 cxxflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ${PROJECT_LASSEN_DEPS}" LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60" + extends: .build_and_test_on_lassen allow_failure: true # Overriding shared spec: Longer allocation + Extra flags xl_16_1_1_12: variables: - SPEC: " ${PROJECT_LASSEN_VARIANTS} %xl@16.1.1.12 cxxflags=\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qsmp=omp -qnoeh -qsuppress=1500-029 -qsuppress=1500-036 ${PROJECT_LASSEN_DEPS}\"" + SPEC: " ${PROJECT_LASSEN_VARIANTS} %xl@16.1.1.12 cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qsmp=omp -qnoeh -qsuppress=1500-029 -qsuppress=1500-036 ${PROJECT_LASSEN_DEPS}\"" LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 50" extends: .build_and_test_on_lassen # Overriding shared spec: Longer allocation + Extra flags xl_16_1_1_12_gcc_8_3_1: variables: - SPEC: " ${PROJECT_LASSEN_VARIANTS} %xl@16.1.1.12 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 -qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qsmp=omp -qnoeh -qsuppress=1500-029 -qsuppress=1500-036\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ${PROJECT_LASSEN_DEPS}" + SPEC: " ${PROJECT_LASSEN_VARIANTS} %xl@16.1.1.12 cxxflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 -qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qsmp=omp -qnoeh -qsuppress=1500-029 -qsuppress=1500-036\" cflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ${PROJECT_LASSEN_DEPS}" LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 50" extends: .build_and_test_on_lassen @@ -80,14 +88,14 @@ xl_16_1_1_12_gcc_7_3_1_cuda_10_1_168: # Overriding shared spec: Extra flags + Longer allocation + Allow failure xl_16_1_1_12_cuda_11_1_0: variables: - SPEC: " ${PROJECT_LASSEN_VARIANTS} +cuda %xl@16.1.1.12 cxxflags=\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" cuda_arch=70 ^cuda@11.1.0 ${PROJECT_LASSEN_DEPS}" + SPEC: " ${PROJECT_LASSEN_VARIANTS} +cuda %xl@16.1.1.12 cxxflags==\"-qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" cuda_arch=70 ^cuda@11.1.0 ${PROJECT_LASSEN_DEPS}" LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60" extends: .build_and_test_on_lassen # Overriding shared spec: Extra flags + Longer allocation + Allow failure xl_16_1_1_12_gcc_8_3_1_cuda_11_1_0: variables: - SPEC: " ${PROJECT_LASSEN_VARIANTS} +cuda %xl@16.1.1.12 cxxflags\"=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 -qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 cuda_arch=70 ^cuda@11.1.0 ${PROJECT_LASSEN_DEPS}" + SPEC: " 
     LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60"
   extends: .build_and_test_on_lassen
 
@@ -114,7 +122,7 @@ clang_14_0_5:
 
 clang_12_0_1_cuda_11_5_0:
   variables:
-    SPEC: " +openmp +cuda cuda_arch=70 %clang@12.0.1 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ^cuda@11.5.0"
+    SPEC: " +openmp +cuda cuda_arch=70 %clang@12.0.1 cxxflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ^cuda@11.5.0"
   extends: .build_and_test_on_lassen
 
 gcc_8_3_1_cuda_11_1_0:
@@ -132,6 +140,11 @@ gcc_8_3_1_cuda_11_5_0_ats_disabled:
 # EXTRAS
 ##########
 
+clang_12_0_1_cuda_11_5_0_caliper:
+  variables:
+    SPEC: " +openmp +caliper +cuda cuda_arch=70 %clang@12.0.1 cxxflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags==\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ^cuda@11.5.0"
+  extends: .build_and_test_on_lassen
+
 clang_13_0_1_libcpp:
   variables:
     SPEC: " +openmp %clang@13.0.1+libcpp"
@@ -139,7 +152,7 @@ clang_13_0_1_libcpp:
 
 clang_14_0_5_asan:
   variables:
-    SPEC: " +openmp %clang@14.0.5 cxxflags=-fsanitize=address"
+    SPEC: " +openmp %clang@14.0.5 cxxflags==\"-fsanitize=address\""
     ASAN_OPTIONS: "detect_leaks=1"
   extends: .build_and_test_on_lassen
 
diff --git a/.gitlab/ruby-build-and-test-extra.yml b/.gitlab/ruby-build-and-test-extra.yml
index 68a6bf0d7..4253a3056 100644
--- a/.gitlab/ruby-build-and-test-extra.yml
+++ b/.gitlab/ruby-build-and-test-extra.yml
@@ -23,7 +23,7 @@ gcc_8_1_0:
 # Overriding shared spec: Allow failures
 pgi_20_1_gcc_local_8_3_1:
   variables:
-    SPEC: " ${PROJECT_RUBY_VARIANTS} %pgi@20.1 cxxflags\"=-rc=/usr/workspace/umpire/pgi/x86_64/local-gcc-8.3.1-rc\" cflags\"=-rc=/usr/workspace/umpire/pgi/x86_64/local-gcc-8.3.1-rc\" fflags=\"-rc=/usr/workspace/umpire/pgi/x86_64/local-gcc-8.3.1-rc\" ${PROJECT_RUBY_DEPS}"
+    SPEC: " ${PROJECT_RUBY_VARIANTS} %pgi@20.1 cxxflags==\"-rc=/usr/workspace/umpire/pgi/x86_64/local-gcc-8.3.1-rc\" cflags==\"-rc=/usr/workspace/umpire/pgi/x86_64/local-gcc-8.3.1-rc\" fflags==\"-rc=/usr/workspace/umpire/pgi/x86_64/local-gcc-8.3.1-rc\" ${PROJECT_RUBY_DEPS}"
   extends: .build_and_test_on_ruby
   allow_failure: true
 
@@ -35,8 +35,16 @@ pgi_20_1_gcc_local_8_3_1:
 # ${PROJECT_<MACHINE>_DEPS} in the extra jobs. There is no reason not to fully
 # describe the spec here.
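+# A note on the "==" flag syntax used throughout this change: in recent Spack
+# versions, "name==value" propagates a flag or variant to the package's
+# dependencies, while "name=value" applies it to the package alone. A minimal
+# sketch (hypothetical spec, assuming Spack's propagation syntax from v0.19+):
+#
+#   raja_perf +openmp cxxflags="-O3"     # -O3 applies to raja_perf only
+#   raja_perf +openmp cxxflags=="-O3"    # -O3 also propagates to dependencies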
+gcc_8_3_1_caliper:
+  variables:
+    SPEC: " +openmp +caliper %gcc@8.3.1"
+    RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1"
+  extends: .build_and_test_on_ruby
+
 icpc_19_1_0:
   variables:
     SPEC: " +openmp %intel@19.1.0"
     RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=40 --nodes=1"
+  before_script:
+    - export USE_DEV_SHM=False
   extends: .build_and_test_on_ruby
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f34947517..37515bc41 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,9 @@
 ###############################################################################
 
 # C is required for googletest to find Threads
-project(RAJAPerfSuite LANGUAGES CXX C)
+cmake_policy(SET CMP0048 NEW)
+
+project(RAJAPerfSuite VERSION 2022.10.0 LANGUAGES CXX C)
 
 if (ENABLE_HIP)
   cmake_minimum_required(VERSION 3.23)
@@ -95,9 +97,10 @@
 if (ENABLE_OPENMP)
   add_definitions(-DRUN_OPENMP)
 endif ()
 
-set(RAJA_PERFSUITE_VERSION_MAJOR 2022)
-set(RAJA_PERFSUITE_VERSION_MINOR 10)
-set(RAJA_PERFSUITE_VERSION_PATCHLEVEL 0)
+set(RAJA_PERFSUITE_VERSION_MAJOR ${RAJAPerfSuite_VERSION_MAJOR})
+set(RAJA_PERFSUITE_VERSION_MINOR ${RAJAPerfSuite_VERSION_MINOR})
+set(RAJA_PERFSUITE_VERSION_PATCHLEVEL ${RAJAPerfSuite_VERSION_PATCH})
+message(STATUS "CMAKE_PROJECT_VERSION:" ${CMAKE_PROJECT_VERSION})
 
 set(RAJA_PERFSUITE_DEPENDS RAJA)
 
@@ -124,6 +127,33 @@
 if ((ENABLE_HIP) AND (NOT ENABLE_KOKKOS))
   list(APPEND RAJA_PERFSUITE_DEPENDS blt::hip_runtime)
 endif()
 
+#
+# Are we using Caliper?
+#
+set(RAJA_PERFSUITE_USE_CALIPER off CACHE BOOL "")
+if (RAJA_PERFSUITE_USE_CALIPER)
+  find_package(caliper REQUIRED)
+  list(APPEND RAJA_PERFSUITE_DEPENDS caliper)
+  add_definitions(-DRAJA_PERFSUITE_USE_CALIPER)
+  message(STATUS "Using Caliper")
+  find_package(adiak REQUIRED)
+  # use ${adiak_LIBRARIES} since, depending on the version, the exported target may be adiak or adiak::adiak
+  list(APPEND RAJA_PERFSUITE_DEPENDS ${adiak_LIBRARIES})
+  if (ENABLE_CUDA)
+    # Adiak will propagate -pthread from Spectrum MPI when Caliper comes from a Spack
+    # install with +mpi; this needs handling even if RAJAPerf is not an MPI program.
+    # We should delegate to BLT handling an unguarded -pthread from any dependency,
+    # but currently BLT does not.
+    set_target_properties(${adiak_LIBRARIES} PROPERTIES INTERFACE_COMPILE_OPTIONS "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>;$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>")
+    # the following is for adiak-0.2.2
+    if (TARGET adiak::mpi)
+      set_target_properties(adiak::mpi PROPERTIES INTERFACE_COMPILE_OPTIONS "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>;$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>")
+    endif ()
+  endif ()
+  message(STATUS "Caliper includes : ${caliper_INCLUDE_DIR}")
+  message(STATUS "Adiak includes : ${adiak_INCLUDE_DIRS}")
+  include_directories(${caliper_INCLUDE_DIR})
+  include_directories(${adiak_INCLUDE_DIRS})
+endif ()
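+
+# For reference (illustrative, with a hypothetical imported target dep::lib; not
+# code from this patch): NVCC does not accept a bare -pthread, so the generator
+# expressions above emit -pthread on host compile lines and forward it with
+# -Xcompiler on CUDA compile lines, i.e.:
+#
+#   set_target_properties(dep::lib PROPERTIES INTERFACE_COMPILE_OPTIONS
+#     "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>;$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>")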
 set(RAJAPERF_BUILD_SYSTYPE $ENV{SYS_TYPE})
 set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME})
 
@@ -151,7 +181,7 @@ else()
 endif()
 
 configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in
-  ${CMAKE_CURRENT_BINARY_DIR}/include/rajaperf_config.hpp)
+               ${CMAKE_CURRENT_BINARY_DIR}/include/rajaperf_config.hpp)
 
 include_directories($<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>)
 
diff --git a/docs/sphinx/user_guide/build.rst b/docs/sphinx/user_guide/build.rst
index be1bde2fe..346ffdda8 100644
--- a/docs/sphinx/user_guide/build.rst
+++ b/docs/sphinx/user_guide/build.rst
@@ -210,3 +210,44 @@
 sizes. The CMake option for this is
 
 will build versions of GPU kernels that use 64, 128, 256, 512, and 1024
 threads per GPU thread-block.
+
+Building with Caliper
+---------------------
+
+The RAJA Performance Suite may also use Caliper instrumentation, with per-variant
+output into Spot/Hatchet .cali files. Original timing is nested within Caliper
+annotations and so is not impacted when Caliper support is turned on. While
+Caliper's overhead is low, it is not zero, so its data will show a small amount
+of timing skew compared to the original. For much more on Caliper, read its
+documentation: `Caliper <http://software.llnl.gov/Caliper/>`_
+
+Caliper *annotation* follows this tree structure::
+
+  Variant
+    Group
+      Kernel
+        Kernel.Tuning
+
+| Build against these Caliper versions:
+|
+| **caliper@2.9.0** (preferred target)
+| **caliper@master** (if using an older Spack version)
+
+In CMake scripts, add
+  **-DRAJA_PERFSUITE_USE_CALIPER=On**
+
+Add to **-DCMAKE_PREFIX_PATH**
+  ;${CALIPER_PREFIX}/share/cmake/caliper;${ADIAK_PREFIX}/lib/cmake/adiak
+
+or use the
+  -Dcaliper_DIR and -Dadiak_DIR package prefixes.
+
+For Spack:   raja_perf +caliper ^caliper@2.9.0
+
+For Uberenv: python3 scripts/uberenv/uberenv.py --spec +caliper ^caliper@2.9.0
+
+If you intend to pass nvtx or roctx annotations to NVIDIA or AMD profiling tools,
+build Caliper with +cuda cuda_arch=XX or +rocm, respectively. Then you can enable
+an additional Caliper service for nvtx or roctx, for example with roctx::
+
+  CALI_SERVICES_ENABLE=roctx rocprof --roctx-trace --hip-trace raja-perf.exe
\ No newline at end of file
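Putting the build options above together, a hypothetical configure line for a
Caliper-enabled build might look like the following (CALIPER_PREFIX and
ADIAK_PREFIX are placeholder install paths, not values from this patch):

    cmake -DRAJA_PERFSUITE_USE_CALIPER=On \
          -Dcaliper_DIR=${CALIPER_PREFIX}/share/cmake/caliper \
          -Dadiak_DIR=${ADIAK_PREFIX}/lib/cmake/adiak \
          ..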
diff --git a/docs/sphinx/user_guide/output.rst b/docs/sphinx/user_guide/output.rst
index edcbc9a57..7706620b1 100644
--- a/docs/sphinx/user_guide/output.rst
+++ b/docs/sphinx/user_guide/output.rst
@@ -159,3 +159,61 @@
 storing the result in matrix A (N_i X N_j). Problem size could be chosen to
 be the maximum number of entries in matrix B or C. We choose the size of
 matrix A (N_i * N_j), which is more closely aligned with the number of
 independent operations (i.e., the amount of parallel work) in the kernels.
+
+
+===========================
+Caliper output files
+===========================
+
+If you've built RAJAPerf with Caliper support turned on, then in addition to the
+outputs mentioned above, we also save a .cali file for each variant run, such as:
+Base_OpenMP.cali, Lambda_OpenMP.cali, RAJA_OpenMP.cali, etc.
+
+There are several techniques to display the Caliper trees (timing hierarchy):
+
+1: Caliper's own cali-query tool; run it with **-T** (or **--tree**) to display
+the tree::
+
+  cali-query -T $HOME/data/default_problem_size/gcc/RAJA_Seq.cali
+
+2: Caliper's Python module *caliperreader*::
+
+  import os
+  import caliperreader as cr
+  DATA_DIR = os.getenv('HOME')+"/data/default_problem_size/gcc"
+  os.chdir(DATA_DIR)
+  r = cr.CaliperReader()
+  r.read("RAJA_Seq.cali")
+  metric = 'avg#inclusive#sum#time.duration'
+  for rec in r.records:
+      path = rec['path'] if 'path' in rec else 'UNKNOWN'
+      time = rec[metric] if metric in rec else '0'
+      if not 'UNKNOWN' in path:
+          if (isinstance(path, list)):
+              path = "/".join(path)
+          print("{0}: {1}".format(path, time))
+
+You can add a couple of lines to view the metadata keys captured by Caliper/Adiak::
+
+  for g in r.globals:
+      print(g)
+
+You can also add a line to display a metadata value from the dictionary **r.globals**.
+
+For example, print the OpenMP max-threads value recorded at runtime::
+
+  print('OMP Max Threads: ' + r.globals['omp_max_threads'])
+
+or the variant represented in this file::
+
+  print('Variant: ' + r.globals['variant'])
+
+3: Using the *Hatchet* Python module::
+
+  import os
+  import hatchet as ht
+  DATA_DIR = os.getenv('HOME')+"/data/default_problem_size/gcc"
+  os.chdir(DATA_DIR)
+  gf1 = ht.GraphFrame.from_caliperreader("RAJA_Seq.cali")
+  print(gf1.tree())
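+
+Since Hatchet backs each GraphFrame with a pandas DataFrame, the usual pandas
+operations apply to the data read above. A minimal sketch (the metric column
+name below is an assumption; inspect gf1.dataframe.columns for the names
+actually present in your .cali files)::
+
+  # List the metric columns Hatchet created from the Caliper file.
+  print(gf1.dataframe.columns)
+  # Assuming an inclusive-time column named 'time (inc)', show the ten most
+  # expensive nodes.
+  print(gf1.dataframe.sort_values(by='time (inc)', ascending=False).head(10))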
diff --git a/scripts/argparse_sweep_graph.py b/scripts/argparse_sweep_graph.py
new file mode 100644
index 000000000..3825bfb36
--- /dev/null
+++ b/scripts/argparse_sweep_graph.py
@@ -0,0 +1,441 @@
+#!/usr/bin/env python3
+import argparse
+import sys
+import os
+import csv
+import glob
+import difflib
+import itertools
+import importlib
+import importlib.util
+import pkgutil
+import traceback
+
+import data_classes_sweep_graph as dc
+
+def import_submodules(package, recursive=True):
+    """ Import all submodules of a module, recursively, including subpackages
+
+    :param package: package (name or actual module)
+    :type package: str | module
+    :rtype: dict[str, types.ModuleType]
+    """
+    if isinstance(package, str):
+        package = importlib.import_module(package)
+    results = {}
+    for loader, name, is_pkg in pkgutil.walk_packages(package.__path__):
+        full_name = package.__name__ + '.' + name
+        chk_name = full_name.split('.')
+        if not 'roundtrip' in chk_name and not 'vis' in chk_name:
+            results[full_name] = importlib.import_module(full_name)
+            if recursive and is_pkg:
+                results.update(import_submodules(full_name))
+    # print(results)
+    return results
+
+
+def check_hatchet_import():
+    reader_spec = importlib.util.find_spec("hatchet")
+    reader_found = reader_spec is not None
+    depends_found = False
+    if reader_found:
+        print("Hatchet Reader found")
+        try:
+            cr = importlib.import_module("hatchet")
+            import_submodules(cr)
+            depends_found = True
+        except:
+            print("Can't load Hatchet")
+            traceback.print_exc()
+    else:
+        print("Hatchet not found")
+    return reader_found and depends_found
+
+
+def get_size_from_dir_name(sweep_subdir_name):
+    # print(sweep_subdir_name)
+    run_size_name = sweep_subdir_name.replace("SIZE_", "")
+    try:
+        run_size = int(run_size_name)
+        return str(run_size)
+    except ValueError:
+        raise NameError("Expected SIZE_<integer>, got {0}".format(sweep_subdir_name))
+
+def get_close_matches(test_value,match_values) -> list:
+    close_matches = difflib.get_close_matches(test_value,match_values, n=30, cutoff=0.25)
+    if len(close_matches) > 0:
+        # cull for substring matches
+        found_sub = []
+        for mm in close_matches:
+            if mm.find(test_value) != -1:
+                found_sub.append(mm)
+        if len(found_sub) > 0:
+            close_matches = found_sub
+    return close_matches
+
+
+def get_close_matches_icase(word, possibilities, *args, **kwargs):
+    """ Case-insensitive version of difflib.get_close_matches """
+    lword = word.lower()
+    lpos = {}
+    for p in possibilities:
+        if p.lower() not in lpos:
+            lpos[p.lower()] = [p]
+        else:
+            lpos[p.lower()].append(p)
+    lmatches = difflib.get_close_matches(lword, lpos.keys(), *args, **kwargs)
+    ret = [lpos[m] for m in lmatches]
+    ret = itertools.chain.from_iterable(ret)
+    return set(ret)
+
+def kind_action_check(values,kinds, kind_templates):
+    check = []
+
+    for k in values:
+        # strip whitespace
+        k = ''.join(k.split())
+        items = k.split('<')
+        if k in kinds:
+            print("matches kinds: " + k)
+            check.append(k)
+        elif len(items) == 1:
+            close_matches = get_close_matches_icase(k, kinds.keys())
+            if len(close_matches) > 0:
+                raise NameError(
+                    "Invalid kinds check for {0}: Did you mean one of {1}, or try changing case".format(k,
+                        str(close_matches)))
+            else:
+                raise NameError("Invalid kinds check for {0}: Use one of {1}".format(k, str(kinds.keys())))
+        elif len(items) > 1:
+            # continue for now because this is a DSL expression
+            print('DSL: No checking yet')
+            check.append(k)
+
+    return check
+
+
+def direct_action_check(values,prescan_dict_name, namespace):
+    check = []
+    for k in values:
+        if k in namespace.prescan[prescan_dict_name]:
+            check.append(k)
+        else:
+            close_matches = get_close_matches(k, namespace.prescan[prescan_dict_name])
+            if len(close_matches) > 0:
+                raise NameError("Invalid {0} check for {1}: Did you mean one of {2}, or try changing case".format(prescan_dict_name, k, str(close_matches)))
+            else:
+                raise NameError("Invalid {0} check for {1}: Use one of {2}".format(prescan_dict_name, k, str(namespace.prescan[prescan_dict_name])))
+    return check
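+
+# Illustrative only (not part of the original script): the close-match checks
+# combine difflib similarity with the substring cull in get_close_matches().
+# For example, assuming a prescan that found these kernels:
+#
+#   get_close_matches("Poly", ["Polybench_2MM", "Polybench_3MM", "Basic_DAXPY"])
+#   # -> ["Polybench_2MM", "Polybench_3MM"]   (substring hits survive the cull)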
+def close_action_check(values,prescan_dict_name, namespace):
+    outer_set = set()
+    for k in values:
+        inner_check = set()
+        if k in namespace.prescan[prescan_dict_name]:
+            inner_check.add(k)
+        else:
+            close_matches = get_close_matches(k, namespace.prescan[prescan_dict_name])
+            if len(close_matches) > 0:
+                inner_check.update(close_matches)
+        if len(inner_check) == 0:
+            raise NameError("Invalid close check against {0} for {1}: Use something close to any of {2}, or try changing case".format(prescan_dict_name,k, str(namespace.prescan[prescan_dict_name])))
+        outer_set.update(inner_check)
+    check = list(outer_set)
+    check.sort()
+    return check
+
+class process_argparse():
+    # The intended use is to return both an args Namespace object and the
+    # unknown args that specifically do not use a - or -- prefix.
+
+    class CaliperAction(argparse.Action):
+        def __init__(self, option_strings, dest, nargs='?', **kwargs):
+            super().__init__(option_strings, dest, nargs, **kwargs)
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            check = check_hatchet_import()
+            setattr(namespace, self.dest, check)
+            if check:
+                cr = importlib.import_module("hatchet")
+                import_submodules(cr)
+            else:
+                cr = None
+            setattr(namespace,"cr",cr)
+
+    class KindAction(argparse.Action):
+        def __init__(self, option_strings, dest, nargs='+', **kwargs):
+            super().__init__(option_strings, dest, nargs, **kwargs)
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            check = kind_action_check(values, dc.Data.kinds, dc.Data.kind_templates)
+            setattr(namespace, self.dest, check)
+
+    class KernelAction(argparse.Action):
+        def __init__(self, option_strings, dest, nargs='+', **kwargs):
+            super().__init__(option_strings, dest, nargs, **kwargs)
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            check = direct_action_check(values,"kernels_union",namespace)
+            setattr(namespace, self.dest, check)
+
+    class KernelCloseAction(argparse.Action):
+        def __init__(self, option_strings, dest, nargs='+', **kwargs):
+            super().__init__(option_strings, dest, nargs, **kwargs)
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            check = close_action_check(values,"kernels_union",namespace)
+            setattr(namespace, self.dest, check)
+
+    class VariantAction(argparse.Action):
+        def __init__(self, option_strings, dest, nargs='+', **kwargs):
+            super().__init__(option_strings, dest, nargs, **kwargs)
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            check = direct_action_check(values, "variants", namespace)
+            setattr(namespace, self.dest, check)
+
+    class VariantCloseAction(argparse.Action):
+        def __init__(self, option_strings, dest, nargs='+', **kwargs):
+            super().__init__(option_strings, dest, nargs, **kwargs)
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            check = close_action_check(values, "variants", namespace)
+            setattr(namespace, self.dest, check)
+
+    class TuningAction(argparse.Action):
+        def __init__(self, option_strings, dest, nargs='+', **kwargs):
+            super().__init__(option_strings, dest, nargs, **kwargs)
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            check = direct_action_check(values, "tunings", namespace)
+            setattr(namespace, self.dest, check)
+
+    class TuningCloseAction(argparse.Action):
+        def __init__(self, option_strings, dest, nargs='+', **kwargs):
+            super().__init__(option_strings, dest, nargs, **kwargs)
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            check = close_action_check(values, "tunings", namespace)
+            setattr(namespace, self.dest, check)
+    class DirectoryAction(argparse.Action):
+        def __init__(self, option_strings, dest, nargs='+', **kwargs):
+            if nargs != '+':
+                raise ValueError("Expected variable nargs to be set to '+'")
+            super().__init__(option_strings, dest, nargs, **kwargs)
+
+        def __call__(self, parser, namespace, values, option_string=None):
+            # print('Action Namespace=%r values=%r option_string=%r' % (namespace, values, option_string))
+            if hasattr(namespace,'cr'):
+                cr = getattr(namespace,'cr')
+                print("DirectoryAction detects .cali file processing")
+                prescan = self.prescan_caliper_sweep_dirs(cr,values)
+            else:
+                prescan = self.prescan_sweep_dirs(values)
+            setattr(namespace, 'prescan', prescan)
+            setattr(namespace, self.dest, prescan["directories"])  # do the normal attr set for dest
+
+        def prescan_sweep_dirs(self,sweep_dir_paths) -> dict:
+            prescan = {"directories": [], "kernels_union": [], "kernels_intersection": [], "variants": [], "tunings": [], "sweep_sizes": [],
+                       "machines" : []}
+            # "machines" only gleans os.path.basename of sweep_dir_paths; it does not
+            # parse real machine names encoded in the data, so machine_name is a
+            # convention for directory naming.
+            sets = []
+            outer_runsizes_set = set()
+            for sweep_dir_path in sweep_dir_paths:
+                if not os.path.exists(sweep_dir_path):
+                    raise NameError("Invalid directory: {0}".format(sweep_dir_path))
+                kernel_set = set()
+                sweep_dir_path = sweep_dir_path.rstrip(os.sep)
+                prescan["directories"].append(sweep_dir_path)
+                sweep_dir_name = os.path.basename(sweep_dir_path)
+                if sweep_dir_name not in prescan["machines"]:
+                    prescan["machines"].append(sweep_dir_name)
+                subdirs = sorted(glob.glob(glob.escape(sweep_dir_path) + os.sep + "**" + os.sep + "SIZE_*", recursive=True))
+                inner_runsizes_set = set()
+                for subdir in subdirs:
+                    # print(subdir)
+                    run_size = get_size_from_dir_name(os.path.basename(subdir))
+                    inner_runsizes_set.add(run_size)
+                    #if run_size not in prescan["sweep_sizes"]:
+                    #    prescan["sweep_sizes"].append(run_size)
+                    # open one of the timing files at this run_size
+                    timing_files = sorted(glob.glob(glob.escape(subdir) + os.sep + "RAJAPerf-timing-*", recursive=False))
+                    with open(timing_files[0], "r") as file:
+                        file_reader = csv.reader(file, delimiter=',')
+                        variants_read = False
+                        tunings_read = False
+                        for row in file_reader:
+                            # print(row)
+                            if row[0].strip() == "Kernel":
+                                if not variants_read:
+                                    for c in range(1, len(row)):
+                                        variant_name = row[c].strip()
+                                        if variant_name not in prescan["variants"]:
+                                            prescan["variants"].append(variant_name)
+                                    variants_read = True
+                                elif not tunings_read:
+                                    for c in range(1, len(row)):
+                                        tuning_name = row[c].strip()
+                                        if tuning_name not in prescan["tunings"]:
+                                            prescan["tunings"].append(tuning_name)
+                                    tunings_read = True
+                            elif variants_read and tunings_read:
+                                kernel_name = row[0].strip()
+                                if kernel_name not in prescan["kernels_union"]:
+                                    prescan["kernels_union"].append(kernel_name)
+                                if kernel_name not in kernel_set:
+                                    kernel_set.add(kernel_name)
+                if (not outer_runsizes_set) and inner_runsizes_set:
+                    outer_runsizes_set = inner_runsizes_set
+                outer_runsizes_set = outer_runsizes_set.intersection(inner_runsizes_set)
+                sets.append(kernel_set)
+            prescan["kernels_intersection"] = set.intersection(*sets)
+            prescan["sweep_sizes"] = list(outer_runsizes_set)
+            return prescan
+
+        def prescan_caliper_sweep_dirs(self, cr,sweep_dir_paths) -> dict:
+            prescan = {"directories": [], "kernels_union": [], "kernels_intersection": [], "variants": [], "tunings": [],
+                       "sweep_sizes": [],
+                       "machines" : []}
+            # "machines" only gleans os.path.basename of sweep_dir_paths, as above.
+            sets = []
+            outer_runsizes_set = set()
+            for sweep_dir_path in sweep_dir_paths:
+                if not os.path.exists(sweep_dir_path):
+                    raise NameError("Invalid directory: {0}".format(sweep_dir_path))
+
kernel_set = set() + sweep_dir_path = sweep_dir_path.rstrip(os.sep) + prescan["directories"].append(sweep_dir_path) + sweep_dir_name = os.path.basename(sweep_dir_path) + if sweep_dir_name not in prescan["machines"]: + prescan["machines"].append(sweep_dir_name) + subdirs = sorted(glob.glob(glob.escape(sweep_dir_path) + os.sep + "**" + os.sep + "SIZE_*", recursive=True)) + inner_runsizes_set = set() + for subdir in subdirs: + # print(subdir) + run_size = get_size_from_dir_name(os.path.basename(subdir)) + inner_runsizes_set.add(run_size) + cali_files = sorted(glob.glob(glob.escape(subdir) + os.sep + "*.cali", recursive=False)) + # not all kernels run in every variant so capture kernel list across variants + for f in cali_files: + gf = cr.GraphFrame.from_caliperreader(f) + #print(gf.metadata) + variant_name = gf.metadata['variant'] + if variant_name not in prescan["variants"]: + prescan["variants"].append(variant_name) + #machine = gf.metadata["cluster"] + "_" + gf.metadata["compiler"] + #if machine not in prescan["machines"]: + # prescan["machines"].append(machine) + # extract kernel list + kernel_index = -1 + tt = gf.graph.roots[0].traverse(order="pre") + for nn in tt: + # test if leaf node + if not nn.children: + # kernel_tuning_name is kernel.tuning in Caliper + kernel_tuning_name = gf.dataframe.loc[nn, 'name'] + kernel_name = kernel_tuning_name.split('.')[0] + tuning_name = kernel_tuning_name.split('.')[1] + if kernel_name not in prescan["kernels_union"]: + prescan["kernels_union"].append(kernel_name) + if kernel_name not in kernel_set: + kernel_set.add(kernel_name) + if tuning_name not in prescan["tunings"]: + prescan["tunings"].append(tuning_name) + + + if (not outer_runsizes_set) and inner_runsizes_set: + outer_runsizes_set = inner_runsizes_set + outer_runsizes_set = outer_runsizes_set.intersection(inner_runsizes_set) + sets.append(kernel_set) + prescan["kernels_intersection"] = set.intersection(*sets) + prescan["sweep_sizes"] = list(outer_runsizes_set) + return prescan + + def __init__(self): + self.parent_caliper_parser= argparse.ArgumentParser(add_help=False) + self.parent_caliper_parser.add_argument('--caliper', action=self.CaliperAction) + self.parent_parser = argparse.ArgumentParser(parents=[self.parent_caliper_parser],add_help=False) + self.parent_parser.add_argument('-d','--directories', required=True, nargs='+', action=self.DirectoryAction) + self.child_parser = argparse.ArgumentParser(parents=[self.parent_parser]) + + self.child_parser.add_argument('-o','--output',nargs=1, + help="output file prefix") + self.child_parser.add_argument('-gname', '--graph-name', nargs=1, + help="graph name") + self.child_parser.add_argument('-lloc', '--legend-location', nargs=2, + help="legend location x y ") + self.child_parser.add_argument('-ylabel', '--y-axis-label', nargs=1, + help="y axis label") + self.child_parser.add_argument('-yscale', '--y-axis-scale', nargs=1, + help="y axis scale") + self.child_parser.add_argument('-xlabel', '--x-axis-label', nargs=1, + help="x axis label") + self.child_parser.add_argument('-xscale', '--x-axis-scale', nargs=1, + help="x axis scale") + self.child_parser.add_argument('-hbin', '--histogram-bin-size', nargs=1, + help="histogram bin size") + self.child_parser.add_argument('-ylim', '--y-axis-limit', nargs=2, + help="y axis limit") + self.child_parser.add_argument('-xlim', '--x-axis-limit', nargs=2, + help="x axis limit") + self.child_parser.add_argument('--recolor', action='append',nargs=2, + help="recolor series_name (r,g,b) : series name followed by rgb 
in tuple form r,g,b floats in [0-1], optional repeat series color pairs") + self.child_parser.add_argument('--reformat', action='append',nargs=2, + help="reformat series_name format_str") + #the following should be modified to use action based on possible kinds + pgroup = self.child_parser.add_mutually_exclusive_group() + pgroup.add_argument('-pc','--print-compact', nargs=1,action=self.KindAction, + help="print one of kind argument expression in compact form") + pgroup.add_argument('-pe','--print-expanded', nargs=1,action=self.KindAction, + help="print one of kind argument expression in expanded form") + self.child_parser.add_argument('-slg','--split-line-graphs', nargs=1,action=self.KindAction, + help="split line graph of one kind argument expression") + self.child_parser.add_argument('-bg','--bar-graph', nargs=1,action=self.KindAction, + help="bar graph of one kind argument expression") + self.child_parser.add_argument('-hg','--histogram-graph', nargs=1,action=self.KindAction, + help="histogram graph of one kind argument expression") + + self.child_parser.add_argument('-k', '--kernels', nargs='+', action=self.KernelAction, + help='kernels to include') + self.child_parser.add_argument('-ek', '--exclude-kernels', nargs='+', action=self.KernelAction, + help='kernels to exclude') + self.child_parser.add_argument('-kc', '--kernels-close', nargs='+', action=self.KernelCloseAction, + help="search for set of kernels to include close to arg eg. Poly_ Basic_ etc") + self.child_parser.add_argument('-ekc', '--exclude-kernels-close', nargs='+', action=self.KernelCloseAction, + help="search for set of kernels to exclude close to arg eg. Poly_ Basic_ etc") + # eventually setup action to crosscheck against known kernel groups + self.child_parser.add_argument('-kg', '--kernel-groups', nargs='+', + help='kernel groups to include') + self.child_parser.add_argument('-ekg', '--exclude-kernel-groups', nargs='+', + help='kernel groups to exclude') + + self.child_parser.add_argument('-v', '--variants', nargs='+', action=self.VariantAction, + help='variants to include') + self.child_parser.add_argument('-ev', '--exclude-variants', nargs='+', action=self.VariantAction, + help='variants to exclude') + self.child_parser.add_argument('-vc', '--variants-close', nargs='+', action=self.VariantCloseAction, + help="search for set of variants to include close to arg like Seq, CUDA, HIP, etc") + self.child_parser.add_argument('-evc', '--exclude-variants-close', nargs='+', action=self.VariantCloseAction, + help="search for set of variants to exclude close to arg like Seq, CUDA, HIP etc") + + self.child_parser.add_argument('-t', '--tunings', nargs='+', action=self.TuningAction, + help='tunings to include') + self.child_parser.add_argument('-et', '--exclude-tunings', nargs='+', action=self.TuningAction, + help='tunings to exclude') + self.child_parser.add_argument('-tc', '--tunings-close', nargs='+', action=self.TuningCloseAction, + help="search for set of tunings to include close to arg eg. block, def{ault} etc") + self.child_parser.add_argument('-etc', '--exclude-tunings-close', nargs='+', action=self.TuningCloseAction, + help="search for set of tunings to exclude close to arg eg. 
block, def{ault} etc") + + + + def parse_args(self,argv): + args, unknown = self.child_parser.parse_known_args(argv) + return args, unknown + +def main(argv): + first_stage_parser = process_argparse() + args, unknown = first_stage_parser.parse_args(argv) + print(args) + print(unknown) + +if __name__ == '__main__': + main(sys.argv[1:]) \ No newline at end of file diff --git a/scripts/data_classes_sweep_graph.py b/scripts/data_classes_sweep_graph.py new file mode 100644 index 000000000..f7d206714 --- /dev/null +++ b/scripts/data_classes_sweep_graph.py @@ -0,0 +1,1388 @@ +import math + +def make_tuple_str(astr): + astr = astr.strip() + if len(astr) < 2 or astr[0] != "(" or astr[len(astr) - 1] != ")": + return None + astr = astr[1:len(astr) - 1] + atup = astr.split(",") + return tuple((a.strip() for a in atup)) + + +def normalize_color_tuple(t): + len_t = 0.0 + for i in range(0, len(t)): + len_t += t[i] * t[i] + len_t = math.sqrt(len_t) + new_t = () + for i in range(0, len(t)): + new_t += (t[i] / len_t,) + return new_t + + +def clamp_tuple(t, min_val=0.0, max_val=1.0): + new_t = () + for i in range(0, len(t)): + val = t[i] + if val > max_val: + val = max_val + elif val < min_val: + val = min_val + new_t += (val,) + return new_t + + +def color_mul(t, factor): + new_t = () + for i in range(0, len(t)): + new_t += (t[i] * factor,) + return clamp_tuple(new_t) + + +def make_color_tuple_rgb(r, g, b): + return (r / 255.0, g / 255.0, b / 255.0) + + +def make_color_tuple_str(color_str): + rgb = make_tuple_str(color_str) + if len(rgb) != 3: + raise NameError("Expected a tuple of 3 floats in [0-1]") + r = float(rgb[0].strip()) + g = float(rgb[1].strip()) + b = float(rgb[2].strip()) + return clamp_tuple((r, g, b)) + + +def set_legend_order(labels) -> list: + print("set_legend_order:" + str(labels)) + lex_order = ['machine', 'execution_model', 'programming_model', 'tuning'] + legend_order = [] + + machines = [] + programming_models = [] + execution_models = [] + tunings = [] + for label in labels: + ll = label.split(" ") + machines.append(ll[0]) + variant = ll[1] + vs = variant.split("_") + programming_models.append(vs[0]) + execution_models.append(vs[1]) + tunings.append(ll[2]) + + machines_min_len = len(min(machines, key=len)) + prg_min_len = len(min(programming_models, key=len)) + exe_min_len = len(min(execution_models, key=len)) + tunings_min_len = len(min(tunings, key=len)) + lex_strings = [] + for i in range(len(machines)): + machines[i] = machines[i][0:machines_min_len] + programming_models[i] = programming_models[i][0:prg_min_len] + execution_models[i] = execution_models[i][0:exe_min_len] + tunings[i] = tunings[i][0:tunings_min_len] + lex_string = "" + for lo in lex_order: + if lo == 'machine': + lex_string += machines[i] + if lo == 'programming_model': + lex_string += programming_models[i] + if lo == 'execution_model': + lex_string += execution_models[i] + if lo == 'tuning': + lex_string += tunings[i] + lex_strings.append([lex_string, i]) + lex_strings.sort() + + for x in lex_strings: + legend_order.append(x[1]) + + # print(lex_strings) + # print(legend_order) + + return legend_order + + +#g_timing_filename = "RAJAPerf-timing-Minimum.csv" +g_timing_filename = "RAJAPerf-timing-Average.csv" +g_runinfo_filename = "RAJAPerf-kernels.csv" +g_timing_file_kind = "time(s)" + +# Kernels sorted into categories based on performance bottlenecks + +g_known_kernel_groups = { + "bandwidth": { + "kind": "bandwidth(GiB/s)", + "kernels": [ "Basic_DAXPY", "Basic_DAXPY_ATOMIC", "Basic_INIT3", + "Basic_INIT_VIEW1D", 
"Basic_INIT_VIEW1D_OFFSET", + "Basic_MULADDSUB", "Lcals_DIFF_PREDICT", "Lcals_EOS", + "Lcals_FIRST_DIFF", "Lcals_FIRST_SUM", "Lcals_GEN_LIN_RECUR", + "Lcals_HYDRO_1D", "Lcals_PLANCKIAN", "Lcals_TRIDIAG_ELIM", + "Polybench_JACOBI_1D", "Stream_ADD", "Stream_COPY", + "Stream_MUL", "Stream_TRIAD", + + "Basic_IF_QUAD", "Basic_INDEXLIST", "Basic_INDEXLIST_3LOOP", + "Basic_NESTED_INIT", "Lcals_HYDRO_2D", "Lcals_INT_PREDICT", + "Polybench_FDTD_2D", "Polybench_HEAT_3D", + "Polybench_JACOBI_2D", "Stream_DOT", "Apps_CONVECTION3DPA", + "Apps_DEL_DOT_VEC_2D", "Apps_DIFFUSION3DPA", "Apps_ENERGY", + "Apps_FIR", "Apps_MASS3DPA", "Apps_NODAL_ACCUMULATION_3D", + "Apps_PRESSURE", "Apps_VOL3D", "Algorithm_SCAN", + "Algorithm_REDUCE_SUM", "Algorithm_MEMCPY", "Algorithm_MEMSET"], + }, + "flops": { + "kind": "GFLOPS", + "kernels": [ "Basic_MAT_MAT_SHARED", "Polybench_2MM", "Polybench_3MM", + "Polybench_GEMM", + + "Polybench_HEAT_3D", "Apps_CONVECTION3DPA", + "Apps_DEL_DOT_VEC_2D", "Apps_DIFFUSION3DPA", "Apps_FIR", + "Apps_MASS3DPA", "Apps_VOL3D", ], + }, + "reduce": { + "kind": "throughput(GProblem size/s)", + "kernels": [ "Basic_PI_REDUCE", "Basic_REDUCE3_INT", "Basic_REDUCE_STRUCT", + "Basic_TRAP_INT", "Lcals_FIRST_MIN", "Stream_DOT", + "Algorithm_REDUCE_SUM", ] + }, + "other": { + "kind": "throughput(GProblem size/s)", + "kernels": [ "Polybench_ADI", "Polybench_ATAX", "Polybench_FLOYD_WARSHALL", + "Polybench_GEMVER", "Polybench_GESUMMV", "Polybench_MVT", + "Apps_LTIMES", "Apps_LTIMES_NOVIEW", "Algorithm_SORT", + "Algorithm_SORTPAIRS", ] + }, + "launch_bound": { + "kind": "time/rep(us)", + "kernels": [ "Apps_HALOEXCHANGE", "Apps_HALOEXCHANGE_FUSED", ] + }, + } + + + +g_color_base_factor = 1.0 +g_color_lambda_factor = 0.7 +g_color_raja_factor = 0.4 +g_color_seq = normalize_color_tuple(make_color_tuple_rgb(204, 119, 34)) # ocre +g_color_omp = normalize_color_tuple(make_color_tuple_rgb(0, 115, 125)) # omp teal +g_color_ompt = normalize_color_tuple(make_color_tuple_rgb(125, 10, 0)) # omp teal compliment +g_color_cuda = normalize_color_tuple(make_color_tuple_rgb(118, 185, 0)) # nvidia green +g_color_hip = normalize_color_tuple(make_color_tuple_rgb(237, 28, 36)) # amd red +g_known_variants = {"Base_Seq" : {"color": color_mul(g_color_seq, g_color_base_factor)}, + "Lambda_Seq" : {"color": color_mul(g_color_seq, g_color_lambda_factor)}, + "RAJA_Seq" : {"color": color_mul(g_color_seq, g_color_raja_factor)}, + + "Base_OpenMP" : {"color": color_mul(g_color_omp, g_color_base_factor)}, + "Lambda_OpenMP" : {"color": color_mul(g_color_omp, g_color_lambda_factor)}, + "RAJA_OpenMP" : {"color": color_mul(g_color_omp, g_color_raja_factor)}, + + "Base_OpenMPTarget" : {"color": color_mul(g_color_ompt, g_color_base_factor)}, + "Lambda_OpenMPTarget": {"color": color_mul(g_color_ompt, g_color_lambda_factor)}, + "RAJA_OpenMPTarget" : {"color": color_mul(g_color_ompt, g_color_raja_factor)}, + + "Base_CUDA" : {"color": color_mul(g_color_cuda, g_color_base_factor)}, + "Lambda_CUDA" : {"color": color_mul(g_color_cuda, g_color_lambda_factor)}, + "RAJA_CUDA" : {"color": color_mul(g_color_cuda, g_color_raja_factor)}, + + "Base_HIP" : {"color": color_mul(g_color_hip, g_color_base_factor)}, + "Lambda_HIP" : {"color": color_mul(g_color_hip, g_color_lambda_factor)}, + "RAJA_HIP" : {"color": color_mul(g_color_hip, g_color_raja_factor)} + } +g_known_tunings = {"default" : {"format": "-"}, + "block_25" : {"format": "-"}, + "block_32" : {"format": ":"}, + "block_64" : {"format": "-."}, + "block_128" : {"format": "--"}, + "block_256" : {"format": 
"-"}, + "block_512" : {"format": "-."}, + "block_1024": {"format": "-"}, + "cub" : {"format": ":"}, + "library" : {"format": "-"}, + "rocprim" : {"format": ":"} + } +g_markers = ["o", "s", "+", "x", "*", "d", "h", "p", "8"] + +# reformat or color series +# formatted as series_name: dictionary of "color": color, "format": format +g_series_reformat = {} + + + +def first(vals): + return vals[0] + + +def last(vals): + return vals[len(vals) - 1] + + +def sum(vals): + sum_val = 0 + for val in vals: + sum_val += val + return sum_val + + +def avg(vals): + return sum(vals) / len(vals) + + +def stddev(vals): + avg_val = avg(vals) + stddev_val = 0 + for val in vals: + stddev_val += (val - avg_val) * (val - avg_val) + stddev_val /= len(vals) + stddev_val = math.sqrt(stddev_val) + return stddev_val + + +def relstddev(vals): + avg_val = avg(vals) + stddev_val = 0 + for val in vals: + stddev_val += (val - avg_val) * (val - avg_val) + stddev_val /= len(vals) + stddev_val = math.sqrt(stddev_val) + return stddev_val / abs(avg_val) + + +# returns (intercept, slope, correlation_coefficient) +def linearRegression_helper(n, xsum, ysum, x2sum, y2sum, xysum): + assert (n > 0) + if n == 1: + slope = 0.0 + intercept = ysum + correlation_coefficient = 1.0 + else: + slope = (n * xysum - xsum * ysum) / ((n * x2sum - xsum * xsum) + 1e-80) + intercept = (ysum - slope * xsum) / n + correlation_coefficient = (n * xysum - xsum * ysum) / ( + math.sqrt((n * x2sum - xsum * xsum) * (n * y2sum - ysum * ysum)) + 1e-80) + return (intercept, slope, correlation_coefficient) + + +# returns (intercept, slope, correlation_coefficient) +def linearRegression(yvals, xvals): + assert (len(xvals) == len(yvals)) + n = len(xvals) + xsum = sum(xvals) + ysum = sum(yvals) + x2sum = sum([x * x for x in xvals]) + y2sum = sum([y * y for y in yvals]) + xysum = sum([xvals[i] * yvals[i] for i in range(0, n)]) + return linearRegression_helper(n, xsum, ysum, x2sum, y2sum, xysum) + + +def eval_linearRegression(lr_vals, xval): + return lr_vals[0] + lr_vals[1] * xval + + +# returns (intercept, slope, correlation_coefficient) +def linearRegression_loglog(yvals, xvals): + assert (len(xvals) == len(yvals)) + xlogvals = [math.log(x, 2) for x in xvals] + ylogvals = [math.log(y, 2) for y in yvals] + return linearRegression(ylogvals, xlogvals) + + +def eval_linearRegression_loglog(lr_vals, xval): + return math.pow(2, lr_vals[0]) * math.pow(xval, lr_vals[1]) + + +# returns (intercept, slope, correlation_coefficient) +def segmented_linearRegression_partialRegression(i, n, xvals, yvals, sums, LR): + sums[0] += xvals[i] + sums[1] += yvals[i] + sums[2] += xvals[i] * xvals[i] + sums[3] += yvals[i] * yvals[i] + sums[4] += xvals[i] * yvals[i] + xsum = sums[0] + ysum = sums[1] + x2sum = sums[2] + y2sum = sums[3] + xysum = sums[4] + LR[i] = linearRegression_helper(n, xsum, ysum, x2sum, y2sum, xysum) + + +# returns ([break points...], [linear regressions...], correlation_coefficient) +def segmented_linearRegression_helper(ret, i, n, xvals, yvals, denom, LR_left, LR_right): + lr_vals_left = None + lr_vals_right = None + break_point = None + + if i == 0: + lr_vals_right = LR_right[i] + break_point = xvals[i] - 1.0 + elif i > 0 and i < n: + lr_vals_left = LR_left[i - 1] + lr_vals_right = LR_right[i] + break_point = (xvals[i - 1] + xvals[i]) / 2.0 + elif i == n: + lr_vals_left = LR_left[i - 1] + break_point = xvals[i - 1] + 1.0 + else: + assert (0) + + numer = 0.0 + for j in range(0, n): + xval = xvals[j] + yval = yvals[j] + lr_vals = None + if xval < break_point: + lr_vals = 
lr_vals_left + else: + lr_vals = lr_vals_right + lr_yval = eval_linearRegression(lr_vals, xval) + numer += (yval - lr_yval) * (yval - lr_yval) + + correlation_coefficient = 1.0 - numer / denom + if correlation_coefficient > ret[2]: + ret[0] = [break_point, ] + ret[1] = [lr_vals_left, lr_vals_right, ] + ret[2] = correlation_coefficient + + +# returns ([break points...], [linear regressions...], correlation_coefficient) +def segmented_linearRegression(yvals, xvals): + assert (len(xvals) == len(yvals)) + N = len(xvals) + + LR_left = [] + LR_right = [] + for i in range(0, N): + LR_left.append(None) + LR_right.append(None) + + sums = [0.0, 0.0, 0.0, 0.0, 0.0] + for ii in range(0, N): + i = N - ii - 1 + n = ii + 1 + segmented_linearRegression_partialRegression(i, n, xvals, yvals, sums, LR_right) + + sums = [0.0, 0.0, 0.0, 0.0, 0.0] + for i in range(0, N): + n = i + 1 + segmented_linearRegression_partialRegression(i, n, xvals, yvals, sums, LR_left) + + yavg = avg(yvals) + denom = sum([(y - yavg) * (y - yavg) for y in yvals]) + ret = [[], [], -math.inf] + for i in range(0, N + 1): + segmented_linearRegression_helper(ret, i, N, xvals, yvals, denom, LR_left, LR_right) + + return (*ret,) + + +def find_segment(break_points, xval): + break_i = len(break_points) + for i in range(0, len(break_points)): + break_point = break_points[i] + if xval < break_point: + break_i = i + break + return break_i + + +def eval_segmented_linearRegression(slr_vals, xval): + break_i = find_segment(slr_vals[0], xval) + return eval_linearRegression(slr_vals[1][break_i], xval) + + +# returns ([break points...], [linear regressions...], correlation_coefficient) +def segmented_linearRegression_loglog(yvals, xvals): + assert (len(xvals) == len(yvals)) + xlogvals = [math.log(x, 2) for x in xvals] + ylogvals = [math.log(y, 2) for y in yvals] + return segmented_linearRegression(ylogvals, xlogvals) + + +def eval_segmented_linearRegression_loglog(slr_vals, xval): + break_i = find_segment(slr_vals[0], math.log(xval, 2)) + return eval_linearRegression_loglog(slr_vals[1][break_i], xval) + + +class Data: + num_sweeps = 0 + sweeps = {} + sweep_markers = {} + exclude_sweeps = {} + + num_run_sizes = 0 + run_sizes = {} + + include_kernel_groups = {} + exclude_kernel_groups = {} + + num_kernels = 0 + kernels = {} + include_kernels = {} + exclude_kernels = {} + + num_variants = 0 + variants = {} + variant_colors = {} + include_variants = {} + exclude_variants = {} + + num_tunings = 0 + tunings = {} + tuning_formats = {} + include_tunings = {} + exclude_tunings = {} + + def add_sweep(sweep_name): + sweep_index = Data.num_sweeps + Data.num_sweeps += 1 + Data.sweeps[sweep_name] = sweep_index + Data.sweeps[sweep_index] = sweep_name + + def add_run_size(run_size_name): + run_size_index = Data.num_run_sizes + Data.num_run_sizes += 1 + Data.run_sizes[run_size_name] = run_size_index + Data.run_sizes[run_size_index] = run_size_name + + def add_kernel(kernel_name): + kernel_index = Data.num_kernels + Data.num_kernels += 1 + Data.kernels[kernel_name] = kernel_index + Data.kernels[kernel_index] = kernel_name + + def add_variant(variant_name): + variant_index = Data.num_variants + Data.num_variants += 1 + Data.variants[variant_name] = variant_index + Data.variants[variant_index] = variant_name + if variant_name in g_known_variants: + variant_color = g_known_variants[variant_name]["color"] + Data.variant_colors[variant_name] = variant_color + Data.variant_colors[variant_index] = variant_color + else: + print("Unknown variant {0}".format(variant_name)) + 
sys.exit(1) + + def add_tuning(tuning_name): + tuning_index = Data.num_tunings + Data.num_tunings += 1 + Data.tunings[tuning_name] = tuning_index + Data.tunings[tuning_index] = tuning_name + if tuning_name in g_known_tunings: + tuning_format = g_known_tunings[tuning_name]["format"] + Data.tuning_formats[tuning_name] = tuning_format + Data.tuning_formats[tuning_index] = tuning_format + else: + print("Unknown tuning {0}".format(tuning_name)) + sys.exit(1) + + num_axes = 5 + axes = {"sweep_dir_name": 0, 0: "sweep_dir_name", + "run_size" : 1, 1: "run_size", + "kernel_index" : 2, 2: "kernel_index", + "variant_index" : 3, 3: "variant_index", + "tuning_index" : 4, 4: "tuning_index", } + + def get_axis_name(axis_index): + if axis_index in Data.axes: + return Data.axes[axis_index] + else: + raise NameError("Unknown axis index {}".format(axis_index)) + + def get_index_name(axis_index, index): + if axis_index == Data.axes["sweep_dir_name"]: + return Data.sweeps[index] + elif axis_index == Data.axes["run_size"]: + return Data.run_sizes[index] + elif axis_index == Data.axes["kernel_index"]: + return Data.kernels[index] + elif axis_index == Data.axes["variant_index"]: + return Data.variants[index] + elif axis_index == Data.axes["tuning_index"]: + return Data.tunings[index] + else: + raise NameError("Unknown axis index {}".format(axis_index)) + + def get_axis_index_str(axis_index, index): + return "{}:{}".format(Data.get_axis_name(axis_index), Data.get_index_name(axis_index, index)) + + def get_axes_index_str(axes_index): + name = "{" + for axis_index, index in axes_index.items(): + name = "{}{},".format(name, Data.get_axis_index_str(axis_index, index)) + return "{}}}".format(name) + + def get_axis_index(axis_name, index_name): + if axis_name == "sweep_dir_name": + return {Data.axes[axis_name]: Data.sweeps[index_name], } + elif axis_name == "run_size": + return {Data.axes[axis_name]: Data.run_sizes[index_name], } + elif axis_name == "kernel_index": + return {Data.axes[axis_name]: Data.kernels[index_name], } + elif axis_name == "variant_index": + return {Data.axes[axis_name]: Data.variants[index_name], } + elif axis_name == "tuning_index": + return {Data.axes[axis_name]: Data.tunings[index_name], } + else: + raise NameError("Unknown axis name {}".format(axis_name)) + + def axes_difference(axes, partial_axes_index): + new_axes = [] + for axis_index in axes: + if not axis_index in partial_axes_index: + new_axes.append(axis_index) + return new_axes + + # multi-dimensional array structured like this + # directory name - platform, compiler, etc + # run size - problem size, for run_sizes + # kernel index - for kernels + info_axes = [axes["sweep_dir_name"], + axes["run_size"], + axes["kernel_index"], ] + + # multi-dimensional array structured like this + # directory name - platform, compiler, etc + # run size - problem size, for run_sizes + # kernel index - for kernels + # variant index - for variants + # tuning index - for tunings + data_axes = [axes["sweep_dir_name"], + axes["run_size"], + axes["kernel_index"], + axes["variant_index"], + axes["tuning_index"], ] + + # multi-dimensional array structured like data but missing some dimensions + # directory name - platform, compiler, etc + # kernel index - for kernels + # variant index - for variants + # tuning index - for tunings + run_size_reduced_axes = [axes["sweep_dir_name"], + axes["kernel_index"], + axes["variant_index"], + axes["tuning_index"], ] + + data_model_kind = "time(s)" + + def MultiAxesTreeKeyGenerator0(data_tree): + assert (len(data_tree.axes) == 
0) + if False: + yield {} + + def MultiAxesTreeKeyGenerator1(data_tree): + assert (len(data_tree.axes) == 1) + assert (data_tree.data) + for k0 in data_tree.data.keys(): + yield {data_tree.axes[0]: k0, } + + def MultiAxesTreeKeyGenerator2(data_tree): + assert (len(data_tree.axes) == 2) + assert (data_tree.data) + for k0, v0 in data_tree.data.items(): + for k1 in v0.keys(): + yield {data_tree.axes[0]: k0, + data_tree.axes[1]: k1, } + + def MultiAxesTreeKeyGenerator3(data_tree): + assert (len(data_tree.axes) == 3) + assert (data_tree.data) + for k0, v0 in data_tree.data.items(): + for k1, v1 in v0.items(): + for k2 in v1.keys(): + yield {data_tree.axes[0]: k0, + data_tree.axes[1]: k1, + data_tree.axes[2]: k2, } + + def MultiAxesTreeKeyGenerator4(data_tree): + assert (len(data_tree.axes) == 4) + assert (data_tree.data) + for k0, v0 in data_tree.data.items(): + for k1, v1 in v0.items(): + for k2, v2 in v1.items(): + for k3 in v2.keys(): + yield {data_tree.axes[0]: k0, + data_tree.axes[1]: k1, + data_tree.axes[2]: k2, + data_tree.axes[3]: k3, } + + def MultiAxesTreeKeyGenerator5(data_tree): + assert (len(data_tree.axes) == 5) + assert (data_tree.data) + for k0, v0 in data_tree.data.items(): + for k1, v1 in v0.items(): + for k2, v2 in v1.items(): + for k3, v3 in v2.items(): + for k4 in v3.keys(): + yield {data_tree.axes[0]: k0, + data_tree.axes[1]: k1, + data_tree.axes[2]: k2, + data_tree.axes[3]: k3, + data_tree.axes[4]: k4, } + + def MultiAxesTreeItemGenerator0(data_tree): + assert (len(data_tree.axes) == 0) + if False: + yield ({}, None,) + + def MultiAxesTreeItemGenerator1(data_tree): + assert (len(data_tree.axes) == 1) + assert (data_tree.data) + for k0, v0 in data_tree.data.items(): + yield ({data_tree.axes[0]: k0, }, v0,) + + def MultiAxesTreeItemGenerator2(data_tree): + assert (len(data_tree.axes) == 2) + assert (data_tree.data) + for k0, v0 in data_tree.data.items(): + for k1, v1 in v0.items(): + yield ({data_tree.axes[0]: k0, + data_tree.axes[1]: k1, }, v1,) + + def MultiAxesTreeItemGenerator3(data_tree): + assert (len(data_tree.axes) == 3) + assert (data_tree.data) + for k0, v0 in data_tree.data.items(): + for k1, v1 in v0.items(): + for k2, v2 in v1.items(): + yield ({data_tree.axes[0]: k0, + data_tree.axes[1]: k1, + data_tree.axes[2]: k2, }, v2,) + + def MultiAxesTreeItemGenerator4(data_tree): + assert (len(data_tree.axes) == 4) + assert (data_tree.data) + for k0, v0 in data_tree.data.items(): + for k1, v1 in v0.items(): + for k2, v2 in v1.items(): + for k3, v3 in v2.items(): + yield ({data_tree.axes[0]: k0, + data_tree.axes[1]: k1, + data_tree.axes[2]: k2, + data_tree.axes[3]: k3, }, v3,) + + def MultiAxesTreeItemGenerator5(data_tree): + assert (len(data_tree.axes) == 5) + assert (data_tree.data) + + for k0, v0 in data_tree.data.items(): + for k1, v1 in v0.items(): + for k2, v2 in v1.items(): + for k3, v3 in v2.items(): + for k4, v4 in v3.items(): + yield ({data_tree.axes[0]: k0, + data_tree.axes[1]: k1, + data_tree.axes[2]: k2, + data_tree.axes[3]: k3, + data_tree.axes[4]: k4, }, v4,) + + def MultiAxesTreePartialItemGenerator_helper(data_tree, partial_axes_index, + axes_index, leftover_axes_index, + val, depth): + if data_tree.axes[depth] in partial_axes_index: + key = partial_axes_index[data_tree.axes[depth]] + if key in val: + val = val[key] + axes_index[data_tree.axes[depth]] = key + if depth + 1 == len(data_tree.axes): + yield (axes_index.copy(), leftover_axes_index.copy(), val,) + else: + gen = Data.MultiAxesTreePartialItemGenerator_helper(data_tree, partial_axes_index, + 
axes_index, leftover_axes_index, val, depth + 1) + for yld in gen: + yield yld + else: + # print(data_tree, partial_axes_index, + # axes_index, leftover_axes_index, + # key, val, depth) + raise NameError("invalid index {} {}".format(Data.get_axes_index_str(axes_index), + Data.get_axis_index_str(data_tree.axes[depth], key))) + else: + for key, val in val.items(): + axes_index[data_tree.axes[depth]] = key + leftover_axes_index[data_tree.axes[depth]] = key + if depth + 1 == len(data_tree.axes): + yield (axes_index.copy(), leftover_axes_index.copy(), val,) + else: + gen = Data.MultiAxesTreePartialItemGenerator_helper(data_tree, partial_axes_index, + axes_index, leftover_axes_index, val, depth + 1) + for yld in gen: + yield yld + + def MultiAxesTreePartialItemGenerator0(data_tree, partial_axes_index): + assert (len(data_tree.axes) == 0) + if False: + yield ({}, None,) + + def MultiAxesTreePartialItemGenerator1(data_tree, partial_axes_index): + assert (len(data_tree.axes) == 1) + assert (data_tree.data) + return Data.MultiAxesTreePartialItemGenerator_helper(data_tree, partial_axes_index, {}, {}, data_tree.data, 0) + + def MultiAxesTreePartialItemGenerator2(data_tree, partial_axes_index): + assert (len(data_tree.axes) == 2) + assert (data_tree.data) + return Data.MultiAxesTreePartialItemGenerator_helper(data_tree, partial_axes_index, {}, {}, data_tree.data, 0) + + def MultiAxesTreePartialItemGenerator3(data_tree, partial_axes_index): + assert (len(data_tree.axes) == 3) + assert (data_tree.data) + return Data.MultiAxesTreePartialItemGenerator_helper(data_tree, partial_axes_index, {}, {}, data_tree.data, 0) + + def MultiAxesTreePartialItemGenerator4(data_tree, partial_axes_index): + assert (len(data_tree.axes) == 4) + assert (data_tree.data) + return Data.MultiAxesTreePartialItemGenerator_helper(data_tree, partial_axes_index, {}, {}, data_tree.data, 0) + + def MultiAxesTreePartialItemGenerator5(data_tree, partial_axes_index): + assert (len(data_tree.axes) == 5) + assert (data_tree.data) + return Data.MultiAxesTreePartialItemGenerator_helper(data_tree, partial_axes_index, {}, {}, data_tree.data, 0) + + class MultiAxesTree: + # axes is an array of axis_indices in the depth order they occur in the tree + # indices is a dictionary of axis_indices to indices + + def __init__(self, axes): + assert (axes) + self.axes = axes + self.data = {} + + def check(self, axes_index): + data = self.data + for axis_index in self.axes: + if not axis_index in axes_index: + axis_name = Data.axes[axis_index] + raise NameError("Missing axis {}".format(axis_name)) + index = axes_index[axis_index] + if not index in data: + return False + data = data[index] + return True + + def get(self, axes_index): + data = self.data + for axis_index in self.axes: + if not axis_index in axes_index: + axis_name = Data.axes[axis_index] + raise NameError("Missing axis {}".format(axis_name)) + index = axes_index[axis_index] + if not index in data: + raise NameError("Missing index {}".format(index)) + data = data[index] + return data + + def set(self, axes_index, val): + data = self.data + for i in range(0, len(self.axes) - 1): + axis_index = self.axes[i] + if not axis_index in axes_index: + axis_name = Data.axes[axis_index] + raise NameError("Missing axis {}".format(axis_name)) + index = axes_index[axis_index] + if not index in data: + data[index] = {} + data = data[index] + axis_index = self.axes[len(self.axes) - 1] + if not axis_index in axes_index: + axis_name = Data.axes[axis_index] + raise NameError("Missing axis {}".format(axis_name)) + 
index = axes_index[axis_index] + data[index] = val + + def indexName(self, axes_index): + name = "" + for axis_index, index in axes_index.items(): + if name: + name = "{} {}".format(name, Data.get_index_name(axis_index, index)) + else: + name = Data.get_index_name(axis_index, index) + return name + + def axesString(self): + axes_names = "" + for axis_index in self.axes: + if axes_names: + axes_names = "{}, {}".format(axes_names, Data.axes[axis_index]) + else: + axes_names = "[{}".format(Data.axes[axis_index]) + return "{}]".format(axes_names) + + def dataString(self, compact=True): + if compact: + buf = self.axesString() + "\n" + else: + buf = "" + for item in self.items(): + keys, value = item + #print(str(keys)) + if compact: + buf += str(item) + "\n" + else: + buf += "(" + Data.get_axes_index_str(keys) + "," + str(value) + ")\n" + return buf + + def __repr__(self): + return "MultiAxesTree({}):\n{}".format(self.axesString(), self.dataString()) + + def __str__(self): + return "MultiAxesTree({})".format(self.axesString()) + + def keys(self): + assert (self.data != None) + if len(self.axes) == 0: + return Data.MultiAxesTreeKeyGenerator0(self) + elif len(self.axes) == 1: + return Data.MultiAxesTreeKeyGenerator1(self) + elif len(self.axes) == 2: + return Data.MultiAxesTreeKeyGenerator2(self) + elif len(self.axes) == 3: + return Data.MultiAxesTreeKeyGenerator3(self) + elif len(self.axes) == 4: + return Data.MultiAxesTreeKeyGenerator4(self) + elif len(self.axes) == 5: + return Data.MultiAxesTreeKeyGenerator5(self) + else: + raise ValueError + + def items(self): + assert (self.data != None) + if len(self.axes) == 0: + return Data.MultiAxesTreeItemGenerator0(self) + elif len(self.axes) == 1: + return Data.MultiAxesTreeItemGenerator1(self) + elif len(self.axes) == 2: + return Data.MultiAxesTreeItemGenerator2(self) + elif len(self.axes) == 3: + return Data.MultiAxesTreeItemGenerator3(self) + elif len(self.axes) == 4: + return Data.MultiAxesTreeItemGenerator4(self) + elif len(self.axes) == 5: + return Data.MultiAxesTreeItemGenerator5(self) + else: + raise ValueError + + def partial_match_items(self, partial_axes_index): + assert (self.data != None) + num_matching_indices = 0 + for axis_index in self.axes: + if axis_index in partial_axes_index: + num_matching_indices += 1 + assert (num_matching_indices == len(partial_axes_index)) + if len(self.axes) == 0: + return Data.MultiAxesTreePartialItemGenerator0(self, partial_axes_index) + elif len(self.axes) == 1: + return Data.MultiAxesTreePartialItemGenerator1(self, partial_axes_index) + elif len(self.axes) == 2: + return Data.MultiAxesTreePartialItemGenerator2(self, partial_axes_index) + elif len(self.axes) == 3: + return Data.MultiAxesTreePartialItemGenerator3(self, partial_axes_index) + elif len(self.axes) == 4: + return Data.MultiAxesTreePartialItemGenerator4(self, partial_axes_index) + elif len(self.axes) == 5: + return Data.MultiAxesTreePartialItemGenerator5(self, partial_axes_index) + else: + raise ValueError + + def __iter__(self): + return self.keys() + + class DataTree: + + def __init__(self, kind, label, model_kind=None, axes=None, args=None, func=None): + #print("DataTree init:"+str(kind)+ ' ' + str(label) + ' ' + str(args)) + self.kind = kind + self.label = label + self.axes = axes + self.args = args + self.func = func + self.model_kind = model_kind + if not self.model_kind and self.args: + self.model_kind = self.args[0] + self.data = None + + def makeData(self, axes=None): + if not self.axes: + if axes: + self.axes = axes + elif 
self.model_kind and self.model_kind in Data.kinds:
+                    self.axes = Data.kinds[self.model_kind].axes
+            assert (self.axes)
+            self.data = Data.MultiAxesTree(self.axes)
+
+        def hasAxes(self, other_axes):
+            for axis_index in other_axes:
+                if not axis_index in self.axes:
+                    return False
+            return True
+
+        def sameAxes(self, other_axes):
+            if len(self.axes) != len(other_axes):
+                return False
+            return self.hasAxes(other_axes)
+
+        def missingAxes(self, other_axes):
+            for axis_index in other_axes:
+                if not axis_index in self.axes:
+                    return True
+            return False
+
+        def check(self, axes_index):
+            return self.data.check(axes_index)
+
+        def get(self, axes_index):
+            return self.data.get(axes_index)
+
+        def set(self, axes_index, val):
+            return self.data.set(axes_index, val)
+
+        def keys(self):
+            return self.data.keys()
+
+        def items(self):
+            return self.data.items()
+
+        def partial_match_items(self, partial_axes_index):
+            return self.data.partial_match_items(partial_axes_index)
+
+        def __iter__(self):
+            return iter(self.data)
+
+        def indexName(self, axes_index):
+            return self.data.indexName(axes_index)
+
+        def axesString(self):
+            return self.data.axesString()
+
+        def dataString(self, compact=True):
+            return self.data.dataString(compact)
+
+        def __repr__(self):
+            return "DataTree({} {} {}):\n{}".format(self.kind, self.label, self.axesString(), self.dataString())
+
+        def __str__(self):
+            return "DataTree({} {} {})".format(self.kind, self.label, self.axesString())
+
+    class DataTreeTemplate:
+
+        def __init__(self, kind_template, label_template,
+                     combined_axis=None, model_kind=None, args=None, func=None):
+            self.kind_template = kind_template
+            self.label_template = label_template
+            self.combined_axis_template = combined_axis
+            self.model_kind_template = model_kind
+            self.arg_templates = args
+            self.func = func
+
+        def getKind(self, template_args):
+            return self.kind_template.format(*template_args)
+
+        def getLabel(self, template_args):
+            arg_labels = [arg_kind in Data.kinds and Data.kinds[arg_kind].label or None for arg_kind in template_args]
+            return self.label_template.format(*arg_labels)
+
+        def getArgs(self, template_args):
+            return [arg.format(*template_args) for arg in self.arg_templates]
+
+        def getCombinedAxis(self, template_args):
+            return self.combined_axis_template.format(*template_args)
+
+        def getModelKind(self, args, template_args):
+            assert (len(args) > 0)
+            model_kind = None
+            # choose the model_kind with the most axes
+            for kind in args:
+                if kind in Data.kinds:
+                    if not model_kind:
+                        model_kind = kind
+                    elif len(Data.kinds[kind].axes) > len(Data.kinds[model_kind].axes):
+                        model_kind = kind
+            # an explicit model_kind template overrides the chosen model_kind
+            if self.model_kind_template:
+                model_kind = self.model_kind_template.format(*template_args)
+            assert (model_kind)
+            return model_kind
+
+        def getAxes(self, model_kind, template_args):
+            model_axes = Data.kinds[model_kind].axes
+            combined_axis_index = None
+            if self.combined_axis_template:
+                combined_axis_name = self.getCombinedAxis(template_args)
+                combined_axis_index = Data.axes[combined_axis_name]
+            axes = []
+            for axis_index in model_axes:
+                if axis_index != combined_axis_index:
+                    axes.append(axis_index)
+            return axes
+
+        def makeDataTree(self, template_args):
+            kind = self.getKind(template_args)
+            label = self.getLabel(template_args)
+            args = self.getArgs(template_args)
+            model_kind = self.getModelKind(args, template_args)
+            axes = self.getAxes(model_kind, template_args)
+            return Data.DataTree(kind, label, model_kind=model_kind, axes=axes, args=args, func=self.func)
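+
+    # Example (illustrative): computing "avg<time(s),run_size>" expands the
+    # "avg" template with template_args ["time(s)", "run_size"]; the resulting
+    # DataTree keeps the axes of "time(s)" minus the combined "run_size" axis,
+    # and compute_data then gathers each remaining index's values across run
+    # sizes into a list that func=avg reduces to a single number.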
+
+    # has info derivable from first kind "time(s)" which is read from files
+    kinds = {"Problem size"               : DataTree("Problem size", "Problem size", axes=info_axes),
+             "Reps"                       : DataTree("Reps", "Reps", axes=info_axes),
+             "Iterations/rep"             : DataTree("Iterations/rep", "Iterations", axes=info_axes),
+             "Kernels/rep"                : DataTree("Kernels/rep", "Kernels", axes=info_axes),
+             "Bytes/rep"                  : DataTree("Bytes/rep", "Bytes", axes=info_axes),
+             "FLOPS/rep"                  : DataTree("FLOPS/rep", "FLOPS", axes=info_axes),
+
+             "time(s)"                    : DataTree("time(s)", "time(s)", axes=data_axes),
+
+             "time(ms)"                   : DataTree("time(ms)", "time(ms)", args=["time(s)"], func=lambda t: t * 1000.0),
+             "time(us)"                   : DataTree("time(us)", "time(us)", args=["time(s)"],
+                                                     func=lambda t: t * 1000000.0),
+             "time(ns)"                   : DataTree("time(ns)", "time(ns)", args=["time(s)"],
+                                                     func=lambda t: t * 1000000000.0),
+
+             "time/rep(s)"                : DataTree("time/rep(s)", "time(s)", args=["time(s)", "Reps"],
+                                                     func=lambda t, r: t / r),
+             "time/rep(ms)"               : DataTree("time/rep(ms)", "time(ms)", args=["time/rep(s)"],
+                                                     func=lambda tpr: tpr * 1000.0),
+             "time/rep(us)"               : DataTree("time/rep(us)", "time(us)", args=["time/rep(s)"],
+                                                     func=lambda tpr: tpr * 1000000.0),
+             "time/rep(ns)"               : DataTree("time/rep(ns)", "time(ns)", args=["time/rep(s)"],
+                                                     func=lambda tpr: tpr * 1000000000.0),
+
+             "time/it(s)"                 : DataTree("time/it(s)", "time(s)", args=["time/rep(s)", "Iterations/rep"],
+                                                     func=lambda tpr, ipr: tpr / ipr),
+             "time/it(ms)"                : DataTree("time/it(ms)", "time(ms)", args=["time/it(s)"],
+                                                     func=lambda tpi: tpi * 1000.0),
+             "time/it(us)"                : DataTree("time/it(us)", "time(us)", args=["time/it(s)"],
+                                                     func=lambda tpi: tpi * 1000000.0),
+             "time/it(ns)"                : DataTree("time/it(ns)", "time(ns)", args=["time/it(s)"],
+                                                     func=lambda tpi: tpi * 1000000000.0),
+
+             "time/kernel(s)"             : DataTree("time/kernel(s)", "time(s)", args=["time/rep(s)", "Kernels/rep"],
+                                                     func=lambda tpr, kpr: tpr / kpr),
+             "time/kernel(ms)"            : DataTree("time/kernel(ms)", "time(ms)", args=["time/kernel(s)"],
+                                                     func=lambda tpk: tpk * 1000.0),
+             "time/kernel(us)"            : DataTree("time/kernel(us)", "time(us)", args=["time/kernel(s)"],
+                                                     func=lambda tpk: tpk * 1000000.0),
+             "time/kernel(ns)"            : DataTree("time/kernel(ns)", "time(ns)", args=["time/kernel(s)"],
+                                                     func=lambda tpk: tpk * 1000000000.0),
+
+             "throughput(Problem size/s)" : DataTree("throughput(Problem size/s)", "throughput(Problem size/s)",
+                                                     args=["time/rep(s)", "Problem size"], func=lambda tpr, ps: ps / tpr),
+             "throughput(Problem size/ms)": DataTree("throughput(Problem size/ms)", "throughput(Problem size/ms)",
+                                                     args=["throughput(Problem size/s)"], func=lambda thr: thr / 1000.0),
+             "throughput(Problem size/us)": DataTree("throughput(Problem size/us)", "throughput(Problem size/us)",
+                                                     args=["throughput(Problem size/s)"], func=lambda thr: thr / 1000000.0),
+             "throughput(Problem size/ns)": DataTree("throughput(Problem size/ns)", "throughput(Problem size/ns)",
+                                                     args=["throughput(Problem size/s)"],
+                                                     func=lambda thr: thr / 1000000000.0),
+             "throughput(KProblem size/s)": DataTree("throughput(KProblem size/s)", "throughput(KProblem size/s)",
+                                                     args=["throughput(Problem size/s)"], func=lambda thr: thr / 1000.0),
+             "throughput(MProblem size/s)": DataTree("throughput(MProblem size/s)", "throughput(MProblem size/s)",
+                                                     args=["throughput(Problem size/s)"], func=lambda thr: thr / 1000000.0),
+             "throughput(GProblem size/s)": DataTree("throughput(GProblem size/s)", "throughput(GProblem size/s)",
+                                                     args=["throughput(Problem size/s)"],
+                                                     func=lambda thr: thr / 1000000000.0),
+             "throughput(TProblem size/s)": 
DataTree("throughput(TProblem size/s)", "throughput(TProblem size/s)", + args=["throughput(Problem size/s)"], + func=lambda thr: thr / 1000000000000.0), + + "bandwidth(B/s)" : DataTree("bandwidth(B/s)", "bandwidth(B/s)", args=["time/rep(s)", "Bytes/rep"], + func=lambda tpr, bpr: bpr / tpr), + "bandwidth(KB/s)" : DataTree("bandwidth(KB/s)", "bandwidth(KB/s)", args=["bandwidth(B/s)"], + func=lambda bps: bps / 1000.0), + "bandwidth(MB/s)" : DataTree("bandwidth(MB/s)", "bandwidth(MB/s)", args=["bandwidth(B/s)"], + func=lambda bps: bps / 1000000.0), + "bandwidth(GB/s)" : DataTree("bandwidth(GB/s)", "bandwidth(GB/s)", args=["bandwidth(B/s)"], + func=lambda bps: bps / 1000000000.0), + "bandwidth(TB/s)" : DataTree("bandwidth(TB/s)", "bandwidth(TB/s)", args=["bandwidth(B/s)"], + func=lambda bps: bps / 1000000000000.0), + "bandwidth(KiB/s)" : DataTree("bandwidth(KiB/s)", "bandwidth(KiB/s)", args=["bandwidth(B/s)"], + func=lambda bps: bps / 1024.0), + "bandwidth(MiB/s)" : DataTree("bandwidth(MiB/s)", "bandwidth(MiB/s)", args=["bandwidth(B/s)"], + func=lambda bps: bps / 1048576.0), + "bandwidth(GiB/s)" : DataTree("bandwidth(GiB/s)", "bandwidth(GiB/s)", args=["bandwidth(B/s)"], + func=lambda bps: bps / 1073741824.0), + "bandwidth(TiB/s)" : DataTree("bandwidth(TiB/s)", "bandwidth(TiB/s)", args=["bandwidth(B/s)"], + func=lambda bps: bps / 1099511627776.0), + + "FLOPS" : DataTree("FLOPS", "FLOPS", args=["time/rep(s)", "FLOPS/rep"], + func=lambda tpr, fpr: fpr / tpr), + "KFLOPS" : DataTree("KFLOPS", "KFLOPS", args=["FLOPS"], func=lambda fps: fps / 1000.0), + "MFLOPS" : DataTree("MFLOPS", "MFLOPS", args=["FLOPS"], func=lambda fps: fps / 1000000.0), + "GFLOPS" : DataTree("GFLOPS", "GFLOPS", args=["FLOPS"], + func=lambda fps: fps / 1000000000.0), + "TFLOPS" : DataTree("TFLOPS", "TFLOPS", args=["FLOPS"], + func=lambda fps: fps / 1000000000000.0), + + } + + kind_templates = { + "log10" : DataTreeTemplate("log10<{0}>", "log10({0})", args=["{0}", ], + func=lambda val: math.log(val, 10)), + "log2" : DataTreeTemplate("log2<{0}>", "log2({0})", args=["{0}", ], + func=lambda val: math.log(val, 2)), + "ln" : DataTreeTemplate("ln<{0}>", "ln({0})", args=["{0}", ], + func=lambda val: math.log(val)), + + "add" : DataTreeTemplate("add<{0},{1}>", "{0} + {1}", args=["{0}", "{1}"], + func=lambda lhs, rhs: lhs + rhs), + "sub" : DataTreeTemplate("sub<{0},{1}>", "{0} - {1}", args=["{0}", "{1}"], + func=lambda lhs, rhs: lhs - rhs), + "mul" : DataTreeTemplate("mul<{0},{1}>", "{0} * {1}", args=["{0}", "{1}"], + func=lambda lhs, rhs: lhs * rhs), + "div" : DataTreeTemplate("div<{0},{1}>", "{0} / {1}", args=["{0}", "{1}"], + func=lambda lhs, rhs: lhs / rhs), + + "first" : DataTreeTemplate("first<{0},{1}>", "{0}", combined_axis="{1}", args=["{0}"], + func=first), + "last" : DataTreeTemplate("last<{0},{1}>", "{0}", combined_axis="{1}", args=["{0}"], + func=last), + "min" : DataTreeTemplate("min<{0},{1}>", "{0}", combined_axis="{1}", args=["{0}"], + func=min), + "max" : DataTreeTemplate("max<{0},{1}>", "{0}", combined_axis="{1}", args=["{0}"], + func=max), + "sum" : DataTreeTemplate("sum<{0},{1}>", "{0}", combined_axis="{1}", args=["{0}"], + func=sum), + "avg" : DataTreeTemplate("avg<{0},{1}>", "{0}", combined_axis="{1}", args=["{0}"], + func=avg), + "stddev" : DataTreeTemplate("stddev<{0},{1}>", "{0}", combined_axis="{1}", args=["{0}"], + func=stddev), + "relstddev" : DataTreeTemplate("relstddev<{0},{1}>", "{0}", combined_axis="{1}", args=["{0}"], + func=relstddev), + + "_LR" : DataTreeTemplate("_LR<{0}>", "intercept, slope, correlation 
coefficient", + combined_axis="run_size", args=["{0}", "Problem size"], + func=linearRegression), + "LR_intercept" : DataTreeTemplate("LR_intercept<{0}>", "intercept", args=["_LR<{0}>"], + func=lambda lr: lr[0]), + "LR_slope" : DataTreeTemplate("LR_slope<{0}>", "slope", args=["_LR<{0}>"], + func=lambda lr: lr[1]), + "LR_correlationCoefficient" : DataTreeTemplate("LR_correlationCoefficient<{0}>", "correlation coefficient", + args=["_LR<{0}>"], func=lambda lr: lr[2]), + "LR" : DataTreeTemplate("LR<{0}>", "{0}", model_kind="{0}", + args=["_LR<{0}>", "Problem size"], func=eval_linearRegression), + + "_LR_log" : DataTreeTemplate("_LR_log<{0}>", "intercept, slope, correlation coefficient", + combined_axis="run_size", args=["{0}", "Problem size"], + func=linearRegression_loglog), + "LR_log_intercept" : DataTreeTemplate("LR_log_intercept<{0}>", "intercept", args=["_LR_log<{0}>"], + func=lambda lr: lr[0]), + "LR_log_slope" : DataTreeTemplate("LR_log_slope<{0}>", "slope", args=["_LR_log<{0}>"], + func=lambda lr: lr[1]), + "LR_log_correlationCoefficient" : DataTreeTemplate("LR_log_correlationCoefficient<{0}>", "correlation coefficient", + args=["_LR_log<{0}>"], func=lambda lr: lr[2]), + "LR_log" : DataTreeTemplate("LR_log<{0}>", "{0}", model_kind="{0}", + args=["_LR_log<{0}>", "Problem size"], + func=eval_linearRegression_loglog), + + "_LR2" : DataTreeTemplate("_LR2<{0}>", "intercept, slope, correlation coefficient", + combined_axis="run_size", args=["{0}", "Problem size"], + func=segmented_linearRegression), + "LR2_intercept" : DataTreeTemplate("LR2_intercept<{0}>", "intercept", args=["_LR2<{0}>"], + func=lambda lr: lr[0]), + "LR2_slope" : DataTreeTemplate("LR2_slope<{0}>", "slope", args=["_LR2<{0}>"], + func=lambda lr: lr[1]), + "LR2_correlationCoefficient" : DataTreeTemplate("LR2_correlationCoefficient<{0}>", "correlation coefficient", + args=["_LR2<{0}>"], func=lambda lr: lr[2]), + "LR2" : DataTreeTemplate("LR2<{0}>", "{0}", model_kind="{0}", + args=["_LR2<{0}>", "Problem size"], + func=eval_segmented_linearRegression), + + "_LR2_log" : DataTreeTemplate("_LR2_log<{0}>", "intercept, slope, correlation coefficient", + combined_axis="run_size", args=["{0}", "Problem size"], + func=segmented_linearRegression_loglog), + "LR2_log_intercept" : DataTreeTemplate("LR2_log_intercept<{0}>", "intercept", args=["_LR2_log<{0}>"], + func=lambda lr: lr[0]), + "LR2_log_slope" : DataTreeTemplate("LR2_log_slope<{0}>", "slope", args=["_LR2_log<{0}>"], + func=lambda lr: lr[1]), + "LR2_log_correlationCoefficient": DataTreeTemplate("LR2_log_correlationCoefficient<{0}>", "correlation coefficient", + args=["_LR2_log<{0}>"], func=lambda lr: lr[2]), + "LR2_log" : DataTreeTemplate("LR2_log<{0}>", "{0}", model_kind="{0}", + args=["_LR2_log<{0}>", "Problem size"], + func=eval_segmented_linearRegression_loglog), + + } + + def compute_data(kind): + if not kind in Data.kinds: + raise NameError("Unknown data kind {}".format(kind)) + + datatree = Data.kinds[kind] + if datatree.data: + return # already calculated + if not (datatree.model_kind and datatree.args and datatree.func): + raise NameError("Computing data is not supported for kind {0}".format(kind)) + + model_kind = datatree.model_kind + compute_args = datatree.args + compute_func = datatree.func + + if model_kind != kind: + Data.compute(model_kind) + + arg_datatrees = () + for arg_kind in compute_args: + # calculate data for arg_kind + Data.compute(arg_kind) + arg_datatree = Data.kinds[arg_kind] + arg_datatrees = arg_datatrees + (arg_datatree,) + + if (not model_kind in 
Data.kinds) or (not Data.kinds[model_kind].data):
+            raise NameError("Model data not available for kind {0}".format(model_kind))
+
+        datatree.makeData()
+
+        use_lists = ()
+        for arg_datatree in arg_datatrees:
+            use_list = datatree.missingAxes(arg_datatree.axes)
+            use_lists = use_lists + (use_list,)
+
+        for axes_index in Data.kinds[model_kind]:
+            # print("compute_data:"+str(axes_index))
+            if not datatree.check(axes_index):
+                args_val = ()
+                for i in range(0, len(arg_datatrees)):
+                    arg_datatree = arg_datatrees[i]
+                    arg_val = arg_datatree.get(axes_index)
+                    if use_lists[i]:
+                        arg_val = [arg_val, ]
+                    args_val = args_val + (arg_val,)
+                datatree.set(axes_index, args_val)
+            else:
+                args_val = datatree.get(axes_index)
+                for i in range(0, len(arg_datatrees)):
+                    if use_lists[i]:
+                        arg_datatree = arg_datatrees[i]
+                        arg_val = arg_datatree.get(axes_index)
+                        args_val[i].append(arg_val)
+
+        for axes_index, args_val in datatree.items():
+            val = compute_func(*args_val)
+            datatree.set(axes_index, val)
+
+    def compute_index(kind_preindex, index_args):
+        # print("compute_index", kind_preindex, index_args)
+        Data.compute(kind_preindex)
+        datatree_preindex = Data.kinds[kind_preindex]
+
+        # extract axes and indices
+        partial_axis_index = {}
+        for index_str in index_args:
+            index_list = index_str.split("::")
+            if len(index_list) != 2:
+                raise NameError("Expected an index of the form axis::index, got {}".format(index_str))
+            axis_name = index_list[0].strip()
+            index_name = index_list[1].strip()
+            partial_axis_index.update(Data.get_axis_index(axis_name, index_name))
+
+        kind = "{}[{}]".format(kind_preindex, ",".join(index_args))
+
+        datatree = None
+        if kind in Data.kinds:
+            datatree = Data.kinds[kind]
+            if datatree.data:
+                return
+        else:
+            axes = Data.axes_difference(datatree_preindex.axes, partial_axis_index)
+            datatree = Data.DataTree(kind, datatree_preindex.label, axes=axes)
+            Data.kinds[kind] = datatree
+
+        datatree.makeData()
+
+        for axes_index, partial_axes_index, value in datatree_preindex.partial_match_items(partial_axis_index):
+            datatree.set(partial_axes_index, value)
+
+    def compute_templated_data(kind_template, template_args):
+        # print("compute_templated_data", kind_template, template_args)
+        if kind_template in Data.kind_templates:
+            kind = Data.kind_templates[kind_template].getKind(template_args)
+            if not kind in Data.kinds:
+                # compute args first to ensure arg kinds exist
+                for arg_kind in Data.kind_templates[kind_template].getArgs(template_args):
+                    Data.compute(arg_kind)
+                Data.kinds[kind] = Data.kind_templates[kind_template].makeDataTree(template_args)
+            Data.compute(kind)
+        else:
+            raise NameError("Unknown kind template {}".format(kind_template))
+
+    def kind_template_scan(kind):
+        # print("kind_template_scan", kind)
+
+        kind_prefix = None
+
+        template_args = []
+        index_args = []
+
+        template_depth = 0
+        index_depth = 0
+
+        arg_end_idx = -1
+
+        # look through the string backwards to find indexing or templating
+        for i_forward in range(0, len(kind)):
+            i = len(kind) - i_forward - 1
+            c = kind[i]
+            if c == ">" or c == "]":
+                if template_depth == 0 and index_depth == 0:
+                    arg_end_idx = i
+                if c == ">":
+                    template_depth += 1
+                elif c == "]":
+                    index_depth += 1
+            elif c == ",":
+                if template_depth == 1 and index_depth == 0:
+                    template_args.append(kind[i + 1:arg_end_idx].strip())
+                    arg_end_idx = i
+                elif template_depth == 0 and index_depth == 1:
+                    index_args.append(kind[i + 1:arg_end_idx].strip())
+                    arg_end_idx = i
+            elif c == "<" or c == "[":
+                if template_depth == 1 and index_depth == 0:
+                    template_args.append(kind[i + 
1:arg_end_idx].strip()) + arg_end_idx = -1 + elif template_depth == 0 and index_depth == 1: + index_args.append(kind[i + 1:arg_end_idx].strip()) + arg_end_idx = -1 + if c == "<": + template_depth -= 1 + elif c == "[": + index_depth -= 1 + if template_depth == 0 and index_depth == 0: + if not kind_prefix: + kind_prefix = kind[:i].strip() + break + assert (arg_end_idx == -1) + assert (template_depth == 0) + assert (index_depth == 0) + assert (kind_prefix) + + # reverse lists + for i in range(0, len(template_args) // 2): + i_rev = len(template_args) - i - 1 + template_args[i], template_args[i_rev] = template_args[i_rev], template_args[i] + for i in range(0, len(index_args) // 2): + i_rev = len(index_args) - i - 1 + index_args[i], index_args[i_rev] = index_args[i_rev], index_args[i] + + return (kind_prefix, template_args, index_args) + + def compute(kind): + if kind in Data.kinds: + if not Data.kinds[kind].data: + Data.compute_data(kind) + else: + pass + else: + kind_template, template_args, index_args = Data.kind_template_scan(kind) + # print("Data.kind_template_scan", kind_template, template_args, index_args) + if template_args: + if kind_template in Data.kind_templates: + Data.compute_templated_data(kind_template, template_args) + else: + raise NameError("Unknown data kind template {}".format(kind)) + elif index_args: + Data.compute_index(kind_template, index_args) + else: + raise NameError("Unknown data kind {}".format(kind)) + + diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index f7501b7b6..849174cac 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -27,10 +27,21 @@ spec=${SPEC:-""} job_unique_id=${CI_JOB_ID:-""} raja_version=${UPDATE_RAJA:-""} sys_type=${SYS_TYPE:-""} +use_dev_shm=${USE_DEV_SHM:-true} + +spack_upstream_path=${SPACK_UPSTREAM_PATH:-"/usr/workspace/umdev/RAJAPerf/upstream"} +update_spack_upstream=${UPDATE_SPACK_UPSTREAM:-false} prefix="" -if [[ -d /dev/shm ]] +if [[ ${update_spack_upstream} == true ]] +then + use_dev_shm=false + echo "We don't build in shared memory when updating the spack upstream" + + prefix=${spack_upstream_path} + mkdir -p ${prefix} +elif [[ -d /dev/shm && ${use_dev_shm} == true ]] then prefix="/dev/shm/${hostname}" if [[ -z ${job_unique_id} ]]; then @@ -43,6 +54,9 @@ then prefix="${prefix}-${job_unique_id}" mkdir -p ${prefix} +else + prefix="spack-and-build-root" + mkdir ${prefix} fi # Dependencies @@ -62,22 +76,23 @@ then exit 1 fi - prefix_opt="" + prefix_opt="--prefix=${prefix}" - if [[ -d /dev/shm ]] + upstream_opt="" + if [[ ${update_spack_upstream} == false && -e ${spack_upstream_path}/.spack-db ]] then - prefix_opt="--prefix=${prefix}" - - # We force Spack to put all generated files (cache and configuration of - # all sorts) in a unique location so that there can be no collision - # with existing or concurrent Spack. - spack_user_cache="${prefix}/spack-user-cache" - export SPACK_DISABLE_LOCAL_CONFIG="" - export SPACK_USER_CACHE_PATH="${spack_user_cache}" - mkdir -p ${spack_user_cache} + upstream_opt="--upstream=${spack_upstream_path}" fi - ./tpl/RAJA/scripts/uberenv/uberenv.py --project-json=".uberenv_config.json" --spec="${spec}" ${prefix_opt} + # We force Spack to put all generated files (cache and configuration of + # all sorts) in a unique location so that there can be no collision + # with existing or concurrent Spack. 
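+    # For example (illustrative): with prefix=/dev/shm/<hostname>-<job id>,
+    # Spack's user-level cache and configuration land under
+    # ${prefix}/spack-user-cache, unique to this CI job.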
+    spack_user_cache="${prefix}/spack-user-cache"
+    export SPACK_DISABLE_LOCAL_CONFIG=""
+    export SPACK_USER_CACHE_PATH="${spack_user_cache}"
+    mkdir -p ${spack_user_cache}
+
+    ./tpl/RAJA/scripts/uberenv/uberenv.py --project-json=".uberenv_config.json" --spec="${spec}" ${prefix_opt} ${upstream_opt}
 
     mv ${project_dir}/tpl/RAJA/*.cmake ${project_dir}/.
@@ -118,7 +133,7 @@ hostconfig=$(basename ${hostconfig_path})
 # Build Directory
 if [[ -z ${build_root} ]]
 then
-    if [[ -d /dev/shm ]]
+    if [[ -d /dev/shm && ${use_dev_shm} == true ]]
     then
         build_root="${prefix}"
     else
@@ -216,12 +231,12 @@ then
     then
         echo "~~~~~~~~~ Run Command: ~~~~~~~~~~~~~~~~~~~~~"
         echo "lrun -n1 ... ctest --output-on-failure -T test"
-        echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+        echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
         lrun -n1 --smpiargs="-disable_gpu_hooks" ctest --output-on-failure -T test
     else
        echo "~~~~~~~~~ Run Command: ~~~~~~~~~~~~~~~~~~~~~"
        echo "lrun -n1 ... ctest --output-on-failure -T test"
-        echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+        echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
        lrun -n1 --smpiargs="-disable_gpu_hooks" ctest --output-on-failure -T test
    fi
 else
@@ -229,12 +244,12 @@ then
    then
        echo "~~~~~~~~~ Run Command: ~~~~~~~~~~~~~~~~~~~~~"
        echo "ctest --output-on-failure -T test 2>&1 | tee tests_output.txt"
-        echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+        echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
        ctest --output-on-failure -T test 2>&1 | tee tests_output.txt
    else
        echo "~~~~~~~~~ Run Command: ~~~~~~~~~~~~~~~~~~~~~"
        echo "ctest --output-on-failure -T test 2>&1 | tee tests_output.txt"
-        echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+        echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
        ctest --output-on-failure -T test 2>&1 | tee tests_output.txt
    fi
 fi
@@ -250,6 +265,24 @@ then
     xsltproc -o junit.xml ${project_dir}/blt/tests/ctest-to-junit.xsl Testing/*/Test.xml
     mv junit.xml ${project_dir}/junit.xml
 
+    if ( find test -name '*.cali' | grep -q '.' ); then
+        reports_dir=${project_dir}/caliper-reports
+        mkdir -p ${reports_dir}
+        cp test/*.cali ${reports_dir}
+
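+        # The comparisons below assume one Caliper file per variant and
+        # backend, named <Variant>_<suffix>.cali (e.g. Base_Seq.cali).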
+        for suffix in "Seq" "OpenMP" "OpenMPTarget" "Cuda" "HIP"; do
+            if ( find ${reports_dir} -name "*${suffix}.cali" | grep -q '.' ); then
+                echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+                echo "Hatchet comparison of Caliper data for ${suffix}"
+                echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+                echo " -> RAJA vs Base"
+                ${project_dir}/scripts/gitlab/hatchet-analysis.py --baseline=${reports_dir}/Base_${suffix}.cali --report=${reports_dir}/RAJA_${suffix}.cali
+                echo " -> Lambda vs Base"
+                ${project_dir}/scripts/gitlab/hatchet-analysis.py --baseline=${reports_dir}/Base_${suffix}.cali --report=${reports_dir}/Lambda_${suffix}.cali
+            fi
+        done
+    fi
+
 if grep -q "Errors while running CTest" ./tests_output.txt
 then
     echo "ERROR: failure(s) while running CTest" && exit 1
diff --git a/scripts/gitlab/hatchet-analysis.py b/scripts/gitlab/hatchet-analysis.py
new file mode 100755
index 000000000..0c7e075df
--- /dev/null
+++ b/scripts/gitlab/hatchet-analysis.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+
+import sys
+import platform
+import datetime as dt
+
+import argparse
+
+parser = argparse.ArgumentParser(add_help=False)
+parser.add_argument('-r', '--report', required=True, nargs=1, help="Pass the Caliper report file.")
+parser.add_argument('-b', '--baseline', required=True, nargs=1, help="Pass the Caliper baseline file.")
+parser.add_argument('-t', '--tolerance', required=False, nargs=1, type=float, default=[0.05], help="Specify the tolerance for pass/fail.")
+
+args = parser.parse_args()
+print(args)
+
+input_deploy_dir_str = "/usr/gapps/spot/dev"
+machine = platform.uname().machine
+
+sys.path.append(input_deploy_dir_str + "/hatchet-venv/" + machine + "/lib/python3.7/site-packages")
+sys.path.append(input_deploy_dir_str + "/hatchet/" + machine)
+sys.path.append(input_deploy_dir_str + "/spotdb")
+
+import hatchet as ht
+
+# This class turns an existing GraphFrame into a "generic" one by renaming
+# the root node into a generic node. We can then compare two "generic" graph
+# frames. In practice we use it to allow Hatchet to compare performance trees
+# generated from RAJA and Base kernels.
+# If they don't have exactly the same structure, then we can use
+# ExtractCommonSubtree below.
+class GenericFrame(ht.GraphFrame):
+    def __init__(self, gf):
+        generic_dataframe = gf.dataframe.copy()
+        generic_graph = gf.graph.copy()
+        generic_exc_metrics = gf.exc_metrics
+        generic_inc_metrics = gf.inc_metrics
+        generic_default_metric = gf.default_metric  # in newer Hatchet
+        generic_dataframe.iloc[0, generic_dataframe.columns.get_loc('name')] = 'Variant'
+        ii = generic_dataframe.index[0]
+        # fr = ht.frame.Frame({'name': 'Variant', 'type' : 'region'})
+        fr = ht.graphframe.Frame({'name': 'Variant', 'type': 'region'})
+        nn = ht.graphframe.Node(fr)
+        setattr(nn, '_hatchet_nid', ii._hatchet_nid)
+        setattr(nn, '_depth', ii._depth)
+        setattr(nn, 'children', ii.children)
+        generic_dataframe.rename(index={ii: nn}, inplace=True)
+        setattr(generic_graph, 'roots', [nn])
+        super().__init__(generic_graph, generic_dataframe, generic_exc_metrics, generic_inc_metrics)
+
+# In this function, we turn dissimilar GraphFrames into comparable ones.
+# The idea behind it is that the trees contain timings for the same algorithms
+# but different implementations (tunings) that result in non-comparable leaves.
+# We extract the minimal value of the lowest-level data leaves to set
+# a common comparison dataset.
+# To understand the implementation below, note that the Caliper annotation
+# follows a 3-level structure:
+# Variant
+# - Group
+# -- Kernel
+# --- Kernel.Tuning
+def ExtractCommonSubtree(gf1: ht.GraphFrame, gf2: ht.GraphFrame, metric: str) -> (ht.GraphFrame):
+    if (gf1.graph == gf2.graph):
+        return gf1
+    else:
+        cc = gf1.deepcopy()
+        cc2 = gf2.deepcopy()
+        cc.unify(cc2)
+        # search for nodes contained in both graphs {0==both, 1==left only, 2==right only}
+        filter_func = lambda x: x["_missing_node"] == 0
+        common_subtree = cc.filter(filter_func, squash=True)
+        # print(common_subtree.dataframe.columns.tolist())
+        # tt is a generator object from a post-order tree traversal, i.e. it starts down at the first set of leaves
+        tt = common_subtree.graph.roots[0].traverse(order="post")
+        s2 = 0.0  # sum accumulated at depth 2
+        s1 = 0.0  # sum accumulated at depth 1
+        s0 = 0.0  # sum accumulated at depth 0
+        m3 = sys.float_info.max  # minimum over the depth-3 tuning leaves
+        # replace subtree values with the sum of the kernels that have run
+        for nn in tt:
+            if nn._depth == 3:
+                if common_subtree.dataframe.loc[nn, metric] < m3:
+                    m3 = common_subtree.dataframe.loc[nn, metric]
+            elif nn._depth == 2:
+                s2 = m3
+                s1 += s2
+                common_subtree.dataframe.loc[nn, metric] = s2
+                m3 = sys.float_info.max
+                s2 = 0
+            elif nn._depth == 1:
+                s0 += s1
+                common_subtree.dataframe.loc[nn, metric] = s1
+                s1 = 0
+            elif nn._depth == 0:
+                common_subtree.dataframe.loc[nn, metric] = s0
+
+        return common_subtree
+
+f1 = args.report[0]
+f2 = args.baseline[0]
+tolerance = args.tolerance[0]
+
+gf1 = GenericFrame(ht.GraphFrame.from_caliperreader(f1))
+gf2 = GenericFrame(ht.GraphFrame.from_caliperreader(f2))
+
+if 'min#inclusive#sum#time.duration' in gf1.inc_metrics:
+    metric = 'min#inclusive#sum#time.duration'
+elif 'Min time/rank' in gf1.inc_metrics:
+    metric = 'Min time/rank'
+else:
+    sys.exit("No known inclusive time metric found in {}".format(f1))
+
+gf11 = gf1
+gf22 = gf2
+
+if len(gf1.graph) != len(gf2.graph):
+    gf11 = ExtractCommonSubtree(gf1, gf2, metric)
+    gf22 = ExtractCommonSubtree(gf2, gf1, metric)
+
+gf3 = gf11 - gf22
+
+# Sort the resulting DataFrame by the metric column in descending order.
+#sorted_df = gf3.dataframe.sort_values(by=[metric], ascending=False)
+
+# Display the resulting DataFrame.
+#print(sorted_df.head())
+
+# Display the calltree.
+#print(gf3.tree(metric_column=metric,precision=5))
+
+# set up the threshold as a fraction of the baseline using the tolerance multiplier
+baseline_node = gf2.graph.roots[0]
+threshold = tolerance * float(gf2.dataframe.loc[baseline_node, metric])
+
+# Get a single metric value for a given node
+root_node = gf3.graph.roots[0]
+result = gf3.dataframe.loc[root_node, metric]
+print("Result =", result, " with threshold =", threshold)
+if result > threshold:
+    print('fail')
+else:
+    print('pass')
+
diff --git a/scripts/lc-builds/blueos_nvcc10_caliper25_gcc8.3.1.sh b/scripts/lc-builds/blueos_nvcc10_caliper25_gcc8.3.1.sh
new file mode 100755
index 000000000..a9d553dfb
--- /dev/null
+++ b/scripts/lc-builds/blueos_nvcc10_caliper25_gcc8.3.1.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+###############################################################################
+# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
+# and RAJA Performance Suite project contributors.
+# See the RAJAPerf/COPYRIGHT file for details.
+# +# SPDX-License-Identifier: (BSD-3-Clause) +################################################################################# + +BUILD_SUFFIX=lc_blueos-nvcc10-caliper-gcc8.3.1 +RAJA_HOSTCONFIG=../tpl/RAJA/host-configs/lc-builds/blueos/nvcc_gcc_X.cmake + +rm -rf build_${BUILD_SUFFIX} >/dev/null +mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} + +module load cmake/3.14.5 +module load caliper-2.5.0-gcc-8.3.1-cu3vy3k + +CALIPER_PREFIX=/usr/WS2/holger/spack/opt/spack/linux-rhel7-power9le/gcc-8.3.1/caliper-2.5.0-cu3vy3kjwjerpdm6xis2kauhz4s6wto2/ + +ADIAK_PREFIX=/usr/WS2/holger/spack/opt/spack/linux-rhel7-power9le/gcc-8.3.1/adiak-0.2.1-hsv444o7ofb6s2znkvvnh6hcmr774g73/ + +cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER=/usr/tce/packages/gcc/gcc-8.3.1/bin/g++ \ + -C ${RAJA_HOSTCONFIG} \ + -DCMAKE_PREFIX_PATH="${CALIPER_PREFIX}/share/cmake/caliper;${ADIAK_PREFIX}/lib/cmake/adiak" \ + -DENABLE_OPENMP=On \ + -DENABLE_CUDA=On \ + -DCMAKE_CUDA_FLAGS="-Xcompiler -mno-float128" \ + -DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-10.2.89 \ + -DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-10.1.243/bin/nvcc \ + -DCUDA_ARCH=sm_70 \ + -DENABLE_CALIPER=On \ + -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=On \ + -DCMAKE_VERBOSE_MAKEFILE=On \ + "$@" \ + .. diff --git a/scripts/lc-builds/toss4_cray-mpich_amdclang.sh b/scripts/lc-builds/toss4_cray-mpich_amdclang.sh index 614f2caec..e3385daaa 100755 --- a/scripts/lc-builds/toss4_cray-mpich_amdclang.sh +++ b/scripts/lc-builds/toss4_cray-mpich_amdclang.sh @@ -98,10 +98,11 @@ echo echo " module unload rocm" echo " srun -n1 make" echo -echo " Please note that cray-mpich requires libmodules.so.1 from cce to run." +echo " Please note that cray-mpich requires libmodules.so from cce and " +echo " libpgmath.so from llvm to run." echo " Until this is handled transparently in the build system you may add " echo " cce to your LD_LIBRARY_PATH." 
 echo
-echo "    export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/tce/packages/cce-tce/cce-13.0.2/cce/x86_64/lib/"
+echo "    export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/tce/packages/cce-tce/cce-13.0.2/cce/x86_64/lib/:/usr/tce/packages/rocmcc-tce/rocmcc-5.1.0/llvm/lib/"
 echo
 echo "***********************************************************************"
diff --git a/scripts/sweep_graph.py b/scripts/sweep_graph.py
new file mode 100755
index 000000000..6b2f1db21
--- /dev/null
+++ b/scripts/sweep_graph.py
@@ -0,0 +1,1150 @@
+#!/usr/bin/env python3
+
+import math
+import os
+import sys
+import csv
+
+import glob
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+import argparse_sweep_graph
+import data_classes_sweep_graph as dc
+# the import check below is edited specifically for Hatchet; in the future, make the excluded packages generic
+# We exclude roundtrip and vis in the import check since we're not in Hatchet interactive mode
+
+def get_size_from_dir_name(sweep_subdir_name):
+    # print(sweep_subdir_name)
+    run_size_name = sweep_subdir_name.replace("SIZE_", "")
+    try:
+        run_size = int(run_size_name)
+        return str(run_size)
+    except ValueError:
+        raise NameError("Expected a directory name of the form SIZE_<n>, got {}".format(sweep_subdir_name))
+
+def read_runinfo_file(sweep_index, sweep_subdir_runinfo_file_path, run_size_index):
+    #print("read_runinfo_file")
+    #print(sweep_index, sweep_subdir_runinfo_file_path, run_size_index)
+    with open(sweep_subdir_runinfo_file_path, "r") as file:
+        file_reader = csv.reader(file, delimiter=',')
+
+        ignore = True
+        c_to_info_kinds = {}
+        for row in file_reader:
+            # print(row)
+            if row[0].strip() == "Kernels":
+                ignore = False
+                for c in range(1, len(row)):
+                    info_kind = row[c].strip()
+                    #print(c, info_kind)
+                    if not info_kind in dc.Data.kinds:
+                        # add the new kind to the global data
+                        print("Unknown kernel info {0}".format(info_kind))
+                        dc.Data.kinds[info_kind] = dc.Data.DataTree(info_kind, "info", axes=dc.Data.info_axes)
+                    if info_kind in c_to_info_kinds:
+                        print("Repeated kernel info {0}".format(info_kind))
+                        sys.exit(1)
+                    if not dc.Data.kinds[info_kind].data:
+                        #print("# add data to kind:" + info_kind)
+                        dc.Data.kinds[info_kind].makeData()
+                    if not sweep_index in dc.Data.kinds[info_kind].data.data:
+                        #print("# add new sweep to global data")
+                        dc.Data.kinds[info_kind].data.data[sweep_index] = {}
+                    if run_size_index in dc.Data.kinds[info_kind].data.data[sweep_index]:
+                        sweep_dir_name = dc.Data.get_index_name(dc.Data.axes["sweep_dir_name"], sweep_index)
+                        run_size_name = dc.Data.get_index_name(dc.Data.axes["run_size"], run_size_index)
+                        print("Repeated kernel size {0} in {1}".format(sweep_dir_name, run_size_name))
+                        sys.exit(1)
+                    else:
+                        #print("# add new size to global data")
+                        dc.Data.kinds[info_kind].data.data[sweep_index][run_size_index] = {}
+                    # make a map of columns to names
+                    c_to_info_kinds[c] = info_kind
+                    c_to_info_kinds[info_kind] = c
+            elif not ignore:
+                kernel_index = -1
+                kernel_name = row[0].strip()
+                if kernel_name in dc.Data.kernels:
+                    kernel_index = dc.Data.kernels[kernel_name]
+                elif (len(dc.Data.include_kernels) == 0 or kernel_name in dc.Data.include_kernels) and (not kernel_name in dc.Data.exclude_kernels):
+                    # add the kernel to the global list
+                    dc.Data.add_kernel(kernel_name)
+                    kernel_index = dc.Data.kernels[kernel_name]
+                else:
+                    continue  # skip this kernel
+
+                for c in range(1, len(row)):
+                    info_kind = c_to_info_kinds[c]
+                    try:
+                        # add data to the global structure
+                        val = int(row[c].strip())
+                        #print(kernel_index, kernel_name, info_kind, val)
+
+                        axes_index = { dc.Data.axes["sweep_dir_name"]: sweep_index,
+                                       dc.Data.axes["run_size"]: run_size_index,
+                                       dc.Data.axes["kernel_index"]: kernel_index, }
+
+                        dc.Data.kinds[info_kind].set(axes_index, val)
+                    except ValueError:
+                        print('read_runinfo_file ValueError')
+                        pass  # could not convert data to int
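+
+# Example (illustrative): read_runinfo_file expects rows shaped like
+#   Kernels , Problem size , Reps , Iterations/rep , ...
+#   ADD     , 1000000      , 100  , 100            , ...
+# where the header row names the per-kernel info kinds and each following row
+# holds integer values for one kernel. (The kernel name and values here are
+# hypothetical placeholders.)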
dc.Data.axes["run_size"]: run_size_index, + dc.Data.axes["kernel_index"]: kernel_index, } + + dc.Data.kinds[info_kind].set(axes_index, val) + except ValueError: + print('read_runinfo_file ValueError') + pass # could not convert data to int + +# we expect the following to overlap wrt redundancies to read_caliper_timing_file; they should be refactored +def read_caliper_runinfo_file(cr, sweep_index, sweep_subdir, run_size_index): + #print(sweep_index, sweep_subdir, run_size_index) + kernel_list = [] + candidate_list = [] + kernel_metadata = {} + + # per kernel metadata is in dataframe columns per kernel.tuning, so we need our kernel list + allfiles = sorted(glob.glob(glob.escape(sweep_subdir) + "/*.cali")) + # not all kernels run in every variant so capture kernel list across variants + for f in allfiles: + #print(f) + gf = cr.GraphFrame.from_caliperreader(f) + metric = 'min#inclusive#sum#time.duration' + #print(gf.inc_metrics) + + # extract kernel list + kernel_index = -1 + tt = gf.graph.roots[0].traverse(order="pre") + for nn in tt: + # test if leaf node + if not nn.children: + # kernel_tuning_name is kernel.tuning in Caliper + kernel_tuning_name = gf.dataframe.loc[nn, 'name'] + kernel_name = kernel_tuning_name.split('.')[0] + if kernel_name not in kernel_metadata: + kernel_metadata[kernel_name] = {} + if (len(dc.Data.include_kernels) == 0 or kernel_name in dc.Data.include_kernels) and (not kernel_name in dc.Data.exclude_kernels): + candidate_list.append(kernel_name) + if len(kernel_metadata[kernel_name]) == 0: + metadata = {} + metadata['Problem size'] = gf.dataframe.loc[nn, 'any#any#max#ProblemSize'] + metadata['Reps'] = gf.dataframe.loc[nn, 'any#any#max#Reps'] + metadata['Iterations/rep'] = gf.dataframe.loc[nn, 'any#any#max#Iterations/Rep'] + metadata['Kernels/rep'] = gf.dataframe.loc[nn, 'any#any#max#Kernels/Rep'] + metadata['Bytes/rep'] = gf.dataframe.loc[nn, 'any#any#max#Bytes/Rep'] + metadata['FLOPS/rep'] = gf.dataframe.loc[nn, 'any#any#max#Flops/Rep'] + kernel_metadata[kernel_name] = metadata + #print("Kernel Column Metadata:" + kernel_name ) + #print(kernel_metadata[kernel_name]['Problem size']) + + kernel_list = list(set(candidate_list) | set(kernel_list)) + + for kernel_name in kernel_list: + if kernel_name not in dc.Data.kernels: + dc.Data.add_kernel(kernel_name) + kernel_index = dc.Data.kernels[kernel_name] + metadata = kernel_metadata[kernel_name] # use column metadata instead + for info_kind, info_value in metadata.items(): + if not info_kind in dc.Data.kinds: + dc.Data.kinds[info_kind] = dc.Data.DataTree(info_kind, "info", dc.Data.info_axes) + if not dc.Data.kinds[info_kind].data: + dc.Data.kinds[info_kind].makeData() + if not sweep_index in dc.Data.kinds[info_kind].data.data: + dc.Data.kinds[info_kind].data.data[sweep_index] = {} + if not run_size_index in dc.Data.kinds[info_kind].data.data[sweep_index]: + dc.Data.kinds[info_kind].data.data[sweep_index][run_size_index] = {} + try: + val = int(info_value) + axes_index = { dc.Data.axes["sweep_dir_name"]: sweep_index, + dc.Data.axes["run_size"]: run_size_index, + dc.Data.axes["kernel_index"]: kernel_index, } + dc.Data.kinds[info_kind].set(axes_index, val) + #sweep_dir_name = dc.Data.get_index_name(dc.Data.axes["sweep_dir_name"], sweep_index) + #run_size_name = dc.Data.get_index_name(dc.Data.axes["run_size"], run_size_index) + #kernel_index_name = dc.Data.get_index_name(dc.Data.axes["kernel_index"], kernel_index) + #print("Info kind {0} {1} size {2} kernel {3} val {4}".format(info_kind,sweep_dir_name, 
run_size_name,kernel_index_name,val)) + except ValueError: + print("read_caliper_runinfo_file ValueError") + pass # could not convert data to int + +def read_timing_file(sweep_index, sweep_subdir_timing_file_path, run_size_index): + #print(sweep_index, sweep_subdir_timing_file_path, run_size_index) + with open(sweep_subdir_timing_file_path, "r") as file: + file_reader = csv.reader(file, delimiter=',') + + data_kind = dc.g_timing_file_kind + if not data_kind in dc.Data.kinds: + raise NameError("Unknown kind {}".format(data_kind)) + if not dc.Data.kinds[data_kind].data: + dc.Data.kinds[data_kind].makeData() + if not sweep_index in dc.Data.kinds[data_kind].data.data: + dc.Data.kinds[data_kind].data.data[sweep_index] = {} + if not run_size_index in dc.Data.kinds[data_kind].data.data[sweep_index]: + dc.Data.kinds[data_kind].data.data[sweep_index][run_size_index] = {} + else: + sweep_dir_name = dc.Data.get_index_name(dc.Data.axes["sweep_dir_name"], sweep_index) + run_size_name = dc.Data.get_index_name(dc.Data.axes["run_size"], run_size_index) + raise NameError("Already seen {0} in {1}".format(sweep_dir_name, run_size_name)) + + c_to_variant_index = {} + c_to_tuning_index = {} + for row in file_reader: + # print(row) + if row[0].strip() == "Kernel": + if len(c_to_variant_index) == 0: + for c in range(1, len(row)): + variant_name = row[c].strip() + variant_index = -1 + if variant_name in dc.Data.variants: + variant_index = dc.Data.variants[variant_name] + elif (len(dc.Data.include_variants) == 0 or variant_name in dc.Data.include_variants) and (not variant_name in dc.Data.exclude_variants): + dc.Data.add_variant(variant_name) + variant_index = dc.Data.variants[variant_name] + else: + variant_index = -1 + c_to_variant_index[c] = variant_index + + elif len(c_to_tuning_index) == 0: + for c in range(1, len(row)): + tuning_name = row[c].strip() + if tuning_name in dc.Data.tunings: + tuning_index = dc.Data.tunings[tuning_name] + elif (len(dc.Data.include_tunings) == 0 or tuning_name in dc.Data.include_tunings) and (not tuning_name in dc.Data.exclude_tunings): + dc.Data.add_tuning(tuning_name) + tuning_index = dc.Data.tunings[tuning_name] + else: + tuning_index = -1 + c_to_tuning_index[c] = tuning_index + + else: + print("Unknown row {0}".format(row)) + sys.exit(1) + elif len(c_to_variant_index) > 0 and len(c_to_tuning_index) > 0: + kernel_index = -1 + kernel_name = row[0].strip() + if kernel_name in dc.Data.kernels: + kernel_index = dc.Data.kernels[kernel_name] + else: + continue # skip kernel + + for c in range(1, len(row)): + variant_index = c_to_variant_index[c] + tuning_index = c_to_tuning_index[c] + if variant_index < 0 or tuning_index < 0: + continue # ignore data + + axes_index = { dc.Data.axes["sweep_dir_name"]: sweep_index, + dc.Data.axes["run_size"]: run_size_index, + dc.Data.axes["kernel_index"]: kernel_index, + dc.Data.axes["variant_index"]: variant_index, + dc.Data.axes["tuning_index"]: tuning_index, } + #print(axes_index) + #print(dc.Data.axes) + try: + val = float(row[c].strip()) + #print(kernel_index, kernel_name, variant_index, tuning_index, data_kind, val) + dc.Data.kinds[data_kind].set(axes_index, val) + except ValueError: + # we usually encounter this for Not run entry + #print(row[c].strip()) + #print('read_timing_file ValueError') + pass # could not convert data to float + +def read_caliper_timing_file(cr, sweep_index, sweep_subdir, run_size_index): + graph_frames = [] + kernel_list = [] + candidate_list = [] + + data_kind = dc.g_timing_file_kind + if not data_kind in 
dc.Data.kinds: + raise NameError("Unknown kind {}".format(data_kind)) + if not dc.Data.kinds[data_kind].data: + dc.Data.kinds[data_kind].makeData() + if not sweep_index in dc.Data.kinds[data_kind].data.data: + dc.Data.kinds[data_kind].data.data[sweep_index] = {} + if not run_size_index in dc.Data.kinds[data_kind].data.data[sweep_index]: + dc.Data.kinds[data_kind].data.data[sweep_index][run_size_index] = {} + else: + sweep_dir_name = dc.Data.get_index_name(dc.Data.axes["sweep_dir_name"], sweep_index) + run_size_name = dc.Data.get_index_name(dc.Data.axes["run_size"], run_size_index) + raise NameError("Already seen {0} in {1}".format(sweep_dir_name, run_size_name)) + + #print("run size:" + Data.get_index_name(Data.axes["run_size"], run_size_index)) + allfiles = sorted(glob.glob(glob.escape(sweep_subdir) + "/*.cali")) + for f in allfiles: + kernel_tuning_list = [] + candidate_tuning_list = [] + gf = cr.GraphFrame.from_caliperreader(f) + #print(gf.metadata['variant']) + metric = 'min#inclusive#sum#time.duration' + #print(gf.inc_metrics) + graph_frames.append(gf) + + #take care of variant in this graphframe + variant_name = gf.metadata['variant'] + + if variant_name in dc.Data.variants: + variant_index = dc.Data.variants[variant_name] + elif (len(dc.Data.include_variants) == 0 or variant_name in dc.Data.include_variants) and (not variant_name in dc.Data.exclude_variants): + dc.Data.add_variant(variant_name) + variant_index = dc.Data.variants[variant_name] + else: + variant_index = -1 + + # extract kernel list + kernel_index = -1 + tt = gf.graph.roots[0].traverse(order="pre") + for nn in tt: + # test if leaf node + if not nn.children: + #kernel_tuning_name is kernel.tuning in Caliper + kernel_tuning_name = gf.dataframe.loc[nn,'name'] + kernel_name = kernel_tuning_name.split('.')[0] + if kernel_name in dc.Data.kernels: + kernel_tuning_name = gf.dataframe.loc[nn,'name'] + candidate_tuning_list.append(kernel_tuning_name) + candidate_list.append(kernel_name) + kernel_list = list(set(candidate_list) | set(kernel_list)) + kernel_tuning_list = list(set(candidate_tuning_list) | set(kernel_tuning_list)) + #print(kernel_list) + #print(kernel_tuning_list) + + for kernel in kernel_tuning_list: + kernel_name = kernel.split('.')[0] + tuning_name = kernel.split('.')[1] + + if kernel_name in dc.Data.kernels: + kernel_index = dc.Data.kernels[kernel_name] + else: + continue # skip kernel + + if tuning_name in dc.Data.tunings: + tuning_index = dc.Data.tunings[tuning_name] + elif (len(dc.Data.include_tunings) == 0 or tuning_name in dc.Data.include_tunings) and (not tuning_name in dc.Data.exclude_tunings): + dc.Data.add_tuning(tuning_name) + tuning_index = dc.Data.tunings[tuning_name] + else: + tuning_index = -1 + + if variant_index < 0 or tuning_index <0: + continue # skip this variant or tuning + + axes_index = { dc.Data.axes["sweep_dir_name"]: sweep_index, + dc.Data.axes["run_size"]: run_size_index, + dc.Data.axes["kernel_index"]: kernel_index, + dc.Data.axes["variant_index"]: variant_index, + dc.Data.axes["tuning_index"]: tuning_index, } + val = 0.0 + #print(metric) + try: + val = float(gf.dataframe.loc[gf.dataframe['name']==kernel].iloc[0][metric]) + #print(variant_name, kernel_name, tuning_name, data_kind, val) + dc.Data.kinds[data_kind].set(axes_index, val) + except ValueError: + print('ValueError') + pass # could not convert data to float + +def get_plot_data(kind, partial_axes_index): + + if not kind in dc.Data.kinds: + raise NameError("Unknown kind {}".format(kind)) + + kind_data = dc.Data.kinds[kind] + + 
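+    # the partial index may only name axes that this kind actually has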
assert(kind_data.hasAxes(partial_axes_index))
+
+    data = []
+    for axes_index, leftover_axes_index, value in kind_data.partial_match_items(partial_axes_index):
+        index_name = kind_data.indexName(leftover_axes_index)
+        data.append({ "name": index_name,
+                      "axes_index": leftover_axes_index,
+                      "data": [value] })
+
+    return data
+
+def get_plot_data2(xkind, ykind, partial_axes_index):
+
+    if not ykind in dc.Data.kinds:
+        raise NameError("Unknown kind {}".format(ykind))
+    if not xkind in dc.Data.kinds:
+        raise NameError("Unknown kind {}".format(xkind))
+
+    ykind_data = dc.Data.kinds[ykind]
+    xkind_data = dc.Data.kinds[xkind]
+
+    assert(ykind_data.hasAxes(partial_axes_index))
+    assert(xkind_data.hasAxes(partial_axes_index))
+
+    data = []
+    for axes_index, leftover_axes_index, yvalue in ykind_data.partial_match_items(partial_axes_index):
+        index_name = ykind_data.indexName(leftover_axes_index)
+        xvalue = xkind_data.get(axes_index)
+        data.append({ "name": index_name,
+                      "axes_index": leftover_axes_index,
+                      "ydata": [yvalue],
+                      "xdata": [xvalue] })
+
+    return data
+
+g_gname = None
+
+g_lloc = 'best'
+
+g_ylabel = None
+g_yscale = None
+g_ylim = None
+
+g_xlabel = None
+g_xscale = None
+g_xlim = None
+
+g_hbin_size = None
+
+def plot_data_split_line(outputfile_name, split_axis_name, xaxis_name, xkind, ykinds):
+    print("plotting {} {} {} {} {}".format(outputfile_name, split_axis_name, xaxis_name, xkind, ykinds))
+
+    assert(split_axis_name == "kernel_index")
+    for split_index in range(0, dc.Data.num_kernels):
+        split_name = dc.Data.kernels[split_index]
+
+        lloc = g_lloc
+
+        ylabel = g_ylabel
+        yscale = g_yscale or "log"
+        ylim = g_ylim
+
+        xlabel = g_xlabel or dc.Data.kinds[xkind].label
+        xscale = g_xscale or "log"
+        xlim = g_xlim
+
+        gname = g_gname
+
+        split_data = { "ynames": [],
+                       "ycolor": {},
+                       "yformat": {},
+                       "ydata": {},
+                       "xdata": {}, }
+
+        for ykind in ykinds:
+            if gname:
+                gname = "{}\n{}".format(gname, ykind)
+            else:
+                gname = "{}".format(ykind)
+            if not ykind in dc.Data.kinds:
+                raise NameError("Unknown kind {}".format(ykind))
+            if not ylabel:
+                ylabel = dc.Data.kinds[ykind].label
+            elif (not g_ylabel) and ylabel != dc.Data.kinds[ykind].label:
+                raise NameError("kinds use different labels {}".format([dc.Data.kinds[_ykind].label for _ykind in ykinds]))
+
+            assert(xaxis_name == "run_size")
+            for x_index in range(0, dc.Data.num_run_sizes):
+
+                axes_index = { dc.Data.axes[split_axis_name]: split_index,
+                               dc.Data.axes[xaxis_name]: x_index }
+
+                data_list = get_plot_data2(xkind, ykind, axes_index)
+
+                for data in data_list:
+                    yname = data["name"]
+
+                    if not yname in split_data["ydata"]:
+
+                        ycolor = (0.0, 0.0, 0.0, 1.0)
+                        if dc.Data.axes["variant_index"] in data["axes_index"]:
+                            variant_index = data["axes_index"][dc.Data.axes["variant_index"]]
+                            ycolor = dc.Data.variant_colors[variant_index]
+
+                        ymarker = ""
+                        if dc.Data.axes["sweep_dir_name"] in data["axes_index"]:
+                            sweep_index = data["axes_index"][dc.Data.axes["sweep_dir_name"]]
+                            ymarker = dc.Data.sweep_markers[sweep_index]
+
+                        yformat = "{}-".format(ymarker)
+                        if dc.Data.axes["tuning_index"] in data["axes_index"]:
+                            tuning_index = data["axes_index"][dc.Data.axes["tuning_index"]]
+                            yformat = "{}{}".format(ymarker, dc.Data.tuning_formats[tuning_index])
+
+                        split_data["ynames"].append(yname)
+                        split_data["ycolor"][yname] = ycolor
+                        split_data["yformat"][yname] = yformat
+                        split_data["ydata"][yname] = []
+                        split_data["xdata"][yname] = []
+
+                    split_data["ydata"][yname].append(data["ydata"][0])
+                    split_data["xdata"][yname].append(data["xdata"][0])
+
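+        # one plot file per kernel: <outputfile_name>_<kernel_name>.png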
+ fname = "{}_{}.png".format(outputfile_name, split_name) + if gname: + gname = "{}\n{}".format(split_name, gname) + else: + gname = "{}".format(split_name) + + print("Plotting {}:".format(fname)) + + for yname in split_data["ynames"]: + + ycolor = split_data["ycolor"][yname] + yformat = split_data["yformat"][yname] + ydata = split_data["ydata"][yname] + xdata = split_data["xdata"][yname] + + if yname in dc.g_series_reformat and "format" in dc.g_series_reformat[yname]: + yformat = dc.g_series_reformat[yname]["format"] + if yname in dc.g_series_reformat and "color" in dc.g_series_reformat[yname]: + ycolor = dc.g_series_reformat[yname]["color"] + + print(" series \"{}\" format \"{}\" color \"{}\"".format(yname, yformat, ycolor)) + + if len(ykinds) > 1: + yname = "{} {}".format(dc.Data.kinds[ykind].kind, yname) + np_xdata = np.array(xdata) + xind = np_xdata.argsort() + np_xdata = np_xdata[xind[0:]] + np_ydata = np.array(ydata) + np_ydata = np_ydata[xind[0:]] + plt.plot(np_xdata,np_ydata,yformat,color=ycolor,label=yname) + + if ylabel: + plt.ylabel(ylabel) + if yscale: + plt.yscale(yscale) + if ylim: + plt.ylim(ylim) + + if xlabel: + plt.xlabel(xlabel) + if xscale: + plt.xscale(xscale) + if xlim: + plt.xlim(xlim) + #print(plt.rcParams.keys()) + plt.title(gname) + handles, labels = plt.gca().get_legend_handles_labels() + legend_order = dc.set_legend_order(labels) + plt.legend([handles[idx] for idx in legend_order], [labels[idx] for idx in legend_order],loc=lloc) + plt.grid(True) + + plt.savefig(fname, dpi=150.0) + plt.clf() + +def plot_data_bar(outputfile_name, xaxis, ykinds): + # print("plotting {} {} {}".format(outputfile_name, xaxis, ykinds)) + + assert(xaxis == "kernel_index") + + gname = g_gname + + lloc = g_lloc + + xlabel = g_xlabel or "Kernel" + xscale = g_xscale + xlim = g_xlim + + ylabel = g_ylabel + yscale = g_yscale + ylim = g_ylim + + for ykind in ykinds: + if gname: + gname = "{}\n{}".format(gname, ykind) + else: + gname = "{}".format(ykind) + if not ykind in dc.Data.kinds: + raise NameError("Unknown kind {}".format(ykind)) + if not ylabel: + ylabel = dc.Data.kinds[ykind].label + elif (not g_ylabel) and ylabel != dc.Data.kinds[ykind].label: + raise NameError("kinds use different labels {}".format([dc.Data.kinds[_ykind].label for _ykind in ykinds])) + + kernel_data = { "kernel_names": [], + "kernel_centers": [], + "ynames": {}, + "ycolor": {}, + "ydata": {}, } + + for kernel_index in range(0, dc.Data.num_kernels): + kernel_name = dc.Data.kernels[kernel_index] + + kernel_data["kernel_names"].append(kernel_name) + kernel_data["kernel_centers"].append(kernel_index) + + axes_index = { dc.Data.axes["kernel_index"]: kernel_index } + + for ykind in ykinds: + + ydata_list = get_plot_data(ykind, axes_index) + + for ydata in ydata_list: + + assert(len(ydata["data"]) == 1) + + yname = ydata["name"] + if len(ykinds) > 1: + yname = "{} {}".format(dc.Data.kinds[ykind].kind, yname) + + ycolor = (0.0, 0.0, 0.0, 1.0) + if dc.Data.axes["variant_index"] in ydata["axes_index"]: + variant_index = ydata["axes_index"][dc.Data.axes["variant_index"]] + ycolor = dc.Data.variant_colors[variant_index] + + if not yname in kernel_data["ynames"]: + kernel_data["ynames"][yname] = len(kernel_data["ynames"]) + kernel_data["ycolor"][yname] = ycolor + kernel_data["ydata"][yname] = [] + + # pad with 0s if find missing data + while len(kernel_data["ydata"][yname])+1 < len(kernel_data["kernel_names"]): + kernel_data["ydata"][yname].append(0.0) + + kernel_data["ydata"][yname].append(ydata["data"][0]) + + fname = 
"{}.png".format(outputfile_name) + if not gname: + gname = "{}".format("bar") + + print("Plotting {}:".format(fname)) + + num_xticks = len(kernel_data["kernel_centers"]) + plt.figure(figsize=(max(num_xticks*0.5, 4), 6,)) + + y_n = len(kernel_data["ydata"]) + ywidth = 1.0 / (y_n+1) + for yname in kernel_data["ynames"]: + + y_i = kernel_data["ynames"][yname] + ycolor = kernel_data["ycolor"][yname] + yaxis = kernel_data["ydata"][yname] + + if yname in dc.g_series_reformat and "color" in dc.g_series_reformat[yname]: + ycolor = dc.g_series_reformat[yname]["color"] + + print(" series \"{}\" color \"{}\"".format(yname, ycolor)) + + xaxis = [c + (y_i+1)/(y_n+1) - 0.5 for c in kernel_data["kernel_centers"]] + + # pad with 0s if find missing data + while len(yaxis) < len(kernel_data["kernel_names"]): + yaxis.append(0.0) + + plt.bar(xaxis,yaxis,label=yname,width=ywidth,color=ycolor,zorder=2) # ,edgecolor="grey") + + xticks = kernel_data["kernel_centers"] + xtick_names = kernel_data["kernel_names"] + + if ylabel: + plt.ylabel(ylabel) + if yscale: + plt.yscale(yscale) + if ylim: + plt.ylim(ylim) + + if xlabel: + plt.xlabel(xlabel) + if xscale: + plt.xscale(xscale) + if xlim: + plt.xlim(xlim) + + plt.xticks(xticks, xtick_names, rotation=90) + + plt.title(gname) + plt.legend(loc=lloc) + plt.grid(True, zorder=0) + + plt.savefig(fname, dpi=150.0, bbox_inches="tight") + plt.clf() + + +def plot_data_histogram(outputfile_name, haxis, hkinds): + # print("plotting {} {} {}".format(outputfile_name, haxis, hkinds)) + + assert(haxis == "kernel_index") + + gname = g_gname + + lloc = g_lloc + + hbin_size = g_hbin_size + hbin_max = None + hbin_min = None + + xlabel = g_xlabel + xscale = g_xscale + xlim = g_xlim + + ylabel = g_ylabel or "Number" + yscale = g_yscale + ylim = g_ylim + + for ykind in hkinds: + if gname: + gname = "{}\n{}".format(gname, ykind) + else: + gname = "{}".format(ykind) + if not ykind in dc.Data.kinds: + raise NameError("Unknown kind {}".format(ykind)) + if not xlabel: + xlabel = dc.Data.kinds[ykind].label + elif (not g_xlabel) and xlabel != dc.Data.kinds[ykind].label: + raise NameError("kinds use different labels {}".format([dc.Data.kinds[_ykind].label for _ykind in hkinds])) + + if not hbin_size: + + hdata_all = [] + + for kernel_index in range(0, dc.Data.num_kernels): + kernel_name = dc.Data.kernels[kernel_index] + + axes_index = { dc.Data.axes["kernel_index"]: kernel_index } + + for ykind in hkinds: + + hdata_list = get_plot_data(ykind, axes_index) + + for hdata in hdata_list: + + assert(len(hdata["data"]) == 1) + hdata_all.append(hdata["data"][0]) + + hdata_all.sort() + + num_hdata = len(hdata_all) + i_Q1 = math.floor(num_hdata * 0.25) + i_Q3 = math.floor(num_hdata * 0.75) + hdata_Q1 = hdata_all[i_Q1] + hdata_Q3 = hdata_all[i_Q3] + iqr = hdata_Q3 - hdata_Q1 + + hbin_size = 2.0 * iqr / num_hdata**(1.0/3.0) + + if hbin_size > 1.0: + hbin_size = math.floor(hbin_size) + elif hbin_size > 0.0: + hbin_size = 1.0 / math.ceil(1.0 / hbin_size) + else: + hbin_size = 1.0 + + kernel_data = { "hnames": {}, + "hcolor": {}, + "hbins": {}, } + + for kernel_index in range(0, dc.Data.num_kernels): + kernel_name = dc.Data.kernels[kernel_index] + + axes_index = { dc.Data.axes["kernel_index"]: kernel_index } + + for ykind in hkinds: + + hdata_list = get_plot_data(ykind, axes_index) + + for hdata in hdata_list: + + assert(len(hdata["data"]) == 1) + + hname = hdata["name"] + if len(hkinds) > 1: + hname = "{} {}".format(dc.Data.kinds[ykind].kind, hname) + + hcolor = (0.0, 0.0, 0.0, 1.0) + if 
dc.Data.axes["variant_index"] in hdata["axes_index"]: + variant_index = hdata["axes_index"][dc.Data.axes["variant_index"]] + hcolor = dc.Data.variant_colors[variant_index] + + if not hname in kernel_data["hnames"]: + kernel_data["hnames"][hname] = len(kernel_data["hnames"]) + kernel_data["hcolor"][hname] = hcolor + kernel_data["hbins"][hname] = {} + + hbin = math.floor(hdata["data"][0] / hbin_size) + + if hbin_max == None or hbin > hbin_max: + hbin_max = hbin + if hbin_min == None or hbin < hbin_min: + hbin_min = hbin + + if not hbin in kernel_data["hbins"][hname]: + kernel_data["hbins"][hname][hbin] = 0 + kernel_data["hbins"][hname][hbin] += 1 + + fname = "{}.png".format(outputfile_name) + if not gname: + gname = "{}".format("histogram") + + print("Plotting {}:".format(fname)) + + num_xticks = hbin_max - hbin_min + 1 + if xlim: + num_xticks = math.ceil((xlim[1] - xlim[0]) / hbin_size) + plt.figure(figsize=(max(num_xticks*0.5, 4), 6,)) + + h_n = len(kernel_data["hnames"]) + hwidth = hbin_size / h_n + #print(h_n, hwidth, hbin_size) + for hname in kernel_data["hnames"]: + + h_i = kernel_data["hnames"][hname] + xoffset = hbin_size * ((h_i+1)/(h_n+1) - 0.5) + hcolor = kernel_data["hcolor"][hname] + hbins = kernel_data["hbins"][hname] + + if hname in dc.g_series_reformat and "color" in dc.g_series_reformat[hname]: + hcolor = dc.g_series_reformat[hname]["color"] + + print(" series \"{}\" color \"{}\" offset {}".format(hname, hcolor, xoffset)) + + xaxis = [] + haxis = [] + for i, hval in hbins.items(): + xval = (i + 0.5) * hbin_size + xoffset + xaxis.append(xval) + haxis.append(hval) + + plt.bar(xaxis,haxis,label=hname,width=hwidth,color=hcolor,zorder=2) # ,edgecolor="grey") + + if ylabel: + plt.ylabel(ylabel) + if yscale: + plt.yscale(yscale) + if ylim: + plt.ylim(ylim) + + if xlabel: + plt.xlabel(xlabel) + if xscale: + plt.xscale(xscale) + if xlim: + plt.xlim(xlim) + + plt.title(gname) + plt.legend(loc=lloc) + plt.grid(True, zorder=0) + + plt.savefig(fname, dpi=150.0, bbox_inches="tight") + plt.clf() + + +def main(argv): + outputfile = "graph" + runinfo_filename = dc.g_runinfo_filename + timing_filename = dc.g_timing_filename + print_kinds = [] + split_line_graph_kind_lists = [] + bar_graph_kind_lists = [] + histogram_graph_kind_lists = [] + + + # set a few plot params - see rcParams.keys() for list + params = {'xtick.labelsize':'small', + 'ytick.labelsize':'small', + 'axes.labelsize':'small', + 'axes.titlesize':'medium', + 'legend.fontsize':'x-small'} + plt.rcParams.update(params) + + + parser = argparse_sweep_graph.process_argparse() + args, unknown = parser.parse_args(argv) + print(args) + + cr = None + # argparse module can do a Hatchet Caliper Reader check for arg --caliper + # arg.caliper is False by default + can_process_caliper = args.caliper + # cr is set to importlib hatchet + if can_process_caliper: + cr = args.cr + + #kernels section + parse_set = set() + if args.kernels != None: + parse_set.update(set(args.kernels)) + if args.kernels_close != None: + parse_set.update(set(args.kernels_close)) + for k in list(parse_set): + print("including kernel:" + str(k)) + dc.Data.include_kernels[k] = k + + parse_set = set() + if args.exclude_kernels != None: + parse_set.update(set(args.exclude_kernels)) + if args.exclude_kernels_close != None: + parse_set.update(set(args.exclude_kernels_close)) + for k in list(parse_set): + print("excluding kernel:" + str(k)) + dc.Data.exclude_kernels[k] = k + + # variant section + parse_set = set() + if args.variants != None: + 
+        parse_set.update(set(args.variants))
+    if args.variants_close != None:
+        parse_set.update(set(args.variants_close))
+    for k in list(parse_set):
+        print("including variant:" + str(k))
+        dc.Data.include_variants[k] = k
+
+    parse_set = set()
+    if args.exclude_variants != None:
+        parse_set.update(set(args.exclude_variants))
+    if args.exclude_variants_close != None:
+        parse_set.update(set(args.exclude_variants_close))
+    for k in list(parse_set):
+        print("excluding variant:" + str(k))
+        dc.Data.exclude_variants[k] = k
+
+    #tuning section
+    parse_set = set()
+    if args.tunings != None:
+        parse_set.update(set(args.tunings))
+    if args.tunings_close != None:
+        parse_set.update(set(args.tunings_close))
+    for k in list(parse_set):
+        print("including tuning:" + str(k))
+        dc.Data.include_tunings[k] = k
+
+    parse_set = set()
+    if args.exclude_tunings != None:
+        parse_set.update(set(args.exclude_tunings))
+    if args.exclude_tunings_close != None:
+        parse_set.update(set(args.exclude_tunings_close))
+    for k in list(parse_set):
+        print("excluding tuning:" + str(k))
+        dc.Data.exclude_tunings[k] = k
+
+    sweep_dir_paths = args.prescan["directories"]
+
+    if args.output != None:
+        outputfile = args.output[0]
+
+    if args.graph_name != None:
+        global g_gname
+        g_gname = args.graph_name[0]
+
+    if args.legend_location != None:
+        global g_lloc
+        g_lloc = (float(args.legend_location[0]), float(args.legend_location[1]))
+
+    if args.y_axis_label != None:
+        global g_ylabel
+        g_ylabel = args.y_axis_label[0]
+
+    if args.y_axis_scale != None:
+        global g_yscale
+        g_yscale = args.y_axis_scale[0]
+
+    if args.y_axis_limit != None:
+        global g_ylim
+        g_ylim = (float(args.y_axis_limit[0]),float(args.y_axis_limit[1]))
+
+    if args.x_axis_label != None:
+        global g_xlabel
+        g_xlabel = args.x_axis_label[0]
+
+    if args.x_axis_scale != None:
+        global g_xscale
+        g_xscale = args.x_axis_scale[0]
+
+    if args.x_axis_limit != None:
+        global g_xlim
+        g_xlim = (float(args.x_axis_limit[0]), float(args.x_axis_limit[1]))
+
+    if args.recolor != None:
+        # expect one or more repeated (series name, color tuple) pairs
+        for ll in args.recolor:
+            series_name = ll[0]
+            # expecting tuple string "(r,g,b)" with r,g,b floats in [0-1]
+            tuple_str = ll[1]
+            if not series_name in dc.g_series_reformat:
+                dc.g_series_reformat[series_name] = {}
+            dc.g_series_reformat[series_name]['color'] = dc.make_color_tuple_str(tuple_str)
+
+    if args.reformat != None:
+        for ll in args.reformat:
+            series_name = ll[0]
+            format_str = ll[1]
+            if not series_name in dc.g_series_reformat:
+                dc.g_series_reformat[series_name] = {}
+            dc.g_series_reformat[series_name]['format'] = format_str
+
+    if args.kernel_groups != None:
+        for g in args.kernel_groups:
+            dc.Data.include_kernel_groups[g] = g
+
+    for kernel_group in dc.Data.include_kernel_groups.keys():
+        if kernel_group in dc.g_known_kernel_groups:
+            print("include kernel group:"+str(kernel_group))
+            for kernel_name in dc.g_known_kernel_groups[kernel_group]["kernels"]:
+                if kernel_name in args.prescan["kernels_union"]:
+                    dc.Data.include_kernels[kernel_name] = kernel_name
+        else:
+            print("Unknown kernel group {}".format(kernel_group))
+            sys.exit(2)
+
+    if args.exclude_kernel_groups != None:
+        for g in args.exclude_kernel_groups:
+            dc.Data.exclude_kernel_groups[g] = g
+
+    for kernel_group in dc.Data.exclude_kernel_groups.keys():
+        if kernel_group in dc.g_known_kernel_groups:
+            print("exclude kernel group:"+str(kernel_group))
+            for kernel_name in dc.g_known_kernel_groups[kernel_group]["kernels"]:
+                if kernel_name in args.prescan["kernels_union"]:
+                    dc.Data.exclude_kernels[kernel_name] = kernel_name
+        else:
+            print("Unknown kernel group {}".format(kernel_group))
+            sys.exit(2)
+
+    compact_flag = True
+    if args.print_compact != None:
+        for aa in args.print_compact:
+            print_kinds.append(aa)
+
+    if args.print_expanded != None:
+        compact_flag = False
+        for aa in args.print_expanded:
+            print_kinds.append(aa)
+
+    if args.split_line_graphs != None:
+        split_line_graph_kind_lists.append([])
+        for aa in args.split_line_graphs:
+            split_line_graph_kind_lists[len(split_line_graph_kind_lists) - 1].append(aa)
+
+    if args.bar_graph != None:
+        bar_graph_kind_lists.append([])
+        for aa in args.bar_graph:
+            bar_graph_kind_lists[len(bar_graph_kind_lists) - 1].append(aa)
+
+    if args.histogram_graph != None:
+        histogram_graph_kind_lists.append([])
+        for aa in args.histogram_graph:
+            histogram_graph_kind_lists[len(histogram_graph_kind_lists) - 1].append(aa)
+
+    #done with options
+    print("Input directories are \"{0}\"".format(sweep_dir_paths))
+    print("Output file is \"{0}\"".format(outputfile))
+    for sweep_dir_path in sweep_dir_paths:
+        print("sweep_dir_path:" + sweep_dir_path)
+        sweep_dir_name = os.path.basename(sweep_dir_path)
+        print("sweep_dir_name:" + sweep_dir_name)
+
+        if sweep_dir_name in dc.Data.exclude_sweeps:
+            continue
+
+        if sweep_dir_name in dc.Data.sweeps:
+            raise NameError("Repeated sweep_dir_name {}".format(sweep_dir_name))
+        dc.Data.add_sweep(sweep_dir_name)
+        sweep_index = dc.Data.sweeps[sweep_dir_name]
+        if sweep_index >= len(dc.g_markers):
+            raise NameError("Ran out of sweep markers for {}".format(sweep_dir_name))
+        dc.Data.sweep_markers[sweep_index] = dc.g_markers[sweep_index]
+
+        for r0,sweep_subdir_names,f0 in os.walk(sweep_dir_path):
+            for sweep_subdir_name in sweep_subdir_names:
+                sweep_subdir_path = os.path.join(sweep_dir_path, sweep_subdir_name)
+                # print(sweep_dir_name, sweep_subdir_path)
+
+                run_size_name = get_size_from_dir_name(sweep_subdir_name)
+                if run_size_name in args.prescan["sweep_sizes"]:
+                    if not run_size_name in dc.Data.run_sizes:
+                        dc.Data.add_run_size(run_size_name)
+                    run_size_index = dc.Data.run_sizes[run_size_name]
+                else:
+                    continue
+
+                sweep_subdir_timing_file_path = ""
+                sweep_subdir_runinfo_file_path = ""
+                for r1,d1,sweep_subdir_file_names in os.walk(sweep_subdir_path):
+                    for sweep_subdir_file_name in sweep_subdir_file_names:
+                        sweep_subdir_file_path = os.path.join(sweep_subdir_path, sweep_subdir_file_name)
+                        if sweep_subdir_file_name == timing_filename:
+                            sweep_subdir_timing_file_path = sweep_subdir_file_path
+                        elif sweep_subdir_file_name == runinfo_filename:
+                            sweep_subdir_runinfo_file_path = sweep_subdir_file_path
+
+                if sweep_subdir_timing_file_path != "" and sweep_subdir_runinfo_file_path != "":
+                    #print(sweep_subdir_timing_file_path, sweep_subdir_runinfo_file_path)
+                    #read_runinfo_file(sweep_index, sweep_subdir_runinfo_file_path, run_size_index)
+                    if(can_process_caliper):
+                        read_caliper_runinfo_file(cr,sweep_index, sweep_subdir_path, run_size_index)
+                        read_caliper_timing_file(cr,sweep_index, sweep_subdir_path, run_size_index)
+                    else:
+                        read_runinfo_file(sweep_index, sweep_subdir_runinfo_file_path, run_size_index)
+                        read_timing_file(sweep_index, sweep_subdir_timing_file_path, run_size_index)
+
+
+    kinds_string = ""
+    for kindTree in dc.Data.kinds.values():
+        kinds_string += ", {}".format(kindTree.kind)
+    print("kinds")
+    print(" {}".format(kinds_string[2:]))
+
+    kind_templates_string = ""
+    for kindTree_template in dc.Data.kind_templates.values():
+        kind_templates_string += ", {}".format(kindTree_template.kind_template)
{}".format(kindTree_template.kind_template) + print("kind_templates") + print(" {}".format(kind_templates_string[2:])) + + axes_string = "" + for v in range(0, dc.Data.num_axes): + axes_string += ", {}".format(dc.Data.axes[v]) + print("axes") + print(" {}".format(axes_string[2:])) + + sweeps_string = "" + for v in range(0, dc.Data.num_sweeps): + sweeps_string += ", {}".format(dc.Data.sweeps[v]) + print("sweeps") + print(" {}".format(sweeps_string[2:])) + + run_sizes_string = "" + for v in range(0, dc.Data.num_run_sizes): + run_sizes_string += ", {}".format(dc.Data.run_sizes[v]) + print("run_sizes") + print(" {}".format(run_sizes_string[2:])) + + kernel_groups_string = "" + for kernel_group in dc.g_known_kernel_groups: + kernel_groups_string += ", {}".format(kernel_group) + print("kernel groups") + print(" {}".format(kernel_groups_string[2:])) + + assert dc.Data.num_kernels > 0,f"Expected kernels to be greater than zero; kernel name typo in cmdline arg??" + kernel_string = "" + for v in range(0, dc.Data.num_kernels): + kernel_string += ", {}".format(dc.Data.kernels[v]) + print("kernels") + print(" {}".format(kernel_string[2:])) + + variant_string = "" + for v in range(0, dc.Data.num_variants): + variant_string += ", {}".format(dc.Data.variants[v]) + print("variants") + print(" {}".format(variant_string[2:])) + + tuning_string = "" + for v in range(0, dc.Data.num_tunings): + tuning_string += ", {}".format(dc.Data.tunings[v]) + print("tunings") + print(" {}".format(tuning_string[2:])) + + for kind in print_kinds: + print("Print Data {}:".format(kind)) + dc.Data.compute(kind) + print(dc.Data.kinds[kind].dataString(compact_flag)) + + for kind_list in split_line_graph_kind_lists: + print("Plot split line graph {}:".format(kind_list)) + for kind in kind_list: + dc.Data.compute(kind) + plot_data_split_line(outputfile, "kernel_index", "run_size", "Problem size", kind_list) + + for kind_list in bar_graph_kind_lists: + print("Plot bar graph {}:".format(kind_list)) + for kind in kind_list: + dc.Data.compute(kind) + plot_data_bar(outputfile, "kernel_index", kind_list) + + for kind_list in histogram_graph_kind_lists: + print("Plot histogram graph {}:".format(kind_list)) + for kind in kind_list: + dc.Data.compute(kind) + plot_data_histogram(outputfile, "kernel_index", kind_list) + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/scripts/sweep_size.sh b/scripts/sweep_size.sh index 65c3a94e5..f524d49dd 100755 --- a/scripts/sweep_size.sh +++ b/scripts/sweep_size.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -EXECUTABLES="" +NAMES=() +EXECUTABLES=() SIZE_MIN=10000 SIZE_MAX=1000000 SIZE_RATIO=2 @@ -8,20 +9,20 @@ SIZE_RATIO=2 ################################################################################ # # Usage: -# srun -n1 --exclusive sweep.sh -x raja-perf.exe [-- ] +# srun -n1 --exclusive sweep.sh -x my_run raja-perf.exe [-- ] # # Parse any args for this script and consume them using shift # leave the raja perf arguments if any for later use # # Examples: -# lalloc 1 lrun -n1 sweep.sh -x raja-perf.exe -- -# # run a sweep of default problem sizes with executable `raja-perf.exe` -# # with args `args` +# lalloc 1 lrun -n1 sweep.sh -x my_run raja-perf.exe -- +# # run a sweep of default problem sizes in dir my_run with +# # executable `raja-perf.exe` with args `args` # -# srun -n1 --exclusive sweep.sh -x raja-perf.exe --size-min 1000 +# srun -n1 --exclusive sweep.sh -x my_run raja-perf.exe --size-min 1000 # --size-max 10000 --size-ratio 2 -- # # run a sweep of problem sizes 1K to 10K with ratio 2 (1K, 
-#   # with executable `raja-perf.exe` with args `args`
+#   # in dir my_run with executable `raja-perf.exe` with args `args`
 #
 ################################################################################
 while [ "$#" -gt 0 ]; do
@@ -30,17 +31,20 @@ while [ "$#" -gt 0 ]; do
 
     if [[ "x$1" == "x-x" || "x$1" == "x--executable" ]]; then
 
-        exec="$2"
-        if ! [[ "x$exec" == x/* ]]; then
-            exec="$(pwd)/$exec"
+        if [ "$#" -lt 3 ]; then
+            echo "Expected 2 args to $1" 1>&2
+            exit 1
+        elif [[ "$2" =~ ^\-.* ]]; then
+            echo "Expected 2 args to $1: $2" 1>&2
+            exit 1
+        elif [[ "$3" =~ ^\-.* ]]; then
+            echo "Expected 2 args to $1: $2 $3" 1>&2
+            exit 1
         fi
-        if [[ "x$EXECUTABLES" == "x" ]]; then
-            EXECUTABLES="$exec"
-        else
-            EXECUTABLES="${EXECUTABLES} $exec"
-        fi
-        shift
+        NAMES+=("$2")
+        EXECUTABLES+=("$3")
+        shift 2
 
     elif [[ "x$1" == "x-m" || "x$1" == "x--size-min" ]]; then
@@ -77,7 +81,8 @@ while [ "$#" -gt 0 ]; do
 done
 
-echo "Running sweep with executables: $EXECUTABLES"
+echo "Running sweeps with names: ${NAMES[@]}"
+echo "        and executables: ${EXECUTABLES[@]}"
 echo "Sweeping from size $SIZE_MIN to $SIZE_MAX with ratio $SIZE_RATIO"
 echo "extra args to executables are: $@"
@@ -102,44 +107,24 @@ if [[ "$SIZE_MIN" -gt "$SIZE_MAX" ]]; then
     exit 1
 fi
 
-################################################################################
-# check executables exist and are executable
-################################################################################
-for exec in $EXECUTABLES; do
-    if [[ ! -f "$exec" ]]; then
-        echo "Executable not found: $exec" 1>&2
-        exit 1
-    elif [[ ! -x "$exec" ]]; then
-        echo "Executable not executable: $exec" 1>&2
-        exit 1
-    fi
-done
-
-EXEC_I=0
-for exec in $EXECUTABLES; do
-
-    mkdir "RAJAPerf_$EXEC_I" || exit 1
-
-    let EXEC_I=EXEC_I+1
-
-done
-
 SIZE="$SIZE_MIN"
 while [[ "$SIZE" -le "$SIZE_MAX" ]]; do
 
     EXEC_I=0
-    for exec in $EXECUTABLES; do
+    EXEC_N=${#EXECUTABLES[@]}
+    while [[ "$EXEC_I" -lt "$EXEC_N" ]]; do
 
-        cd "RAJAPerf_$EXEC_I" || exit 1
+        name="${NAMES[EXEC_I]}"
+        exec="${EXECUTABLES[EXEC_I]}"
+        echo "${name}: ${exec}"
 
-        SIZE_FILE="$(printf "SIZE_%09d" $SIZE)"
-        mkdir "$SIZE_FILE" && cd "$SIZE_FILE" || exit 1
+        SIZE_DIR="$(printf "SIZE_%09d" $SIZE)"
+        OUT_DIR="${name}/$SIZE_DIR"
 
-        echo "$exec --size $SIZE $@"
-        echo "$exec --size $SIZE $@" &> "raja-perf-sweep.txt"
-        $exec --size $SIZE $@ &>> "raja-perf-sweep.txt"
+        mkdir -p "${OUT_DIR}" || exit 1
 
-        cd ../..
+ echo "$exec -od ${OUT_DIR} --size $SIZE $@" | tee -a "${OUT_DIR}/raja-perf-sweep.txt" + $exec -od ${OUT_DIR} --size $SIZE $@ &>> "${OUT_DIR}/raja-perf-sweep.txt" let EXEC_I=EXEC_I+1 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5f93d967b..0c2d98bc4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -247,9 +247,13 @@ else() blt_add_executable( NAME raja-perf.exe SOURCES RAJAPerfSuiteDriver.cpp + INCLUDES ${PROJECT_BINARY_DIR}/include DEPENDS_ON ${RAJA_PERFSUITE_EXECUTABLE_DEPENDS} ) install( TARGETS raja-perf.exe RUNTIME DESTINATION bin ) endif() + +#blt_print_target_properties(TARGET raja-perf.exe) + diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 47db79deb..9dff522bd 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -14,5 +14,6 @@ blt_add_library( OutputUtils.cpp RAJAPerfSuite.cpp RunParams.cpp + INCLUDES ${PROJECT_BINARY_DIR}/include/ DEPENDS_ON ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index a41e3fd00..fb09b8aeb 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -37,11 +38,20 @@ #include - namespace rajaperf { using namespace std; +#ifdef RAJA_PERFSUITE_USE_CALIPER +vector split(const string str, const string regex_str) +{ + regex regexz(regex_str); + vector list(sregex_token_iterator(str.begin(), str.end(), regexz, -1), + sregex_token_iterator()); + return list; +} +#endif + namespace { #ifdef RAJA_PERFSUITE_ENABLE_MPI @@ -98,13 +108,102 @@ void Allreduce(const Checksum_type* send, Checksum_type* recv, int count, #endif -} +} // close unnamed namespace + Executor::Executor(int argc, char** argv) : run_params(argc, argv), reference_vid(NumVariants), reference_tune_idx(KernelBase::getUnknownTuningIdx()) { +#ifdef RAJA_PERFSUITE_USE_CALIPER + struct configuration cc; + adiak::init(NULL); + adiak::user(); + adiak::launchdate(); + adiak::libraries(); + adiak::cmdline(); + adiak::clustername(); + adiak::value("perfsuite_version", cc.adiak_perfsuite_version); + adiak::value("raja_version", cc.adiak_raja_version); + adiak::value("cmake_build_type", cc.adiak_cmake_build_type); + adiak::value("cmake_cxx_flags", cc.adiak_cmake_cxx_flags); + adiak::value("cmake_exe_linker_flags", cc.adiak_cmake_exe_linker_flags); + adiak::value("rajaperf_compiler", cc.adiak_rajaperf_compiler); + adiak::value("rajaperf_compiler_options", cc.adiak_rajaperf_compiler_options); + adiak::value("compiler_version", cc.adiak_compiler_version); + + auto tokens = split(cc.adiak_rajaperf_compiler, "/"); + string compiler_exec = tokens.back(); + string compiler = compiler_exec + "-" + cc.adiak_compiler_version; + cout << "Compiler: " << compiler << "\n"; + adiak::value("compiler", compiler.c_str()); + auto tsize = tokens.size(); + if (tsize >= 3) { + // pickup path version /bin/exec + string path_version = tokens[tsize-3]; + //cout << "Compiler path version: " << path_version << "\n"; + auto s = split(path_version,"-"); + if (s.size() >= 2) { + string path_version_short = s[0] + "-" + s[1]; + //cout << "Compiler path version short: " << path_version_short << "\n"; + adiak::value("Compiler_path_version",path_version_short.c_str()); + } + } + + if (strlen(cc.adiak_cuda_compiler_version) > 0) { + adiak::value("cuda_compiler_version", cc.adiak_cuda_compiler_version); + } + if (strlen(cc.adiak_gpu_targets) > 0) { + adiak::value("gpu_targets", cc.adiak_gpu_targets); + } + if (strlen(cc.adiak_cmake_hip_architectures) > 0) { 
+ adiak::value("cmake_hip_architectures", cc.adiak_cmake_hip_architectures); + } + if (strlen(cc.adiak_gpu_targets_block_sizes) > 0) { + adiak::value("gpu_targets_block_sizes", cc.adiak_gpu_targets_block_sizes); + } + if (strlen(cc.adiak_raja_hipcc_flags) > 0) { + adiak::value("raja_hipcc_flags", cc.adiak_raja_hipcc_flags); + } + if (strlen(cc.adiak_mpi_cxx_compiler) > 0) { + adiak::value("mpi_cxx_compiler", cc.adiak_mpi_cxx_compiler); + } + if (strlen(cc.adiak_systype_build) > 0) { + adiak::value("systype_build", cc.adiak_systype_build); + } + if (strlen(cc.adiak_machine_build) > 0) { + adiak::value("machine_build", cc.adiak_machine_build); + } + + adiak::value("ProblemSizeRunParam",(double)1.0); + adiak::value("SizeMeaning",run_params.SizeMeaningToStr(run_params.getSizeMeaning()).c_str()); + if (run_params.getSizeMeaning() == RunParams::SizeMeaning::Factor) { + adiak::value("ProblemSizeRunParam",(double)run_params.getSizeFactor()); + } else if (run_params.getSizeMeaning() == RunParams::SizeMeaning::Direct) { + adiak::value("ProblemSizeRunParam",(double)run_params.getSize()); + } + + // Openmp section +#if defined(_OPENMP) + std::string strval = ""; + std::string test = std::to_string(_OPENMP); + + std::unordered_map map{ + {200505,"2.5"},{200805,"3.0"},{201107,"3.1"},{201307,"4.0"},{201511,"4.5"},{201611,"4.5"},{201811,"5.0"},{202011,"5.1"},{202111,"5.2"}}; + + try { + strval = map.at(_OPENMP); + } catch(...) { + strval="Version Not Detected"; + } + std::cerr << "_OPENMP:" << test << " at version: " << strval << "\n"; + adiak::value("omp_version",strval.c_str()); + strval = std::to_string(omp_get_max_threads()); + adiak::value("omp_max_threads",strval.c_str()); +#endif + +#endif } @@ -113,6 +212,9 @@ Executor::~Executor() for (size_t ik = 0; ik < kernels.size(); ++ik) { delete kernels[ik]; } +#ifdef RAJA_PERFSUITE_USE_CALIPER + adiak::fini(); +#endif } @@ -611,6 +713,9 @@ void Executor::setupSuite() for (VIDset::iterator vid = run_var.begin(); vid != run_var.end(); ++vid) { variant_ids.push_back( *vid ); +#ifdef RAJA_PERFSUITE_USE_CALIPER + KernelBase::setCaliperMgrVariant(*vid,run_params.getOutputDirName(),run_params.getAddToSpotConfig()); +#endif } // @@ -862,13 +967,22 @@ void Executor::runSuite() for (size_t ik = 0; ik < warmup_kernels.size(); ++ik) { KernelBase* warmup_kernel = warmup_kernels[ik]; +#ifdef RAJA_PERFSUITE_USE_CALIPER + warmup_kernel->caliperOff(); +#endif runKernel(warmup_kernel, true); +#ifdef RAJA_PERFSUITE_USE_CALIPER + warmup_kernel->caliperOn(); +#endif delete warmup_kernel; warmup_kernels[ik] = nullptr; } } +// +// Now, run kernels for real.... 
+//
  getCout() << "\n\nRunning specified kernels and variants...\n";
 
  const int npasses = run_params.getNumPasses();
@@ -884,6 +998,11 @@
   } // loop over passes through suite
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  // Flush Caliper data
+  KernelBase::setCaliperMgrFlush();
+#endif
+
 }
 
 template < typename Kernel >
diff --git a/src/common/Executor.hpp b/src/common/Executor.hpp
index 6bca5a1d2..22fdfce8e 100644
--- a/src/common/Executor.hpp
+++ b/src/common/Executor.hpp
@@ -12,6 +12,10 @@
 #include "common/RAJAPerfSuite.hpp"
 #include "common/RunParams.hpp"
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+#include "rajaperf_config.hpp"
+#endif
+
 #include 
 #include 
 #include 
diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp
index c620c4880..393212fe9 100644
--- a/src/common/KernelBase.cpp
+++ b/src/common/KernelBase.cpp
@@ -39,6 +39,16 @@ KernelBase::KernelBase(KernelID kid, const RunParams& params) :
   running_tuning = getUnknownTuningIdx();
 
   checksum_scale_factor = 1.0;
+
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  // Init Caliper column metadata attributes; aggregatable attributes need to be initialized before manager.start()
+  ProblemSize_attr = cali_create_attribute("ProblemSize",CALI_TYPE_DOUBLE,CALI_ATTR_ASVALUE | CALI_ATTR_AGGREGATABLE | CALI_ATTR_SKIP_EVENTS);
+  Reps_attr = cali_create_attribute("Reps",CALI_TYPE_DOUBLE,CALI_ATTR_ASVALUE | CALI_ATTR_AGGREGATABLE | CALI_ATTR_SKIP_EVENTS);
+  Iters_Rep_attr = cali_create_attribute("Iterations/Rep",CALI_TYPE_DOUBLE,CALI_ATTR_ASVALUE | CALI_ATTR_AGGREGATABLE | CALI_ATTR_SKIP_EVENTS);
+  Kernels_Rep_attr = cali_create_attribute("Kernels/Rep",CALI_TYPE_DOUBLE,CALI_ATTR_ASVALUE | CALI_ATTR_AGGREGATABLE | CALI_ATTR_SKIP_EVENTS);
+  Bytes_Rep_attr = cali_create_attribute("Bytes/Rep",CALI_TYPE_DOUBLE,CALI_ATTR_ASVALUE | CALI_ATTR_AGGREGATABLE | CALI_ATTR_SKIP_EVENTS);
+  Flops_Rep_attr = cali_create_attribute("Flops/Rep",CALI_TYPE_DOUBLE,CALI_ATTR_ASVALUE | CALI_ATTR_AGGREGATABLE | CALI_ATTR_SKIP_EVENTS);
+#endif
+
 }
 
@@ -151,6 +161,9 @@ void KernelBase::setVariantDefined(VariantID vid)
   min_time[vid].resize(variant_tuning_names[vid].size(), std::numeric_limits<double>::max());
   max_time[vid].resize(variant_tuning_names[vid].size(), -std::numeric_limits<double>::max());
   tot_time[vid].resize(variant_tuning_names[vid].size(), 0.0);
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  doCaliMetaOnce[vid].resize(variant_tuning_names[vid].size(),true);
+#endif
 }
 
 void KernelBase::execute(VariantID vid, size_t tune_idx)
@@ -191,6 +204,12 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx)
     return;
   }
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  if(doCaliperTiming) {
+    KernelBase::setCaliperMgrStart(vid);
+  }
+#endif
+
   switch ( vid ) {
 
     case Base_Seq :
@@ -261,6 +280,11 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx)
     }
 
   }
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  if(doCaliperTiming) {
+    setCaliperMgrStop(vid);
+  }
+#endif
 }
 
 void KernelBase::print(std::ostream& os) const
@@ -331,4 +355,30 @@ void KernelBase::print(std::ostream& os) const
   os << std::endl;
 }
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+void KernelBase::doOnceCaliMetaBegin(VariantID vid, size_t tune_idx)
+{
+  // attributes are class variables initialized in ctor
+  if(doCaliMetaOnce[vid].at(tune_idx)) {
+    cali_set_double(ProblemSize_attr,(double)getActualProblemSize());
+    cali_set_double(Reps_attr,(double)getRunReps());
+    cali_set_double(Iters_Rep_attr,(double)getItsPerRep());
+    cali_set_double(Kernels_Rep_attr,(double)getKernelsPerRep());
+    cali_set_double(Bytes_Rep_attr,(double)getBytesPerRep());
+    cali_set_double(Flops_Rep_attr,(double)getFLOPsPerRep());
+  }
+}
+
+void KernelBase::doOnceCaliMetaEnd(VariantID vid, size_t tune_idx)
+{
+  if(doCaliMetaOnce[vid].at(tune_idx)) {
+    doCaliMetaOnce[vid].at(tune_idx) = false;
+  }
+}
+
+// define the KernelBase static Caliper manager map
+std::map<VariantID, cali::ConfigManager> KernelBase::mgr;
+#endif
 } // closing brace for rajaperf namespace
+
+
diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp
index cec79e2eb..cac0b66c9 100644
--- a/src/common/KernelBase.hpp
+++ b/src/common/KernelBase.hpp
@@ -29,8 +29,46 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+
+#define CALI_START \
+  if(doCaliperTiming) { \
+    std::string tstr = getVariantTuningName(running_variant,running_tuning); \
+    std::string kstr = getName(); \
+    std::string ktstr = kstr + "." + tstr; \
+    std::string gstr = getGroupName(kstr); \
+    std::string vstr = getVariantName(running_variant); \
+    doOnceCaliMetaBegin(running_variant,running_tuning); \
+    CALI_MARK_BEGIN(vstr.c_str()); \
+    CALI_MARK_BEGIN(gstr.c_str()); \
+    CALI_MARK_BEGIN(kstr.c_str()); \
+    CALI_MARK_BEGIN(ktstr.c_str()); \
+  }
+
+#define CALI_STOP \
+  if(doCaliperTiming) { \
+    std::string tstr = getVariantTuningName(running_variant,running_tuning); \
+    std::string kstr = getName(); \
+    std::string ktstr = kstr + "." + tstr; \
+    std::string gstr = getGroupName(kstr); \
+    std::string vstr = getVariantName(running_variant); \
+    CALI_MARK_END(ktstr.c_str()); \
+    CALI_MARK_END(kstr.c_str()); \
+    CALI_MARK_END(gstr.c_str()); \
+    CALI_MARK_END(vstr.c_str()); \
+    doOnceCaliMetaEnd(running_variant,running_tuning); \
+  }
+
+#else
+
+#define CALI_START
+#define CALI_STOP
+
+#endif
+
 namespace rajaperf {
 
 /*!
@@ -289,6 +327,7 @@ class KernelBase
 #ifdef RAJA_PERFSUITE_ENABLE_MPI
     MPI_Barrier(MPI_COMM_WORLD);
 #endif
+    CALI_START;
     timer.start();
   }
 
@@ -298,7 +337,7 @@ class KernelBase
 #ifdef RAJA_PERFSUITE_ENABLE_MPI
     MPI_Barrier(MPI_COMM_WORLD);
 #endif
-    timer.stop(); recordExecTime();
+    timer.stop(); CALI_STOP; recordExecTime();
   }
 
   void resetTimer() { timer.reset(); }
@@ -341,6 +380,187 @@ class KernelBase
   }
 #endif
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  void caliperOn() { doCaliperTiming = true; }
+  void caliperOff() { doCaliperTiming = false; }
+  void doOnceCaliMetaBegin(VariantID vid, size_t tune_idx);
+  void doOnceCaliMetaEnd(VariantID vid, size_t tune_idx);
+  static void setCaliperMgrVariant(VariantID vid, const std::string& outdir, const std::string& addToConfig)
+  {
+    static bool ran_spot_config_check = false;
+    bool config_ok = true;
+    const std::string problem_size_json_spec = R"json(
+  {
+   "name"        : "problem_size",
+   "type"        : "boolean",
+   "category"    : "metric",
+   "description" : "problem size",
+   "query"       :
+   [
+    { "level"  : "local",
+      "select": { "expr": "any(max#ProblemSize)", "as": "ProblemSize" },
+    },
+    { "level"  : "cross",
+      "select": { "expr": "any(any#max#ProblemSize)", "as": "ProblemSize" },
+    }
+   ]
+  }
+)json";
+
+    const std::string reps_json_spec = R"json(
+  {
+   "name"        : "reps",
+   "type"        : "boolean",
+   "category"    : "metric",
+   "description" : "reps",
+   "query"       :
+   [
+    { "level"  : "local",
+      "select": { "expr": "any(max#Reps)", "as": "Reps" },
+    },
+    { "level"  : "cross",
+      "select": { "expr": "any(any#max#Reps)", "as": "Reps" },
+    }
+   ]
+  }
+)json";
+
+    const std::string iters_json_spec = R"json(
+  {
+   "name"        : "iters_p_rep",
+   "type"        : "boolean",
+   "category"    : "metric",
+   "description" : "iterations per rep",
+   "query"       :
+   [
+    { "level"  : "local",
+      "select": { "expr": "any(max#Iterations/Rep)", "as": "Iterations/Rep" },
"Iterations/Rep" }, + }, + { "level" : "cross", + "select": { "expr": "any(any#max#Iterations/Rep)", "as": "Iterations/Rep" }, + } + ] + } +)json"; + + const std::string kernels_json_spec = R"json( + { + "name" : "kernels_p_rep", + "type" : "boolean", + "category" : "metric", + "description" : "kernels per rep", + "query" : + [ + { "level" : "local", + "select": { "expr": "any(max#Kernels/Rep)", "as": "Kernels/Rep" }, + }, + { "level" : "cross", + "select": { "expr": "any(any#max#Kernels/Rep)", "as": "Kernels/Rep" }, + } + ] + } +)json"; + + const std::string bytes_json_spec = R"json( + { + "name" : "bytes_p_rep", + "type" : "boolean", + "category" : "metric", + "description" : "bytes per rep", + "query" : + [ + { "level" : "local", + "select": { "expr": "any(max#Bytes/Rep)", "as": "Bytes/Rep" }, + }, + { "level" : "cross", + "select": { "expr": "any(any#max#Bytes/Rep)", "as": "Bytes/Rep" }, + } + ] + } +)json"; + + const std::string flops_rep_json_spec = R"json( + { + "name" : "flops_p_rep", + "type" : "boolean", + "category" : "metric", + "description" : "flops per rep", + "query" : + [ + { "level" : "local", + "select": { "expr": "any(max#Flops/Rep)", "as": "Flops/Rep" }, + }, + { "level" : "cross", + "select": { "expr": "any(any#max#Flops/Rep)", "as": "Flops/Rep" }, + } + ] + } +)json"; + + if(!ran_spot_config_check && (!addToConfig.empty())) { + cali::ConfigManager cm; + std::string check_profile = "spot()," + addToConfig; + std::string msg = cm.check(check_profile.c_str()); + if(!msg.empty()) { + std::cerr << "Problem with Cali Config: " << check_profile << "\n"; + std::cerr << "Check your command line argument: " << addToConfig << "\n"; + config_ok = false; + exit(-1); + } + ran_spot_config_check = true; + std::cout << "Caliper ran Spot config check\n"; + } + + if(config_ok) { + cali::ConfigManager m; + mgr.insert(std::make_pair(vid, m)); + std::string od("./"); + if (outdir.size()) { + od = outdir + "/"; + } + std::string vstr = getVariantName(vid); + std::string profile = "spot(output=" + od + vstr + ".cali)"; + if(!addToConfig.empty()) { + profile += "," + addToConfig; + } + std::cout << "Profile: " << profile << std::endl; + mgr[vid].add_option_spec(problem_size_json_spec.c_str()); + mgr[vid].set_default_parameter("problem_size", "true"); + mgr[vid].add_option_spec(reps_json_spec.c_str()); + mgr[vid].set_default_parameter("reps", "true"); + mgr[vid].add_option_spec(iters_json_spec.c_str()); + mgr[vid].set_default_parameter("iters_p_rep", "true"); + mgr[vid].add_option_spec(kernels_json_spec.c_str()); + mgr[vid].set_default_parameter("kernels_p_rep", "true"); + mgr[vid].add_option_spec(bytes_json_spec.c_str()); + mgr[vid].set_default_parameter("bytes_p_rep", "true"); + mgr[vid].add_option_spec(flops_rep_json_spec.c_str()); + mgr[vid].set_default_parameter("flops_p_rep", "true"); + mgr[vid].add(profile.c_str()); + } + } + + static void setCaliperMgrStart(VariantID vid) { mgr[vid].start(); } + static void setCaliperMgrStop(VariantID vid) { mgr[vid].stop(); } + static void setCaliperMgrFlush() + { // we're going to flush all the variants at once + std::cout << "flushing " << mgr.size() << " variants\n"; + for(auto const &kv : mgr) { + // set Adiak key first + std::string variant=getVariantName(kv.first); + adiak::value("variant",variant.c_str()); + mgr[kv.first].flush(); + } + } + + std::string getGroupName(const std::string &kname ) + { + std::size_t found = kname.find("_"); + return kname.substr(0,found); + } + +#endif + protected: const RunParams& run_params; @@ -382,6 +602,22 @@ 
   RAJA::Timer timer;
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  bool doCaliperTiming = true; // warmup can use this to exclude timing
+  std::vector<bool> doCaliMetaOnce[NumVariants];
+  cali_id_t ProblemSize_attr; // in ctor cali_create_attribute("ProblemSize",CALI_TYPE_DOUBLE,CALI_ATTR_ASVALUE | CALI_ATTR_AGGREGATABLE | CALI_ATTR_SKIP_EVENTS);
+  cali_id_t Reps_attr;
+  cali_id_t Iters_Rep_attr;
+  cali_id_t Kernels_Rep_attr;
+  cali_id_t Bytes_Rep_attr;
+  cali_id_t Flops_Rep_attr;
+
+
+  // we need a Caliper ConfigManager object per variant;
+  // with C++17 this static member could be defined inline here
+  static std::map<VariantID, cali::ConfigManager> mgr;
+#endif
+
   std::vector<double> min_time[NumVariants];
   std::vector<double> max_time[NumVariants];
   std::vector<double> tot_time[NumVariants];
diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp
index e73bd9888..cb7c8f0b4 100644
--- a/src/common/RAJAPerfSuite.hpp
+++ b/src/common/RAJAPerfSuite.hpp
@@ -19,6 +19,12 @@
 #include 
 #include 
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+#include <caliper/cali.h>
+#include <caliper/cali-manager.h>
+#include <adiak.hpp>
+#endif
+
 namespace rajaperf
 {
diff --git a/src/common/RunParams.cpp b/src/common/RunParams.cpp
index b4af80a20..e314b91d6 100644
--- a/src/common/RunParams.cpp
+++ b/src/common/RunParams.cpp
@@ -55,6 +55,9 @@ RunParams::RunParams(int argc, char** argv)
   invalid_npasses_combiner_input(),
   outdir(),
   outfile_prefix("RAJAPerf"),
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  add_to_spot_config(),
+#endif
   disable_warmup(false)
 {
   parseCommandLineOptions(argc, argv);
@@ -111,6 +114,12 @@ void RunParams::print(std::ostream& str) const
   str << "\n outdir = " << outdir;
   str << "\n outfile_prefix = " << outfile_prefix;
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  if(add_to_spot_config.length() > 0) {
+    str << "\n add_to_spot_config = " << add_to_spot_config;
+  }
+#endif
+
   str << "\n disable_warmup = " << disable_warmup;
 
   str << "\n kernel_input = ";
@@ -551,7 +560,20 @@ void RunParams::parseCommandLineOptions(int argc, char** argv)
       }
 
     }
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+    } else if ( std::string(argv[i]) == std::string("--add-to-spot-config") ||
+                std::string(argv[i]) == std::string("-atsc") ) {
+      i++;
+      if ( i < argc ) {
+        opt = std::string(argv[i]);
+        if ( opt.at(0) == '-' ) {
+          i--;
+        } else {
+          add_to_spot_config = std::string( argv[i] );
+        }
+      }
+#endif
     } else {
 
       input_state = BadInput;
@@ -708,6 +730,13 @@ void RunParams::printHelpMessage(std::ostream& str) const
   str << "\t\t Example...\n"
      << "\t\t --checkrun 2 (run each kernel twice)\n\n";
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  str << "\t --add-to-spot-config, -atsc [Default is none]\n"
+     << "\t\t appends additional parameters to the built-in Caliper spot config\n";
+  str << "\t\t Example to include some PAPI counters (Intel arch)\n"
+     << "\t\t -atsc topdown.all\n\n";
+#endif
+
   str << std::endl;
   str.flush();
 }
diff --git a/src/common/RunParams.hpp b/src/common/RunParams.hpp
index 5a0b79274..c3157c260 100644
--- a/src/common/RunParams.hpp
+++ b/src/common/RunParams.hpp
@@ -196,6 +196,10 @@ class RunParams {
   const std::string& getOutputDirName() const { return outdir; }
   const std::string& getOutputFilePrefix() const { return outfile_prefix; }
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  const std::string& getAddToSpotConfig() const { return add_to_spot_config; }
+#endif
+
   bool getDisableWarmup() const { return disable_warmup; }
 
   //@}
@@ -270,6 +274,10 @@ class RunParams {
   std::string outdir;          /*!< Output directory name. */
   std::string outfile_prefix;  /*!< Prefix for output data file names. */
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+  std::string add_to_spot_config;
+#endif
+
   bool disable_warmup;
 
 };
diff --git a/src/rajaperf_config.hpp.in b/src/rajaperf_config.hpp.in
index 9f14594b5..07e62e83c 100644
--- a/src/rajaperf_config.hpp.in
+++ b/src/rajaperf_config.hpp.in
@@ -31,6 +31,25 @@ namespace rajaperf {
 
 struct configuration {
 
+#ifdef RAJA_PERFSUITE_USE_CALIPER
+constexpr static const char* adiak_perfsuite_version = "@CMAKE_PROJECT_VERSION@";
+constexpr static const char* adiak_raja_version = "@RAJA_LOADED@";
+constexpr static const char* adiak_cmake_build_type = "@CMAKE_BUILD_TYPE@";
+constexpr static const char* adiak_cmake_cxx_flags = "@CMAKE_CXX_FLAGS@";
+constexpr static const char* adiak_cmake_exe_linker_flags = "@CMAKE_EXE_LINKER_FLAGS@";
+constexpr static const char* adiak_rajaperf_compiler = "@RAJAPERF_COMPILER@";
+constexpr static const char* adiak_rajaperf_compiler_options = "@RAJAPERF_COMPILER_OPTIONS@";
+constexpr static const char* adiak_compiler_version = "@CMAKE_CXX_COMPILER_VERSION@";
+constexpr static const char* adiak_cuda_compiler_version = "@CMAKE_CUDA_COMPILER_VERSION@";
+constexpr static const char* adiak_gpu_targets = "@GPU_TARGETS@";
+constexpr static const char* adiak_cmake_hip_architectures = "@CMAKE_HIP_ARCHITECTURES@";
+constexpr static const char* adiak_gpu_targets_block_sizes = "@RAJA_PERFSUITE_GPU_BLOCKSIZES@";
+constexpr static const char* adiak_raja_hipcc_flags = "@RAJA_HIPCC_FLAGS@";
+constexpr static const char* adiak_mpi_cxx_compiler = "@MPI_CXX_COMPILER@";
+constexpr static const char* adiak_systype_build = "@RAJAPERF_BUILD_SYSTYPE@";
+constexpr static const char* adiak_machine_build = "@RAJAPERF_BUILD_HOST@";
+#endif
+
 #if 0
 // Version of RAJA Perf Suite (ex: 0.1.0)
 static const std::string perfsuite_version =
@@ -72,7 +91,6 @@ std::string systype_run;
 std::string machine_run;
 
 };
-
 } // closing brace for rajaperf namespace
 
 // Squash compiler warnings about unused variables
@@ -83,3 +101,4 @@ inline void RAJAPERF_UNUSED_VAR(Ts&&...) { }
 #define RAJAPERF_UNUSED_ARG(...)
 #endif // closing endif for header file include guard
+
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 001c81190..5fe711de8 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -15,7 +15,7 @@ set(RAJA_PERFSUITE_TEST_EXECUTABLE_DEPENDS
     stream
     algorithm)
 list(APPEND RAJA_PERFSUITE_TEST_EXECUTABLE_DEPENDS ${RAJA_PERFSUITE_DEPENDS})
- 
+
 raja_add_test(
   NAME test-raja-perf-suite
   SOURCES test-raja-perf-suite.cpp
diff --git a/test/test-raja-perf-suite.cpp b/test/test-raja-perf-suite.cpp
index 9e59954f6..5078a7167 100644
--- a/test/test-raja-perf-suite.cpp
+++ b/test/test-raja-perf-suite.cpp
@@ -18,7 +18,6 @@
 
 TEST(ShortSuiteTest, Basic)
 {
-
   // Assemble command line args for basic test
 
   int argc = 5;
@@ -28,6 +27,7 @@ TEST(ShortSuiteTest, Basic)
   argc = 7;
 #endif
 
+
 #if (defined(RAJA_COMPILER_CLANG) && __clang_major__ == 11)
   argc = 7;
 #endif
@@ -55,6 +55,12 @@ TEST(ShortSuiteTest, Basic)
   for (int is = 0; is < argc; ++is) {
     argv[is] = const_cast<char*>(sargv[is].c_str());
   }
+#ifdef RAJA_PERFSUITE_ENABLE_MPI
+  MPI_Init(NULL,NULL);
+
+  int num_ranks;
+  MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
+#endif
 
   // STEP 1: Create suite executor object with input args defined above
   rajaperf::Executor executor(argc, argv);
@@ -131,6 +137,9 @@ TEST(ShortSuiteTest, Basic)
     }  // loop over variants
 
   } // loop over kernels
+#ifdef RAJA_PERFSUITE_ENABLE_MPI
+  MPI_Finalize();
+#endif
 
   // clean up
   delete [] argv;
diff --git a/tpl/RAJA b/tpl/RAJA
index 6e1b9afbe..28fbae0c9 160000
--- a/tpl/RAJA
+++ b/tpl/RAJA
@@ -1 +1 @@
-Subproject commit 6e1b9afbebd3d536de447d36733b473cc8dea790
+Subproject commit 28fbae0c957d223ea88685669f5eb2108aca84d1
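
Note on the Caliper wiring above: the per-variant instrumentation follows Caliper's documented ConfigManager pattern (build a "spot(...)" profile string, start the manager, mark regions, flush). The following minimal, self-contained sketch shows that pattern in isolation; it assumes a Caliper installation, and the region and output names here are illustrative only, not taken from this patch:

#include <caliper/cali.h>
#include <caliper/cali-manager.h>
#include <iostream>

int main()
{
  cali::ConfigManager mgr;
  mgr.add("spot(output=Base_Seq.cali)");   // one spot profile per output file
  if (mgr.error()) {
    std::cerr << "Caliper config error: " << mgr.error_msg() << "\n";
    return 1;
  }
  mgr.start();

  CALI_MARK_BEGIN("Base_Seq");   // variant-level region
  CALI_MARK_BEGIN("DAXPY");      // kernel-level region
  // ... timed work would run here ...
  CALI_MARK_END("DAXPY");
  CALI_MARK_END("Base_Seq");

  mgr.stop();
  mgr.flush();                   // writes Base_Seq.cali
  return 0;
}

The patch extends this pattern by holding one ConfigManager per VariantID in KernelBase::mgr and setting the adiak "variant" key just before each per-variant flush, so every variant lands in its own .cali file.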