From 132e0ff2b972439d8f481fda3dfb7f830920a016 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 17 Dec 2025 14:46:22 -0800 Subject: [PATCH 1/5] Simple, but somewhat unseemly change to make warmup kernels run only once. Also, fixed some output file column header names. --- src/common/Executor.cpp | 12 ++++++++---- src/common/KernelBase.cpp | 14 +++++++++++++- src/common/KernelBase.hpp | 17 ++++++++++++++++- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index 7805376bb..851a31394 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -835,6 +835,7 @@ void Executor::runWarmupKernels() // // Run warmup kernels // + bool prev_state = KernelBase::setWarmupRun(true); for ( auto kid = kernel_ids.begin(); kid != kernel_ids.end(); ++ kid ) { KernelBase* kernel = getKernelObject(*kid, run_params); #if defined(RAJA_PERFSUITE_USE_CALIPER) @@ -846,6 +847,7 @@ void Executor::runWarmupKernels() #endif delete kernel; } + KernelBase::setWarmupRun(prev_state); } @@ -933,10 +935,12 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode, // // Set basic table formatting parameters. // - const string kernel_col_name("Kernel "); + const string kernel_name_col_header_variant("Variant "); + const string kernel_name_col_header_tuning("Tuning "); const string sepchr(" , "); - size_t kercol_width = kernel_col_name.size(); + size_t kercol_width = max(kernel_name_col_header_variant.size(), + kernel_name_col_header_tuning.size()); for (size_t ik = 0; ik < kernels.size(); ++ik) { kercol_width = max(kercol_width, kernels[ik]->getName().size()); } @@ -969,7 +973,7 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode, // // Print column variant name line. 
// - file <(0); - if (run_params.getInputState() == RunParams::CheckRun) { + if (s_warmup_run) { + run_reps = static_cast(1); + } else if (run_params.getInputState() == RunParams::CheckRun) { run_reps = static_cast(run_params.getCheckRunReps()); } else { run_reps = static_cast(default_reps*run_params.getRepFactor()); diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 95889333e..710c2c74d 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -90,6 +90,15 @@ class KernelBase { return std::numeric_limits::max(); } static std::string getDefaultTuningName() { return "default"; } + // + // Method to set state of all Kernel objects to indicate kernel runs + // are for warmup purposes if true is passed, else false. + // + // The warmup state before the method call is returned to facilitate + // reset mechanics. + // + static bool setWarmupRun(bool warmup_run); + KernelBase(KernelID kid, const RunParams& params); virtual ~KernelBase(); @@ -629,7 +638,13 @@ class KernelBase variant_tuning_method_pointer method); // - // Static properties of kernel, independent of run + // Boolean member shared by all kernel objects indicating whether they + // will be run for warmup purposes (true) or not (false). 
+ // + static inline bool s_warmup_run = false; + + // + // Persistent properties of kernel, independent of run // KernelID kernel_id; std::string name; From 77b6754238813e83177f18eae1b9a3cc9704ddbb Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Thu, 18 Dec 2025 12:34:55 -0800 Subject: [PATCH 2/5] Clean up warmup kernel code and add more options --- src/common/Executor.cpp | 50 +++++++++++++++++++++++++++++----------- src/common/RunParams.cpp | 33 +++++++++++++++++--------- src/common/RunParams.hpp | 40 ++++++++++++++++++++++++++++---- 3 files changed, 93 insertions(+), 30 deletions(-) diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index 851a31394..f803def35 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -754,7 +754,9 @@ void Executor::runKernel(KernelBase* kernel, bool print_kernel_name) void Executor::runWarmupKernels() { - if ( run_params.getDisableWarmup() ) { + RunParams::WarmupMode warmup_mode = run_params.getWarmupMode(); + + if ( warmup_mode == RunParams::WarmupMode::Disable ) { return; } @@ -763,16 +765,28 @@ void Executor::runWarmupKernels() // // Get warmup kernels to run from input // - std::set kernel_ids = run_params.getWarmupKernelIDsToRun(); + std::set warmup_kernel_ids; + + if ( warmup_mode == RunParams::WarmupMode::Specified ) { + + warmup_kernel_ids = run_params.getSpecifiedWarmupKernelIDs(); - if ( kernel_ids.empty() ) { + } else if ( warmup_mode == RunParams::WarmupMode::KernelsRun ) { // - // If no warmup kernels were given, choose a warmup kernel for each feature + // Warmup kernels will be same as kernels specified to run in the suite // + for (size_t ik = 0; ik < kernels.size(); ++ik) { + KernelBase* kernel = kernels[ik]; + warmup_kernel_ids.insert( kernel->getKernelID() ); + } // iterate over kernels to run + } else if ( warmup_mode == RunParams::WarmupMode::Default ) { + + // + // No warmup kernel input given, choose a warmup kernel for each feature // - // For kernels to be run, assemble a set of 
feature IDs + // First, assemble a set of feature IDs // std::set feature_ids; for (size_t ik = 0; ik < kernels.size(); ++ik) { @@ -788,7 +802,7 @@ void Executor::runWarmupKernels() } // iterate over kernels // - // Map feature IDs to set of warmup kernel IDs + // Map feature IDs to rudimentary set of warmup kernel IDs // for ( auto fid = feature_ids.begin(); fid != feature_ids.end(); ++ fid ) { @@ -797,29 +811,29 @@ void Executor::runWarmupKernels() case Forall: case Kernel: case Launch: - kernel_ids.insert(Basic_DAXPY); break; + warmup_kernel_ids.insert(Basic_DAXPY); break; case Sort: - kernel_ids.insert(Algorithm_SORT); break; + warmup_kernel_ids.insert(Algorithm_SORT); break; case Scan: - kernel_ids.insert(Basic_INDEXLIST_3LOOP); break; + warmup_kernel_ids.insert(Basic_INDEXLIST_3LOOP); break; case Workgroup: - kernel_ids.insert(Comm_HALO_PACKING_FUSED); break; + warmup_kernel_ids.insert(Comm_HALO_PACKING_FUSED); break; case Reduction: - kernel_ids.insert(Basic_REDUCE3_INT); break; + warmup_kernel_ids.insert(Basic_REDUCE3_INT); break; case Atomic: - kernel_ids.insert(Basic_PI_ATOMIC); break; + warmup_kernel_ids.insert(Basic_PI_ATOMIC); break; case View: break; #ifdef RAJA_PERFSUITE_ENABLE_MPI case MPI: - kernel_ids.insert(Comm_HALO_EXCHANGE_FUSED); break; + warmup_kernel_ids.insert(Comm_HALO_EXCHANGE_FUSED); break; #endif default: @@ -836,7 +850,14 @@ void Executor::runWarmupKernels() // Run warmup kernels // bool prev_state = KernelBase::setWarmupRun(true); - for ( auto kid = kernel_ids.begin(); kid != kernel_ids.end(); ++ kid ) { + + for ( auto kid = warmup_kernel_ids.begin(); + kid != warmup_kernel_ids.end(); ++ kid ) { + // + // Note that we create a new kernel object for each kernel to run + // in warmup so we don't pollute timing data, checksum data, etc. + // for kernels that will run for real later... 
+ // KernelBase* kernel = getKernelObject(*kid, run_params); #if defined(RAJA_PERFSUITE_USE_CALIPER) kernel->caliperOff(); @@ -847,6 +868,7 @@ void Executor::runWarmupKernels() #endif delete kernel; } + KernelBase::setWarmupRun(prev_state); } diff --git a/src/common/RunParams.cpp b/src/common/RunParams.cpp index 44793e9f3..1568089d3 100644 --- a/src/common/RunParams.cpp +++ b/src/common/RunParams.cpp @@ -58,6 +58,7 @@ RunParams::RunParams(int argc, char** argv) checkrun_reps(1), reference_variant(), reference_vid(NumVariants), + warmup_mode(WarmupMode::Default), warmup_kernel_input(), invalid_warmup_kernel_input(), kernel_input(), @@ -83,7 +84,6 @@ RunParams::RunParams(int argc, char** argv) #if defined(RAJA_PERFSUITE_USE_CALIPER) add_to_spot_config(), #endif - disable_warmup(false), run_kernels(), run_variants() { @@ -176,8 +176,6 @@ void RunParams::print(std::ostream& str) const } #endif - str << "\n disable_warmup = " << disable_warmup; - str << "\n seq data space = " << getDataSpaceName(seqDataSpace); str << "\n omp data space = " << getDataSpaceName(ompDataSpace); str << "\n omp target data space = " << getDataSpaceName(ompTargetDataSpace); @@ -200,6 +198,8 @@ void RunParams::print(std::ostream& str) const str << "\n hip MPI data space = " << getDataSpaceName(hipMPIDataSpace); str << "\n kokkos MPI data space = " << getDataSpaceName(kokkosMPIDataSpace); + str << "\n warmup_mode = " << WarmupModeToStr(warmup_mode); + str << "\n warmup_kernel_input = "; for (size_t j = 0; j < warmup_kernel_input.size(); ++j) { str << "\n\t" << warmup_kernel_input[j]; @@ -845,6 +845,8 @@ void RunParams::parseCommandLineOptions(int argc, char** argv) } } + warmup_mode = WarmupMode::Specified; + } else if ( opt == std::string("--kernels") || opt == std::string("-k") ) { @@ -1140,9 +1142,13 @@ void RunParams::parseCommandLineOptions(int argc, char** argv) input_state = DryRun; } - } else if ( std::string(argv[i]) == std::string("--disable-warmup") ) { + } else if ( 
std::string(argv[i]) == std::string("--warmup-disable") ) { + + warmup_mode = WarmupMode::Disable; - disable_warmup = true; + } else if ( std::string(argv[i]) == std::string("--warmup-to-run") ) { + + warmup_mode = WarmupMode::KernelsRun; } else if ( std::string(argv[i]) == std::string("--checkrun") ) { @@ -1348,11 +1354,16 @@ void RunParams::printHelpMessage(std::ostream& str) const << "\t\t -of dat (output data will be in files 'dat*')\n\n"; str << "\t Options for selecting kernels to run....\n" - << "\t ========================================\n\n";; + << "\t ========================================\n\n"; + + str << "\t For warmup kernels, the default (no option specified) is to run a minimal set of warmup kernels based on\n" + << "\t RAJA features exercised in kernels selected to run. Other options are:\n\n"; + + str << "\t --warmup-disable (do not run any warmup kernels)\n\n"; - str << "\t --disable-warmup (disable warmup kernels) [Default is run warmup kernels that are relevant to kernels selected to run]\n\n"; + str << "\t --warmup-to-run (run each kernel specified to run once in warmup pass)\n\n"; - str << "\t --warmup-kernels, -wk [Default is run warmup kernels that are relevant to kernels selected to run]\n" + str << "\t --warmup-kernels, -wk [if no kernel names specified, default minimal set will be run]]\n" << "\t (names of individual kernels and/or groups of kernels to warmup)\n" << "\t See '--print-kernels'/'-pk' option for list of valid kernel and group names.\n" << "\t Kernel names are listed as _.\n"; @@ -2065,7 +2076,7 @@ void RunParams::processKernelInput() // // ================================================================ - run_warmup_kernels.clear(); + specified_warmup_kernel_ids.clear(); if ( !warmup_kernel_input.empty() ) { @@ -2103,7 +2114,7 @@ void RunParams::processKernelInput() KernelID tkid = static_cast(kid); if ( getFullKernelName(tkid).find(gname) != std::string::npos && exclude_kernels.find(tkid) == exclude_kernels.end()) { - 
run_warmup_kernels.insert(tkid); + specified_warmup_kernel_ids.insert(tkid); } } @@ -2121,7 +2132,7 @@ void RunParams::processKernelInput() KernelID tkid = static_cast(kid); if ( getKernelName(tkid) == *it || getFullKernelName(tkid) == *it ) { if (exclude_kernels.find(tkid) == exclude_kernels.end()) { - run_warmup_kernels.insert(tkid); + specified_warmup_kernel_ids.insert(tkid); } found_it = true; } diff --git a/src/common/RunParams.hpp b/src/common/RunParams.hpp index 4ba715964..cc715204d 100644 --- a/src/common/RunParams.hpp +++ b/src/common/RunParams.hpp @@ -132,6 +132,35 @@ class RunParams { } } + /*! + * \brief Enumeration indicating how to run warmup kernels + */ + enum WarmupMode { + Disable, /*!< no warmup kernels will be run */ + Default, /*!< run minimal set of warmup kernels based kernels to run */ + KernelsRun, /*!< run warmup pass of each kernel to run */ + Specified, /*!< run warmup pass of each kernel specified in warmup input */ + }; + + /*! + * \brief Translate WarmupMode enum value to string + */ + static std::string WarmupModeToStr(WarmupMode wm) + { + switch (wm) { + case WarmupMode::Disable: + return "Disable"; + case WarmupMode::Default: + return "Default"; + case WarmupMode::KernelsRun: + return "KernelsRun"; + case WarmupMode::Specified: + return "Specified"; + default: + return "Unknown"; + } + } + /*! * \brief Return state of input parsed to this point. 
*/ @@ -252,9 +281,10 @@ class RunParams { const std::string& getAddToCaliperConfig() const { return add_to_cali_config; } #endif - bool getDisableWarmup() const { return disable_warmup; } + WarmupMode getWarmupMode() const { return warmup_mode; } - const std::set& getWarmupKernelIDsToRun() const { return run_warmup_kernels; } + const std::set& getSpecifiedWarmupKernelIDs() const + { return specified_warmup_kernel_ids; } const std::set& getKernelIDsToRun() const { return run_kernels; } const std::set& getVariantIDsToRun() const { return run_variants; } VariantID getReferenceVariantID() const { return reference_vid; } @@ -364,6 +394,8 @@ class RunParams { DataSpace syclMPIDataSpace = DataSpace::SyclPinned; DataSpace kokkosMPIDataSpace = DataSpace::Copy; + WarmupMode warmup_mode; + // // Arrays to hold input strings for valid/invalid input. Helpful for // debugging command line args. @@ -398,9 +430,7 @@ class RunParams { std::string add_to_cali_config; #endif - bool disable_warmup; - - std::set run_warmup_kernels; + std::set specified_warmup_kernel_ids; std::set run_kernels; std::set run_variants; From f8b91183ada54234086e1af23f1c4a303b86c7d0 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Thu, 18 Dec 2025 14:12:50 -0800 Subject: [PATCH 3/5] Rename some things after reviewer comments --- src/common/Executor.cpp | 6 +++--- src/common/RunParams.cpp | 14 +++++++------- src/common/RunParams.hpp | 16 ++++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index f803def35..e90ebd149 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -19,7 +19,7 @@ #include "CudaDataUtils.hpp" #include "HipDataUtils.hpp" -// Warmup kernels to run first to help reduce startup overheads in timings +// Warmup kernels for default warmup mode #include "basic/DAXPY.hpp" #include "basic/REDUCE3_INT.hpp" #include "basic/INDEXLIST_3LOOP.hpp" @@ -767,11 +767,11 @@ void Executor::runWarmupKernels() // 
std::set warmup_kernel_ids; - if ( warmup_mode == RunParams::WarmupMode::Specified ) { + if ( warmup_mode == RunParams::WarmupMode::Explicit ) { warmup_kernel_ids = run_params.getSpecifiedWarmupKernelIDs(); - } else if ( warmup_mode == RunParams::WarmupMode::KernelsRun ) { + } else if ( warmup_mode == RunParams::WarmupMode::PerfRunSame ) { // // Warmup kernels will be same as kernels specified to run in the suite diff --git a/src/common/RunParams.cpp b/src/common/RunParams.cpp index 1568089d3..a98448c79 100644 --- a/src/common/RunParams.cpp +++ b/src/common/RunParams.cpp @@ -845,7 +845,7 @@ void RunParams::parseCommandLineOptions(int argc, char** argv) } } - warmup_mode = WarmupMode::Specified; + warmup_mode = WarmupMode::Explicit; } else if ( opt == std::string("--kernels") || opt == std::string("-k") ) { @@ -1146,9 +1146,9 @@ void RunParams::parseCommandLineOptions(int argc, char** argv) warmup_mode = WarmupMode::Disable; - } else if ( std::string(argv[i]) == std::string("--warmup-to-run") ) { + } else if ( std::string(argv[i]) == std::string("--warmup-perfrun-same") ) { - warmup_mode = WarmupMode::KernelsRun; + warmup_mode = WarmupMode::PerfRunSame; } else if ( std::string(argv[i]) == std::string("--checkrun") ) { @@ -1356,14 +1356,14 @@ void RunParams::printHelpMessage(std::ostream& str) const str << "\t Options for selecting kernels to run....\n" << "\t ========================================\n\n"; - str << "\t For warmup kernels, the default (no option specified) is to run a minimal set of warmup kernels based on\n" - << "\t RAJA features exercised in kernels selected to run. Other options are:\n\n"; + str << "\t For warmup kernels, the default (no option specified) will run a minimal set of warmup kernels based on\n" + << "\t RAJA features exercised in kernels specified for perf run. 
Other options are:\n\n"; str << "\t --warmup-disable (do not run any warmup kernels)\n\n"; - str << "\t --warmup-to-run (run each kernel specified to run once in warmup pass)\n\n"; + str << "\t --warmup-perfrun-same (run same set of kernels for warmup as specified for perf run)\n\n"; - str << "\t --warmup-kernels, -wk [if no kernel names specified, default minimal set will be run]]\n" + str << "\t --warmup-kernels, -wk [if no kernel names specified, none will be run for warmup]\n" << "\t (names of individual kernels and/or groups of kernels to warmup)\n" << "\t See '--print-kernels'/'-pk' option for list of valid kernel and group names.\n" << "\t Kernel names are listed as _.\n"; diff --git a/src/common/RunParams.hpp b/src/common/RunParams.hpp index cc715204d..558f165c7 100644 --- a/src/common/RunParams.hpp +++ b/src/common/RunParams.hpp @@ -136,10 +136,10 @@ class RunParams { * \brief Enumeration indicating how to run warmup kernels */ enum WarmupMode { - Disable, /*!< no warmup kernels will be run */ - Default, /*!< run minimal set of warmup kernels based kernels to run */ - KernelsRun, /*!< run warmup pass of each kernel to run */ - Specified, /*!< run warmup pass of each kernel specified in warmup input */ + Disable, /*!< no warmup kernels will be run */ + Default, /*!< run minimal set of warmup kernels based on kernels to run */ + PerfRunSame, /*!< run warmup pass of each kernel to run */ + Explicit, /*!< run warmup pass of each kernel explicitly named for warmup in input */ }; /*! 
@@ -152,10 +152,10 @@ class RunParams { return "Disable"; case WarmupMode::Default: return "Default"; - case WarmupMode::KernelsRun: - return "KernelsRun"; - case WarmupMode::Specified: - return "Specified"; + case WarmupMode::PerfRunSame: + return "PerfRunSame"; + case WarmupMode::Explicit: + return "Explicit"; default: return "Unknown"; } From 541ff86f1257e6f463bb0d13d4b543f093be2754 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Thu, 18 Dec 2025 15:57:27 -0800 Subject: [PATCH 4/5] Add a bit more explanation in how to run the suite section --- docs/sphinx/user_guide/run.rst | 66 +++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/docs/sphinx/user_guide/run.rst b/docs/sphinx/user_guide/run.rst index 3c83650cd..bd371b44f 100644 --- a/docs/sphinx/user_guide/run.rst +++ b/docs/sphinx/user_guide/run.rst @@ -51,29 +51,12 @@ kernel. Running the Suite ================== -After compilation, the main executable will reside in the ``bin`` subdirectory -of the build space. The executable will be able to run all kernels and -variants that have been built depending on which CMake options were specified -to configure the build. +After compilation, the RAJA Performance Suite executable will reside in the +``bin`` subdirectory of the build space. The executable will be able to run +all kernels and variants that have been built depending on which CMake options +were specified to configure the build. -To run the Suite in its default mode, type the executable name with no -command-line arguments:: - - $ ./bin/raja-perf.exe - -This will run all kernels and variants that have been built in their default -configurations. Information describing how the Suite will run along with -some information about each kernel will appear on the screen. More information -about kernel and execution details will also appear in a run report files -generated in the run directory after Suite execution completes. - -.. 
note:: You can pass the ``--dryrun`` command-line option to the executable - to see a summary of how the Suite will execute, by showing default - run parameters, without actually running it. You can also pass - other command-line options when doing a "dry run" and you will see - that the given options are represented in the screen output. - -The Suite can be run in a variety of ways determined by the command-line +The Suite can be run in many different ways chosen by the command-line options passed to the executable. For example, you can run or exclude subsets of kernels, variants, or groups. You can also pass options to set problem sizes, number of times each kernel is run (sampled), and many other run @@ -95,6 +78,27 @@ or:: .. note:: To see all available Suite execution options, pass the `--help` or `-h` option to the executable. +.. important:: We do not describe most of the Suite execution options in this + guide since the runtime help output is the main reference for + available options, defaults, and arguments they accept. + +To run the Suite in its default mode, type the executable name with no +command-line arguments:: + + $ ./bin/raja-perf.exe + +This will run all kernels and variants that have been built in their default +configurations. Information describing how the Suite will run along with +some information about each kernel will appear on the screen. More information +about kernel and execution details will also appear in run report files +generated in the run directory after Suite execution completes. + +.. note:: You can pass the ``--dryrun`` command-line option to the executable + to see a summary of how the Suite will execute by showing run + parameters without actually running it. You can pass any other + command-line options when doing a "dry run" and you will see + that the given options are represented in the screen output. 
+ Lastly, the program will report specific errors if given incorrect input, such as an option that requires a value and no value is provided. It will also emit a summary of command-line arguments it was given if the input contains @@ -116,7 +120,8 @@ will report the following in the screen output:: See run parameters or option messages above. The output indicates that the kernel input is invalid because the string Foo -is not the name of a kernel in the Suite, while DAXPY is the name of a kernel. +is not the name of a kernel in the Suite, while DAXPY is the name of a kernel +in the Suite. .. note:: The Suite executable will attempt to provide helpful information if it is given incorrect input, such as command-line arguments that @@ -130,8 +135,19 @@ is not the name of a kernel in the Suite, while DAXPY is the name of a kernel. Running with MPI ================== -Running the Suite with MPI is just like running any other MPI application. -For example, issuing the following command on a machine with slurm scheduling:: +The Suite can be configured and compiled to run in a distributed memory +parallel mode using MPI. Running the Suite on multiple MPI ranks will execute +the same code for each kernel on each rank with minimal synchronization points +to gather execution timing data from all ranks. This capability is provided so +that individual kernel performance more closely aligns with how such kernels +would perform in a real application. For example, compute node memory bandwidth +impact on performance may be different when running on many core system using +OpenMP multithreading to exercise all cores than when each core is mapped to +an MPI rank. + +Running the Suite on multiple MPI ranks is just like running any other MPI +application. 
For example, issuing the following command on a machine with +slurm scheduling:: $ srun -n 2 ./bin/raja-perf.exe From d9e882dc1e0db41ecdbd72d736df5d2d7b64e2ff Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 19 Dec 2025 08:59:16 -0800 Subject: [PATCH 5/5] Change from PR review --- docs/sphinx/user_guide/run.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/sphinx/user_guide/run.rst b/docs/sphinx/user_guide/run.rst index bd371b44f..e1ce415d2 100644 --- a/docs/sphinx/user_guide/run.rst +++ b/docs/sphinx/user_guide/run.rst @@ -141,9 +141,8 @@ the same code for each kernel on each rank with minimal synchronization points to gather execution timing data from all ranks. This capability is provided so that individual kernel performance more closely aligns with how such kernels would perform in a real application. For example, compute node memory bandwidth -impact on performance may be different when running on many core system using -OpenMP multithreading to exercise all cores than when each core is mapped to -an MPI rank. +may be different when running on a many core system using OpenMP multithreading +to exercise all cores than when each core is mapped to an MPI rank. Running the Suite on multiple MPI ranks is just like running any other MPI application. For example, issuing the following command on a machine with