llnl · adrienbernede · Dec 15, 2025 · Sep 30, 2025 · Oct 3, 2025 · Oct 3, 2025
diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml
@@ -81,7 +81,7 @@ variables:
 
 .custom_perf:
   variables:
-    CALI_CONFIG: "event-trace,runtime-report,print.metadata"
+    CALI_CONFIG: "print.metadata"
 
 .reproducer_vars:
   script:

diff --git a/.gitlab/jobs/performances.yml b/.gitlab/jobs/performances.yml
@@ -18,7 +18,14 @@ clang_14_0_6_openmp_caliper:
     SPEC: "~shared +openmp +caliper %clang@=14.0.6 ^blt@develop"
     PERF_KERNELS: "Basic"
     PERF_VARIANTS: "RAJA_Seq RAJA_OpenMP"
-    PERF_RUN_OPTS: "--repfact 10.0"
+    # Exclude unstable tests (~20% variability); ">-" avoids a trailing newline:
+    PERF_EXCLUDE_KERNELS: >-
+      ARRAY_OF_PTRS
+      COPY8
+      PI_ATOMIC
+      REDUCE3_INT
+      REDUCE_STRUCT
+    PERF_RUN_OPTS: "--repfact 5.0"
     OMP_NUM_THREADS: "16"
   extends: .perf_on_dane
 
@@ -27,7 +34,21 @@ gcc_10_3_1_openmp_caliper:
     SPEC: "~shared +openmp +caliper %gcc@=10.3.1 ^blt@develop"
     PERF_KERNELS: "Basic"
     PERF_VARIANTS: "RAJA_Seq RAJA_OpenMP"
-    PERF_RUN_OPTS: "--repfact 10.0"
+    # Exclude unstable tests (~20% variability); ">-" avoids a trailing newline:
+    PERF_EXCLUDE_KERNELS: >-
+      ARRAY_OF_PTRS
+      COPY8
+      DAXPY
+      EMPTY
+      INIT3
+      INIT_VIEW1D
+      INIT_VIEW1D_OFFSET
+      MULADDSUB
+      PI_ATOMIC
+      PI_REDUCE
+      REDUCE3_INT
+      REDUCE_STRUCT
+    PERF_RUN_OPTS: "--repfact 5.0"
     OMP_NUM_THREADS: "16"
   extends: .perf_on_dane
 
@@ -36,15 +57,35 @@ clang_14_0_6_mpi_caliper:
     SPEC: "~shared +mpi +caliper %clang@=14.0.6 ^blt@develop"
     PERF_KERNELS: "Basic"
     PERF_VARIANTS: "RAJA_Seq"
-    PERF_RUN_OPTS: "--repfact 10.0"
+    # Exclude unstable tests (~20% variability); ">-" avoids a trailing newline:
+    PERF_EXCLUDE_KERNELS: >-
+      ARRAY_OF_PTRS
+      COPY8
+      DAXPY
+      DAXPY_ATOMIC
+      REDUCE_STRUCT
+    PERF_RUN_OPTS: "--repfact 5.0"
     PERF_MPI_EXEC: "srun --overlap -n 16"
   extends: .perf_on_dane
 
 rocmcc_6_4_1_hip_openmp_caliper:
   variables:
-    SPEC: "~shared +rocm +openmp +caliper amdgpu_target=gfx942 %rocmcc@=6.4.1 ^hip@6.4.1 ^blt@develop ^caliper~shared ^adiak~shared ^papi~shared"
+    SPEC: "~shared +rocm +openmp +caliper amdgpu_target=gfx942 %rocmcc@=6.4.1 ^hip@6.4.1 ^blt@develop ^caliper~shared~libunwind ^adiak~shared~mpi ^papi~shared"
     PERF_KERNELS: "Basic"
     PERF_VARIANTS: "RAJA_HIP"
+    # Exclude unstable tests (~20% variability); ">-" avoids a trailing newline:
+    PERF_EXCLUDE_KERNELS: >-
+      ARRAY_OF_PTRS
+      COPY8
+      EMPTY
+      IF_QUAD
+      INDEXLIST_3LOOP
+      MAT_MAT_SHARED
+      MULTI_REDUCE
+      PI_REDUCE
+      REDUCE3_INT
+      REDUCE_STRUCT
+      TRAP_INT
   extends: .perf_on_tuolumne
 
 process_results:

diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh
@@ -32,6 +32,7 @@ push_to_registry=${PUSH_TO_REGISTRY:-true}
 perf_artifact_dir=${PERF_ARTIFACT_DIR:-""}
 perf_tests=${PERF_TESTS:-false}
 perf_kernels=${PERF_KERNELS:-""}
+perf_exclude_kernels=${PERF_EXCLUDE_KERNELS:-""}
 perf_variants=${PERF_VARIANTS:-""}
 perf_run_opts=${PERF_RUN_OPTS:-""}
 perf_mpi_exec=${PERF_MPI_EXEC:-""}
@@ -302,7 +303,7 @@ then
 
     timed_message "Performance tests for RAJA Perf Suite"
 
-    raja_perf_command="${build_dir}/bin/raja-perf.exe --kernels ${perf_kernels} --variants ${perf_variants} --outdir ${perf_artifact_dir} ${perf_run_opts}"
+    raja_perf_command="${build_dir}/bin/raja-perf.exe --kernels ${perf_kernels} ${perf_exclude_kernels:+--exclude-kernels ${perf_exclude_kernels}} --variants ${perf_variants} --outdir ${perf_artifact_dir} ${perf_run_opts}"
 
     if [[ -n ${perf_mpi_exec} ]]
     then

diff --git a/scripts/gitlab/caliper_to_github_benchmark.py b/scripts/gitlab/caliper_to_github_benchmark.py
@@ -14,6 +14,7 @@
 
 METRIC = 'avg#inclusive#sum#time.duration'
 REPS_METRIC = 'any#any#max#Reps'
+PASSES_METRIC = 'sum#sum#rc.count'
 
 
 def caliper_to_benchmark_json(input_file):
@@ -36,19 +37,22 @@ def caliper_to_benchmark_json(input_file):
 
         total_time_s = float(record.get(METRIC, '0'))
         reps = int(record.get(REPS_METRIC, '1'))
+        passes = int(record.get(PASSES_METRIC, '1'))
 
         total_time_ms = total_time_s * 1000
 
-        # Calculate average time per rep
-        avg_time_per_rep_ms = total_time_ms / reps if reps > 0 else total_time_ms
+        # Calculate average time per rep (accounting for both reps and passes)
+        total_measurements = reps * passes
+        avg_time_per_rep_ms = total_time_ms / total_measurements if total_measurements > 0 else total_time_ms
 
         # Gather leaf-level kernel paths like "RAJAPerf/Group/KernelName"
         if isinstance(path, list) and len(path) >= 3:
             kernel_name = path[-1]
             benchmark = {
                 "name": f"{variant}_{kernel_name}",
                 "unit": "ms/rep",
-                "value": avg_time_per_rep_ms
+                "value": avg_time_per_rep_ms,
+                "extra": f"reps: {reps}, passes: {passes}, total_time: {total_time_s:.3f}s"
             }
             benchmarks.append(benchmark)