Performance CI: exclude unstable kernels and normalise timings by passes.

Summary of what this patch changes:
  * .custom_perf: CALI_CONFIG reduced to "print.metadata".
  * Perf jobs: add PERF_EXCLUDE_KERNELS lists; the three Dane jobs lower
    --repfact from 10.0 to 5.0; the rocmcc SPEC adds ~libunwind / ~mpi.
  * build_and_test.sh: read PERF_EXCLUDE_KERNELS and forward it to
    raja-perf.exe as --exclude-kernels (only when the list is non-empty).
  * caliper_to_github_benchmark.py: divide total time by reps * passes and
    attach an "extra" string to each benchmark entry.

Review changes relative to the submitted patch:
  * PERF_EXCLUDE_KERNELS uses ">-" instead of ">" so the value carries no
    trailing newline when it is interpolated into the command line.
  * --exclude-kernels is emitted through ${var:+...} so an unset or empty
    PERF_EXCLUDE_KERNELS does not leave a dangling option.

NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of the patch; regenerate with `git diff` before
applying.

diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml
index 16aed0a9f..e252b9830 100644
--- a/.gitlab/custom-jobs-and-variables.yml
+++ b/.gitlab/custom-jobs-and-variables.yml
@@ -81,7 +81,7 @@ variables:
 
 .custom_perf:
   variables:
-    CALI_CONFIG: "event-trace,runtime-report,print.metadata"
+    CALI_CONFIG: "print.metadata"
 
 .reproducer_vars:
   script:
diff --git a/.gitlab/jobs/performances.yml b/.gitlab/jobs/performances.yml
index 5d94e76fc..81a260bba 100644
--- a/.gitlab/jobs/performances.yml
+++ b/.gitlab/jobs/performances.yml
@@ -18,7 +18,14 @@ clang_14_0_6_openmp_caliper:
     SPEC: "~shared +openmp +caliper %clang@=14.0.6 ^blt@develop"
     PERF_KERNELS: "Basic"
     PERF_VARIANTS: "RAJA_Seq RAJA_OpenMP"
-    PERF_RUN_OPTS: "--repfact 10.0"
+    # Exclude unstable tests (~20% variability):
+    PERF_EXCLUDE_KERNELS: >-
+      ARRAY_OF_PTRS
+      COPY8
+      PI_ATOMIC
+      REDUCE3_INT
+      REDUCE_STRUCT
+    PERF_RUN_OPTS: "--repfact 5.0"
     OMP_NUM_THREADS: "16"
   extends: .perf_on_dane
 
@@ -27,7 +34,21 @@ gcc_10_3_1_openmp_caliper:
     SPEC: "~shared +openmp +caliper %gcc@=10.3.1 ^blt@develop"
     PERF_KERNELS: "Basic"
     PERF_VARIANTS: "RAJA_Seq RAJA_OpenMP"
-    PERF_RUN_OPTS: "--repfact 10.0"
+    # Exclude unstable tests (~20% variability):
+    PERF_EXCLUDE_KERNELS: >-
+      ARRAY_OF_PTRS
+      COPY8
+      DAXPY
+      EMPTY
+      INIT3
+      INIT_VIEW1D
+      INIT_VIEW1D_OFFSET
+      MULADDSUB
+      PI_ATOMIC
+      PI_REDUCE
+      REDUCE3_INT
+      REDUCE_STRUCT
+    PERF_RUN_OPTS: "--repfact 5.0"
     OMP_NUM_THREADS: "16"
   extends: .perf_on_dane
 
@@ -36,15 +57,35 @@ clang_14_0_6_mpi_caliper:
     SPEC: "~shared +mpi +caliper %clang@=14.0.6 ^blt@develop"
     PERF_KERNELS: "Basic"
     PERF_VARIANTS: "RAJA_Seq"
-    PERF_RUN_OPTS: "--repfact 10.0"
+    # Exclude unstable tests (~20% variability):
+    PERF_EXCLUDE_KERNELS: >-
+      ARRAY_OF_PTRS
+      COPY8
+      DAXPY
+      DAXPY_ATOMIC
+      REDUCE_STRUCT
+    PERF_RUN_OPTS: "--repfact 5.0"
     PERF_MPI_EXEC: "srun --overlap -n 16"
   extends: .perf_on_dane
 
 rocmcc_6_4_1_hip_openmp_caliper:
   variables:
-    SPEC: "~shared +rocm +openmp +caliper amdgpu_target=gfx942 %rocmcc@=6.4.1 ^hip@6.4.1 ^blt@develop ^caliper~shared ^adiak~shared ^papi~shared"
+    SPEC: "~shared +rocm +openmp +caliper amdgpu_target=gfx942 %rocmcc@=6.4.1 ^hip@6.4.1 ^blt@develop ^caliper~shared~libunwind ^adiak~shared~mpi ^papi~shared"
     PERF_KERNELS: "Basic"
     PERF_VARIANTS: "RAJA_HIP"
+    # Exclude unstable tests (~20% variability):
+    PERF_EXCLUDE_KERNELS: >-
+      ARRAY_OF_PTRS
+      COPY8
+      EMPTY
+      IF_QUAD
+      INDEXLIST_3LOOP
+      MAT_MAT_SHARED
+      MULTI_REDUCE
+      PI_REDUCE
+      REDUCE3_INT
+      REDUCE_STRUCT
+      TRAP_INT
   extends: .perf_on_tuolumne
 
 process_results:
diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh
index a03b3fb12..3c6adec14 100755
--- a/scripts/gitlab/build_and_test.sh
+++ b/scripts/gitlab/build_and_test.sh
@@ -32,6 +32,7 @@ push_to_registry=${PUSH_TO_REGISTRY:-true}
 perf_artifact_dir=${PERF_ARTIFACT_DIR:-""}
 perf_tests=${PERF_TESTS:-false}
 perf_kernels=${PERF_KERNELS:-""}
+perf_exclude_kernels=${PERF_EXCLUDE_KERNELS:-""}
 perf_variants=${PERF_VARIANTS:-""}
 perf_run_opts=${PERF_RUN_OPTS:-""}
 perf_mpi_exec=${PERF_MPI_EXEC:-""}
@@ -302,7 +303,8 @@ then
 
     timed_message "Performance tests for RAJA Perf Suite"
 
-    raja_perf_command="${build_dir}/bin/raja-perf.exe --kernels ${perf_kernels} --variants ${perf_variants} --outdir ${perf_artifact_dir} ${perf_run_opts}"
+    # Pass --exclude-kernels only when PERF_EXCLUDE_KERNELS is non-empty, so jobs without an exclusion list do not emit a dangling option.
+    raja_perf_command="${build_dir}/bin/raja-perf.exe --kernels ${perf_kernels} ${perf_exclude_kernels:+--exclude-kernels ${perf_exclude_kernels}} --variants ${perf_variants} --outdir ${perf_artifact_dir} ${perf_run_opts}"
 
     if [[ -n ${perf_mpi_exec} ]]
     then
diff --git a/scripts/gitlab/caliper_to_github_benchmark.py b/scripts/gitlab/caliper_to_github_benchmark.py
index 3a69a81b1..eee115291 100755
--- a/scripts/gitlab/caliper_to_github_benchmark.py
+++ b/scripts/gitlab/caliper_to_github_benchmark.py
@@ -14,6 +14,7 @@
 
 METRIC = 'avg#inclusive#sum#time.duration'
 REPS_METRIC = 'any#any#max#Reps'
+PASSES_METRIC = 'sum#sum#rc.count'
 
 
 def caliper_to_benchmark_json(input_file):
@@ -36,11 +37,13 @@ def caliper_to_benchmark_json(input_file):
 
             total_time_s = float(record.get(METRIC, '0'))
             reps = int(record.get(REPS_METRIC, '1'))
+            passes = int(record.get(PASSES_METRIC, '1'))
 
             total_time_ms = total_time_s * 1000
 
-            # Calculate average time per rep
-            avg_time_per_rep_ms = total_time_ms / reps if reps > 0 else total_time_ms
+            # Calculate average time per rep (accounting for both reps and passes)
+            total_measurements = reps * passes
+            avg_time_per_rep_ms = total_time_ms / total_measurements if total_measurements > 0 else total_time_ms
 
             # Gather leaf-level kernel paths like "RAJAPerf/Group/KernelName"
             if isinstance(path, list) and len(path) >= 3:
@@ -48,7 +51,8 @@ def caliper_to_benchmark_json(input_file):
                 benchmark = {
                     "name": f"{variant}_{kernel_name}",
                     "unit": "ms/rep",
-                    "value": avg_time_per_rep_ms
+                    "value": avg_time_per_rep_ms,
+                    "extra": f"reps: {reps}, passes: {passes}, total_time: {total_time_s:.3f}s"
                 }
                 benchmarks.append(benchmark)
 