diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 000000000..c586222f7
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,118 @@
+name: Process Benchmark
+
+# Dispatched manually with the benchmark results passed as a base64 string.
+on:
+  workflow_dispatch:
+    inputs:
+      benchmark_data:
+        description: 'Base64 encoded benchmark data'
+        required: true
+      benchmark_name:
+        description: 'Name for benchmark group on the dashboard'
+        required: false
+        default: 'RAJAPerf benchmarks'
+
+jobs:
+  # Compares the results without pushing them and reports a check on the PR.
+  check-performance:
+    if: github.ref != 'refs/heads/develop'
+    permissions:
+      # Needed to write PR check status
+      checks: write
+      # Need write if you turn on comments
+      pull-requests: read
+      contents: read
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Find associated PR
+        id: findpr
+        run: |
+          echo "Looking for PR with head branch: $HEAD_BRANCH"
+          gh pr list --head "$HEAD_BRANCH" --json number,title
+          PR_NUMBER=$(gh pr list --head "$HEAD_BRANCH" --json number -q '.[0].number')
+          echo "Found PR number: $PR_NUMBER"
+          echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT"
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Passed via env so the branch name is never spliced into the script.
+          HEAD_BRANCH: ${{ github.ref_name }}
+
+      - name: Decode benchmark data
+        run: echo "$BENCHMARK_DATA" | base64 -d > benchmark.json
+        env:
+          # Passed via env so the input is never spliced into the script.
+          BENCHMARK_DATA: ${{ github.event.inputs.benchmark_data }}
+
+      - name: Compare benchmark result
+        id: benchmark
+        continue-on-error: true
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: ${{ github.event.inputs.benchmark_name }}
+          tool: 'customSmallerIsBetter'
+          output-file-path: benchmark.json
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          auto-push: false
+          save-data-file: false
+          gh-pages-branch: gh-pages
+          benchmark-data-dir-path: dev/bench
+          comment-always: false
+          comment-on-alert: false
+          summary-always: true
+          fail-on-alert: true
+          alert-threshold: '120%'
+          max-items-in-chart: 100
+
+      - name: Create Check for PRs
+        if: steps.findpr.outputs.pr_number != ''
+        uses: LouisBrunner/checks-action@6b626ffbad7cc56fd58627f774b9067e6118af23
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          name: Performance Benchmark (${{ github.event.inputs.benchmark_name }})
+          sha: ${{ github.sha }}
+          conclusion: ${{ steps.benchmark.outcome == 'success' && 'success' || 'failure' }}
+          output: |
+            {
+              "title": "Performance Results",
+              "summary": "${{ steps.benchmark.outcome == 'success' && 'Performance check passed' || 'Performance regression detected!' }}",
+              "text_description": "See job summary for detailed benchmark results: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+            }
+
+      - name: Final status
+        if: steps.benchmark.outcome != 'success'
+        run: exit 1
+
+  store-and-visualize-on-develop:
+    # if: github.ref == 'refs/heads/develop' # TODO uncomment before merging
+    permissions:
+      # Needed to push to gh-pages branch
+      contents: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Decode benchmark data
+        run: echo "$BENCHMARK_DATA" | base64 -d > benchmark.json
+        env:
+          # Passed via env so the input is never spliced into the script.
+          BENCHMARK_DATA: ${{ github.event.inputs.benchmark_data }}
+
+      - name: Store benchmark result
+        id: benchmark
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: ${{ github.event.inputs.benchmark_name }}
+          tool: 'customSmallerIsBetter'
+          output-file-path: benchmark.json
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          auto-push: true
+          gh-pages-branch: gh-pages
+          benchmark-data-dir-path: dev/bench
+          comment-always: false
+          comment-on-alert: false
+          summary-always: true
+          fail-on-alert: true
+          alert-threshold: '120%'
+          max-items-in-chart: 100
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index f4f248672..dd525cc12 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -67,6 +67,7 @@ variables:
 stages:
   - prerequisites
   - build-and-test
+  - performance-measurements
 
 # Template for jobs triggering a build-and-test sub-pipeline:
 .build-and-test:
@@ -75,7 +76,7 @@ stages:
     include:
       - local: '.gitlab/custom-jobs-and-variables.yml'
       - project: 'radiuss/radiuss-shared-ci'
-        ref: 'v2025.09.1'
+        ref: 'woptim/workflow-token'
         file: 'pipelines/${CI_MACHINE}.yml'
       - artifact: '${CI_MACHINE}-jobs.yml'
         job: 'generate-job-lists'
@@ -83,6 +84,19 @@ stages:
     forward:
       pipeline_variables: true
 
+performance-measurements:
+  stage: performance-measurements
+  trigger:
+    include:
+      - local: '.gitlab/custom-jobs-and-variables.yml'
+      - project: 'radiuss/radiuss-shared-ci'
+        ref: 'woptim/workflow-token'
+        file: 'pipelines/performances.yml'
+      - local: '.gitlab/jobs/performances.yml'
+    strategy: depend
+    forward:
+      pipeline_variables: true
+
 include:
   # Sets ID tokens for every job using `default:`
   - project: 'lc-templates/id_tokens'
@@ -90,7 +104,7 @@ include:
   ## Note: RAJAPerf uses RSC project in RAJA submodule
   # [Optional] checks preliminary to running the actual CI test
   #- project: 'radiuss/radiuss-shared-ci'
-  #  ref: 'v2025.09.1'
+  #  ref: 'woptim/workflow-token'
   #  file: 'utilities/preliminary-ignore-draft-pr.yml'
   # pipelines subscribed by the project
   - local: '.gitlab/subscribed-pipelines.yml'
diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml
index d10130469..e252b9830 100644
--- a/.gitlab/custom-jobs-and-variables.yml
+++ b/.gitlab/custom-jobs-and-variables.yml
@@ -18,6 +18,8 @@ variables:
   DANE_SHARED_ALLOC: "--exclusive --reservation=ci --time=45 --nodes=1"
   # Arguments for job level allocation
   DANE_JOB_ALLOC: "--reservation=ci --nodes=1"
+  # Arguments for performance job allocation (dedicated allocation with no overlapping).
+  DANE_PERF_ALLOC: "--exclusive --reservation=ci --time=15 --nodes=1"
   # Project specific variants for dane
   PROJECT_DANE_VARIANTS: "~shared +openmp"
   # Project specific deps for dane
@@ -58,6 +60,8 @@ variables:
   TUOLUMNE_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=31m --nodes=1 -o per-resource.count=2"
   # Arguments for job level allocation
   TUOLUMNE_JOB_ALLOC: "--nodes=1 --begin-time=+5s"
+  # Arguments for performance job allocation (dedicated allocation with no overlapping).
+  TUOLUMNE_PERF_ALLOC: "--queue=pci --exclusive --time-limit=15m --nodes=1 --begin-time=+5s"
   # Project specific variants for tuolumne
   PROJECT_TUOLUMNE_VARIANTS: "~shared +openmp"
   # Project specific deps for tuolumne
@@ -75,6 +79,10 @@ variables:
     paths:
       - ./*.cmake
 
+.custom_perf:
+  variables:
+    CALI_CONFIG: "print.metadata"
+
 .reproducer_vars:
   script:
     - |
diff --git a/.gitlab/jobs/performances.yml b/.gitlab/jobs/performances.yml
new file mode 100644
index 000000000..81a260bba
--- /dev/null
+++ b/.gitlab/jobs/performances.yml
@@ -0,0 +1,102 @@
+##############################################################################
+# Copyright (c) 2025, Lawrence Livermore National Security, LLC
+# and RAJA Performance Suite project contributors.
+# See the RAJAPerf/LICENSE file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+##############################################################################
+
+variables:
+  PERF_ARTIFACT_DIR: "performance-results"
+  PERF_RESULTS_FILE: "*_*-*.cali"
+  PERF_PROCESSED_FILE: "processed_results.json"
+  PERF_PROCESSING_CMD: "${CI_PROJECT_DIR}/scripts/gitlab/caliper_to_github_benchmark.py"
+  PERF_TESTS: "true"
+
+clang_14_0_6_openmp_caliper:
+  variables:
+    SPEC: "~shared +openmp +caliper %clang@=14.0.6 ^blt@develop"
+    PERF_KERNELS: "Basic"
+    PERF_VARIANTS: "RAJA_Seq RAJA_OpenMP"
+    # Exclude unstable tests (~20% variability):
+    PERF_EXCLUDE_KERNELS: >
+      ARRAY_OF_PTRS
+      COPY8
+      PI_ATOMIC
+      REDUCE3_INT
+      REDUCE_STRUCT
+    PERF_RUN_OPTS: "--repfact 5.0"
+    OMP_NUM_THREADS: "16"
+  extends: .perf_on_dane
+
+gcc_10_3_1_openmp_caliper:
+  variables:
+    SPEC: "~shared +openmp +caliper %gcc@=10.3.1 ^blt@develop"
+    PERF_KERNELS: "Basic"
+    PERF_VARIANTS: "RAJA_Seq RAJA_OpenMP"
+    # Exclude unstable tests (~20% variability):
+    PERF_EXCLUDE_KERNELS: >
+      ARRAY_OF_PTRS
+      COPY8
+      DAXPY
+      EMPTY
+      INIT3
+      INIT_VIEW1D
+      INIT_VIEW1D_OFFSET
+      MULADDSUB
+      PI_ATOMIC
+      PI_REDUCE
+      REDUCE3_INT
+      REDUCE_STRUCT
+    PERF_RUN_OPTS: "--repfact 5.0"
+    OMP_NUM_THREADS: "16"
+  extends: .perf_on_dane
+
+clang_14_0_6_mpi_caliper:
+  variables:
+    SPEC: "~shared +mpi +caliper %clang@=14.0.6 ^blt@develop"
+    PERF_KERNELS: "Basic"
+    PERF_VARIANTS: "RAJA_Seq"
+    # Exclude unstable tests (~20% variability):
+    PERF_EXCLUDE_KERNELS: >
+      ARRAY_OF_PTRS
+      COPY8
+      DAXPY
+      DAXPY_ATOMIC
+      REDUCE_STRUCT
+    PERF_RUN_OPTS: "--repfact 5.0"
+    PERF_MPI_EXEC: "srun --overlap -n 16"
+  extends: .perf_on_dane
+
+rocmcc_6_4_1_hip_openmp_caliper:
+  variables:
+    SPEC: "~shared +rocm +openmp +caliper amdgpu_target=gfx942 %rocmcc@=6.4.1 ^hip@6.4.1 ^blt@develop ^caliper~shared~libunwind ^adiak~shared~mpi ^papi~shared"
+    PERF_KERNELS: "Basic"
+    PERF_VARIANTS: "RAJA_HIP"
+    # Exclude unstable tests (~20% variability):
+    PERF_EXCLUDE_KERNELS: >
+      ARRAY_OF_PTRS
+      COPY8
+      EMPTY
+      IF_QUAD
+      INDEXLIST_3LOOP
+      MAT_MAT_SHARED
+      MULTI_REDUCE
+      PI_REDUCE
+      REDUCE3_INT
+      REDUCE_STRUCT
+      TRAP_INT
+  extends: .perf_on_tuolumne
+
+process_results:
+  extends: .caliper_to_gh_benchmark
+  dependencies:
+    - clang_14_0_6_openmp_caliper
+    - gcc_10_3_1_openmp_caliper
+    - clang_14_0_6_mpi_caliper
+    - rocmcc_6_4_1_hip_openmp_caliper
+
+report_results:
+  extends: .report_to_gh_benchmark
+  dependencies:
+    - process_results
diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh
index 92cb99996..3c6adec14 100755
--- a/scripts/gitlab/build_and_test.sh
+++ b/scripts/gitlab/build_and_test.sh
@@ -29,6 +29,13 @@ use_dev_shm=${USE_DEV_SHM:-true}
 spack_debug=${SPACK_DEBUG:-false}
 debug_mode=${DEBUG_MODE:-false}
 push_to_registry=${PUSH_TO_REGISTRY:-true}
+perf_artifact_dir=${PERF_ARTIFACT_DIR:-""}
+perf_tests=${PERF_TESTS:-false}
+perf_kernels=${PERF_KERNELS:-""}
+perf_exclude_kernels=${PERF_EXCLUDE_KERNELS:-""}
+perf_variants=${PERF_VARIANTS:-""}
+perf_run_opts=${PERF_RUN_OPTS:-""}
+perf_mpi_exec=${PERF_MPI_EXEC:-""}
 
 raja_version=${UPDATE_RAJA:-""}
 sys_type=${SYS_TYPE:-""}
@@ -256,9 +263,8 @@ then
 fi
 
 # Test
-if [[ "${option}" != "--build-only" ]] && grep -q -i "ENABLE_TESTS.*ON" ${hostconfig_path}
+if [[ "${option}" != "--build-only" && "${perf_tests}" != "true" ]] && grep -q -i "ENABLE_TESTS.*ON" ${hostconfig_path}
 then
-
     if [[ ! -d ${build_dir} ]]
     then
         echo "[Error]: Build directory not found : ${build_dir}" && exit 1
@@ -288,7 +294,37 @@ then
     timed_message "RAJA Perf Suite tests completed"
 fi
 
+# Performance tests
+if [[ "${option}" != "--build-only" && "${perf_tests}" == "true" ]]
+then
+
+    if [[ -z "${perf_artifact_dir}" ]]
+    then
+        echo "[Error]: PERF_ARTIFACT_DIR is required when PERF_TESTS is true" && exit 1
+    fi
+
+    cd ${project_dir}
+    mkdir -p "${perf_artifact_dir}"
+
+    timed_message "Performance tests for RAJA Perf Suite"
+
+    # Assemble the command as an array: list-valued settings are deliberately
+    # word-split, and an empty selection drops its flag instead of leaving it
+    # dangling in front of the next option.
+    raja_perf_command=(${perf_mpi_exec} "${build_dir}/bin/raja-perf.exe")
+    raja_perf_command+=(${perf_kernels:+--kernels ${perf_kernels}})
+    raja_perf_command+=(${perf_exclude_kernels:+--exclude-kernels ${perf_exclude_kernels}})
+    raja_perf_command+=(${perf_variants:+--variants ${perf_variants}})
+    raja_perf_command+=(--outdir "${perf_artifact_dir}" ${perf_run_opts})
+
+    echo "Running: ${raja_perf_command[*]}"
+    "${raja_perf_command[@]}"
+
+    timed_message "Performance tests for RAJA Perf Suite completed"
+fi
+
 timed_message "Cleaning up"
+cd ${build_dir}
 make clean
 
 timed_message "Build and test completed"
diff --git a/scripts/gitlab/caliper_to_github_benchmark.py b/scripts/gitlab/caliper_to_github_benchmark.py
new file mode 100755
index 000000000..eee115291
--- /dev/null
+++ b/scripts/gitlab/caliper_to_github_benchmark.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+"""
+Convert Caliper .cali output to GitHub benchmark action JSON format.
+
+Usage: python caliper_to_github_benchmark.py "pattern" output.json
+       python caliper_to_github_benchmark.py input.cali output.json
+       python caliper_to_github_benchmark.py "/path/to/*_*-*.cali" output.json
+"""
+import glob
+import json
+import sys
+from pathlib import Path
+
+import caliperreader as cr
+
+METRIC = 'avg#inclusive#sum#time.duration'
+REPS_METRIC = 'any#any#max#Reps'
+PASSES_METRIC = 'sum#sum#rc.count'
+
+
+def caliper_to_benchmark_json(input_file):
+    """Convert one Caliper file to benchmark format.
+
+    Args:
+        input_file: Path to Caliper file (str or pathlib.Path).
+    Returns:
+        List of benchmark dictionaries, one for each record whose path is a
+        list of at least three components.
+    """
+    input_file = Path(input_file)
+    variant = input_file.stem
+
+    reader = cr.CaliperReader()
+    reader.read(str(input_file))
+
+    benchmarks = []
+
+    for record in reader.records:
+        path = record.get('path', 'UNKNOWN')
+
+        # Gather leaf-level kernel paths like "RAJAPerf/Group/KernelName";
+        # skip everything else before parsing its metrics.
+        if not isinstance(path, list) or len(path) < 3:
+            continue
+
+        total_time_s = float(record.get(METRIC, '0'))
+        reps = int(record.get(REPS_METRIC, '1'))
+        passes = int(record.get(PASSES_METRIC, '1'))
+
+        total_time_ms = total_time_s * 1000
+
+        # Calculate average time per rep (accounting for both reps and passes)
+        total_measurements = reps * passes
+        avg_time_per_rep_ms = total_time_ms / total_measurements if total_measurements > 0 else total_time_ms
+
+        benchmarks.append({
+            "name": f"{variant}_{path[-1]}",
+            "unit": "ms/rep",
+            "value": avg_time_per_rep_ms,
+            "extra": f"reps: {reps}, passes: {passes}, total_time: {total_time_s:.3f}s"
+        })
+
+    return benchmarks
+
+
+def main():
+    """Convert every Caliper file matching argv[1] into the JSON file argv[2]."""
+    if len(sys.argv) != 3:
+        sys.exit("Usage: caliper_to_github_benchmark.py \"input_file_glob_pattern\" output.json")
+
+    pattern = sys.argv[1]
+    output_file = sys.argv[2]
+
+    # glob.glob accepts absolute patterns such as "/path/to/*.cali", which
+    # Path('.').glob() rejects; sorting keeps the output order reproducible.
+    cali_files = sorted(Path(name) for name in glob.glob(pattern))
+
+    if not cali_files:
+        print("Usage: caliper_to_github_benchmark.py \"input_file_glob_pattern\" output.json")
+        sys.exit(f"Error: No .cali files found matching pattern: {pattern}")
+
+    print(f"Found {len(cali_files)} Caliper files to process")
+
+    all_benchmarks = []
+    for cali_file in cali_files:
+        print(f"Processing {cali_file}")
+        all_benchmarks.extend(caliper_to_benchmark_json(cali_file))
+
+    with open(output_file, 'w') as f:
+        json.dump(all_benchmarks, f, indent=2)
+
+    print(f"Converted {len(all_benchmarks)} benchmarks from {len(cali_files)} files to {output_file}")
+
+
+if __name__ == "__main__":
+    main()