
Commit 830884a

[Benchmarks] add RR benchmarks with test
1 parent: 27a58af

6 files changed: +278, -9 lines


.github/workflows/sycl-ur-perf-benchmarking.yml

Lines changed: 35 additions & 1 deletion
@@ -135,6 +135,7 @@ jobs:
   build_sycl_dispatch:
     name: '[Dispatch] Build SYCL'
     needs: [ sanitize_inputs_dispatch ]
+    if: false
     uses: ./.github/workflows/sycl-linux-build.yml
     with:
       build_ref: ${{ needs.sanitize_inputs_dispatch.outputs.build_ref }}
@@ -230,7 +231,7 @@ jobs:
   # Benchmark framework builds and runs on PRs path:
   build_pr:
     name: '[PR] Build SYCL'
-    if: github.event_name == 'pull_request'
+    if: false && github.event_name == 'pull_request'
     uses: ./.github/workflows/sycl-linux-build.yml
     with:
       build_ref: ${{ github.sha }}
@@ -243,6 +244,39 @@ jobs:
       changes: '[]'
       toolchain_artifact: sycl_linux_default

+  unittest_pr:
+    name: '[PR] Benchmarks Unit Tests'
+    if: github.event_name == 'pull_request'
+    # needs: [build_pr]
+    runs-on: PVC_PERF
+    container:
+      image: 'ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest'
+      env:
+        CMPLR_ROOT: '/opt/sycl/'
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+      - name: Install python libraries and dependencies
+        run: |
+          pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
+      - name: Checkout compute benchmarks code
+        uses: actions/checkout@v3
+        with:
+          repository: intel/compute-benchmarks
+          ref: master
+          path: compute-benchmarks
+      - name: Configure Compute Benchmarks
+        run: |
+          cmake -B compute-benchmarks/build -S compute-benchmarks -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
+            -DNULL_L0=ON -DBUILD_SYCL=ON -DBUILD_UR=OFF -DBUILD_OMP=OFF -DBUILD_HELLO_WORLD=OFF
+      - name: Build Compute Benchmarks
+        run: |
+          cmake --build compute-benchmarks/build -- -j$(nproc)
+      - name: Run tests
+        run: |
+          LLVM_BENCHMARKS_UNIT_TESTING=1 COMPUTE_BENCHMARKS_BUILD_PATH=$(pwd)/compute-benchmarks/build python3 ./devops/scripts/benchmarks/tests/test_integration.py
+
   # TODO: When we have stable BMG runner(s), consider moving this job to that runner.
   test_benchmark_framework:
     name: '[PR] Benchmark suite testing'
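
The unittest_pr job drives the integration test purely through environment variables, so the "Run tests" step can be reproduced locally. A minimal sketch (assuming the repo root as working directory, a sourced SYCL compiler providing CMPLR_ROOT, and a prebuilt compute-benchmarks tree at the path shown):

    # Rough local equivalent of the "Run tests" step above; paths are assumptions.
    import os
    import subprocess

    env = dict(os.environ)
    env["LLVM_BENCHMARKS_UNIT_TESTING"] = "1"  # skip git clone/update inside the framework
    env["COMPUTE_BENCHMARKS_BUILD_PATH"] = os.path.abspath("compute-benchmarks/build")

    subprocess.run(
        ["python3", "./devops/scripts/benchmarks/tests/test_integration.py"],
        env=env,
        check=True,
    )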

devops/scripts/benchmarks/benches/compute.py

Lines changed: 79 additions & 0 deletions
@@ -270,6 +270,48 @@ def benchmarks(self) -> list[Benchmark]:
                 )
             )

+        record_and_replay_params = product([0, 1], [0, 1])
+        for emulate, instantiate in record_and_replay_params:
+
+            def createRrBench(**kwargs):
+                return RecordAndReplay(
+                    self,
+                    RUNTIMES.LEVEL_ZERO,
+                    PROFILERS.TIMER,
+                    mRec=1,
+                    mInst=instantiate,
+                    mDest=0,
+                    emulate=emulate,
+                    **kwargs,
+                )
+
+            benches += [
+                createRrBench(
+                    nForksInLvl=2,
+                    nLvls=4,
+                    nCmdSetsInLvl=10,
+                    nInstantiations=10,
+                    nAppendKern=10,
+                    nAppendCopy=1,
+                ),
+                createRrBench(
+                    nForksInLvl=1,
+                    nLvls=1,
+                    nCmdSetsInLvl=10,
+                    nInstantiations=10,
+                    nAppendKern=10,
+                    nAppendCopy=10,
+                ),
+                createRrBench(
+                    nForksInLvl=1,
+                    nLvls=4,
+                    nCmdSetsInLvl=1,
+                    nInstantiations=0,
+                    nAppendKern=1,
+                    nAppendCopy=0,
+                ),
+            ]
+
         # Add UR-specific benchmarks
         benches += [
             # TODO: multithread_benchmark_ur fails with segfault
@@ -648,6 +690,43 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         ]


+class RecordAndReplay(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, profiler_type, **kwargs):
+        self.rr_params = kwargs
+        self.iterations_regular = 1000
+        self.iterations_trace = 10
+        super().__init__(
+            bench,
+            f"record_and_replay_benchmark_{runtime.value}",
+            "RecordGraph",
+            runtime,
+            profiler_type,
+        )
+
+    def name(self):
+        ret = []
+        for k, v in self.rr_params.items():
+            if k[0] == "n":  # numeric parameter
+                ret.append(f"{k[1:]} {v}")
+            elif k[0] == "m":
+                if v != 0:  # measure parameter
+                    ret.append(f"{k[1:]}")
+            else:  # boolean parameter
+                if v != 0:
+                    ret.append(k)
+        ret.sort()
+        return f"L0 {self.test} " + ", ".join(ret)
+
+    def display_name(self) -> str:
+        return self.name()
+
+    def get_tags(self):
+        return ["L0"]
+
+    def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
+        return [f"--{k}={v}" for k, v in self.rr_params.items()]
+
+
 class QueueInOrderMemcpy(ComputeBenchmark):
     def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
         self.isCopyOnly = isCopyOnly
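
A note on the RecordAndReplay naming scheme above: "n*" keyword arguments contribute "<Name> <value>", "m*" measurement flags contribute their name only when non-zero, and any other flag (e.g. emulate) appears only when truthy; the sorted, comma-joined result is prefixed with "L0 RecordGraph". The sketch below (illustration only, not part of the diff) reproduces, for the first parameter set with emulate=0 and instantiate=0, exactly the case name used by the new integration test, and shows how bin_args() turns the same kwargs into CLI flags:

    # Illustration only: mirrors RecordAndReplay.name() and bin_args() for one parameter set.
    params = dict(
        mRec=1, mInst=0, mDest=0, emulate=0,
        nForksInLvl=2, nLvls=4, nCmdSetsInLvl=10,
        nInstantiations=10, nAppendKern=10, nAppendCopy=1,
    )

    parts = []
    for k, v in params.items():
        if k[0] == "n":                  # numeric parameter: name plus value
            parts.append(f"{k[1:]} {v}")
        elif k[0] == "m":                # measurement flag: name only when enabled
            if v != 0:
                parts.append(k[1:])
        elif v != 0:                     # boolean flag: raw key when enabled
            parts.append(k)
    parts.sort()

    print("L0 RecordGraph " + ", ".join(parts))
    # L0 RecordGraph AppendCopy 1, AppendKern 10, CmdSetsInLvl 10, ForksInLvl 2,
    # Instantiations 10, Lvls 4, Rec

    print([f"--{k}={v}" for k, v in params.items()][:3])
    # ['--mRec=1', '--mInst=0', '--mDest=0'] ... one flag per keyword argument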

devops/scripts/benchmarks/git_project.py

Lines changed: 6 additions & 1 deletion
@@ -3,14 +3,14 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

+import os
 from pathlib import Path
 import shutil

 from utils.logger import log
 from utils.utils import run
 from options import options

-
 class GitProject:
     def __init__(
         self,
@@ -167,6 +167,11 @@ def _setup_repo(self) -> bool:
         Returns:
             bool: True if the repository was cloned or updated, False if it was already up-to-date.
         """
+        if os.environ.get("LLVM_BENCHMARKS_UNIT_TESTING") == "1":
+            log.debug(
+                f"Skipping git operations during unit testing of {self._name} (LLVM_BENCHMARKS_UNIT_TESTING=1)."
+            )
+            return False
         if not self.src_dir.exists():
             self._git_clone()
             return True
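
This early return is what lets the integration test run without touching the network: with LLVM_BENCHMARKS_UNIT_TESTING=1, _setup_repo() reports nothing changed and the framework reuses whatever the test pre-populated in the workdir (the compute-benchmarks-build symlink). A stand-alone sketch of the intended behavior, not the real class:

    import os

    def setup_repo_stub(name: str) -> bool:
        # Mirrors the guard added to GitProject._setup_repo().
        if os.environ.get("LLVM_BENCHMARKS_UNIT_TESTING") == "1":
            print(f"Skipping git operations during unit testing of {name}")
            return False  # nothing cloned or updated
        return True       # the normal path would clone or pull here

    os.environ["LLVM_BENCHMARKS_UNIT_TESTING"] = "1"
    assert setup_repo_stub("compute-benchmarks") is False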

devops/scripts/benchmarks/main.py

Lines changed: 10 additions & 3 deletions
@@ -137,10 +137,13 @@ def process_results(
             stddev_threshold_override
             if stddev_threshold_override is not None
             else options.stddev_threshold
-        ) * mean_value
+        )
+        threshold_scaled = threshold * mean_value

-        if stddev > threshold:
-            log.warning(f"stddev {stddev} above the threshold {threshold} for {label}")
+        if stddev > threshold_scaled:
+            log.warning(
+                f"stddev {stddev} above the threshold {threshold_scaled} ({threshold} times {mean_value}) for {label}"
+            )
             valid_results = False

         rlist.sort(key=lambda res: res.value)
@@ -228,6 +231,9 @@ def main(directory, additional_env_vars, compare_names, filter):
             benchmark for benchmark in s.benchmarks() if benchmark.enabled()
         ]
         if filter:
+            log.debug(
+                f"Filtering {len(suite_benchmarks)} benchmarks in {s.name()} suite for {filter.pattern}"
+            )
             suite_benchmarks = [
                 benchmark
                 for benchmark in suite_benchmarks
@@ -713,6 +719,7 @@ def validate_and_parse_env_args(env_args):
     options.dry_run = args.dry_run
     options.umf = args.umf
     options.iterations_stddev = args.iterations_stddev
+    options.stddev_threshold = args.stddev_threshold
     options.build_igc = args.build_igc
     options.current_run_name = args.relative_perf
     options.cudnn_directory = args.cudnn_directory
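
With this change, options.stddev_threshold is a relative bound: it is multiplied by the mean before being compared against the absolute standard deviation, and the warning now reports both numbers. A worked example with made-up values (the sample standard deviation here is for illustration only):

    values = [100.0, 101.0, 99.0, 130.0]                      # hypothetical measurements
    mean_value = sum(values) / len(values)                    # 107.5
    stddev = (sum((v - mean_value) ** 2 for v in values) / (len(values) - 1)) ** 0.5

    threshold = 0.1                                           # 10% of the mean
    threshold_scaled = threshold * mean_value                 # 10.75 in result units

    if stddev > threshold_scaled:                             # ~15.02 > 10.75, so it warns
        print(f"stddev {stddev:.2f} above the threshold {threshold_scaled:.2f} "
              f"({threshold} times {mean_value})")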

devops/scripts/benchmarks/tests/test_integration.py

Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
+# Copyright (C) 2025 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import os
+import shutil
+import unittest
+import tempfile
+import subprocess
+import json
+from collections import namedtuple
+
+# oneapi has to be installed and sourced for sycl benchmarks tests
+
+DataJson = namedtuple("DataJson", ["runs", "metadata", "tags", "names"])
+DataJsonRun = namedtuple("DataJsonRun", ["name", "results"])
+DataJsonResult = namedtuple(
+    "DataJsonResult", ["name", "label", "suite", "value", "unit"]
+)
+
+class App:
+    def __init__(self):
+        self.OUTPUT_DIR = None
+        self.RESULTS_DIR = None
+        self.WORKDIR_DIR = None
+
+    def prepare_dirs(self):
+        self.OUTPUT_DIR = tempfile.mkdtemp()
+        self.RESULTS_DIR = tempfile.mkdtemp()
+        self.WORKDIR_DIR = tempfile.mkdtemp()
+
+        # when UT does not want to build compute-benchmarks from scratch, it can provide prebuilt path
+        cb_targetpath = os.environ.get("COMPUTE_BENCHMARKS_BUILD_PATH")
+        if cb_targetpath and os.path.isdir(cb_targetpath):
+            cb_build_dir = os.path.join(self.WORKDIR_DIR, "compute-benchmarks-build")
+            os.symlink(cb_targetpath, cb_build_dir)
+            with open(
+                os.path.join(self.WORKDIR_DIR, "BENCH_WORKDIR_VERSION"), "w"
+            ) as f:
+                f.write("2.0")  # TODO: take from main.INTERNAL_WORKDIR_VERSION
+
+    def remove_dirs(self):
+        for d in [self.RESULTS_DIR, self.OUTPUT_DIR, self.WORKDIR_DIR]:
+            if d is not None:
+                shutil.rmtree(d, ignore_errors=True)
+
+    def run_main(self, *args):
+
+        # TODO: not yet tested: "--detect-version", "sycl,compute_runtime"
+
+        return subprocess.run(
+            [
+                "./devops/scripts/benchmarks/main.py",
+                self.WORKDIR_DIR,
+                "--sycl",
+                os.environ.get("CMPLR_ROOT"),
+                "--save",
+                "testfile",
+                "--output-html",
+                "remote",
+                "--results-dir",
+                self.RESULTS_DIR,
+                "--output-dir",
+                self.OUTPUT_DIR,
+                "--preset",
+                "Minimal",
+                "--timestamp-override",
+                "20240102_030405",
+                "--stddev-threshold",
+                "999999999.9",
+                "--exit-on-failure",
+                *args,
+            ]
+        )
+
+    def get_output(self):
+        with open(os.path.join(self.OUTPUT_DIR, "data.json")) as f:
+            out = json.load(f)
+        return DataJson(
+            runs=[
+                DataJsonRun(
+                    name=run["name"],
+                    results=[
+                        DataJsonResult(
+                            name=r["name"],
+                            label=r["label"],
+                            suite=r["suite"],
+                            value=r["value"],
+                            unit=r["unit"],
+                        )
+                        for r in run["results"]
+                    ],
+                )
+                for run in out["benchmarkRuns"]
+            ],
+            metadata=out["benchmarkMetadata"],
+            tags=out["benchmarkTags"],
+            names=out["defaultCompareNames"],
+        )
+
+
+# add "--verbose" for debug logs
+
+
+class TestE2E(unittest.TestCase):
+    def setUp(self):
+        # Load test data
+        self.app = App()
+        self.app.remove_dirs()
+        self.app.prepare_dirs()
+
+        # clean directory with input, output
+
+    def tearDown(self):
+        self.app.remove_dirs()
+
+    def test_record_and_replay(self):
+        caseName = "L0 RecordGraph AppendCopy 1, AppendKern 10, CmdSetsInLvl 10, ForksInLvl 2, Instantiations 10, Lvls 4, Rec"
+        run_result = self.app.run_main("--filter", caseName + "$")
+        self.assertEqual(run_result.returncode, 0, "Subprocess did not exit cleanly")
+
+        out = self.app.get_output()
+
+        self.assertIn(caseName, [r.name for r in out.runs[0].results])
+
+        metadata = out.metadata[caseName]
+        self.assertEqual(metadata["type"], "benchmark")
+        self.assertEqual(set(metadata["tags"]), {"L0"})
+
+    def test_submit_kernel(self):
+        caseName = "SubmitKernel out of order with measure completion KernelExecTime=20"
+        run_result = self.app.run_main("--filter", caseName + "$")
+        self.assertEqual(run_result.returncode, 0, "Subprocess did not exit cleanly")
+
+        out = self.app.get_output()
+
+        testName = "api_overhead_benchmark_l0 " + caseName
+        self.assertIn(testName, [r.name for r in out.runs[0].results])
+
+        metadata = out.metadata[testName]
+        self.assertEqual(metadata["type"], "benchmark")
+        self.assertEqual(set(metadata["tags"]), {"L0", "latency", "micro", "submit"})
+
+
+if __name__ == "__main__":
+    unittest.main()
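
Outside CI, a single case can be selected through unittest's standard command-line interface. A sketch assuming the repo root as working directory, a sourced compiler environment (CMPLR_ROOT), and optionally COMPUTE_BENCHMARKS_BUILD_PATH plus LLVM_BENCHMARKS_UNIT_TESTING=1 to reuse a prebuilt compute-benchmarks tree:

    # Run only the record-and-replay end-to-end case, with verbose unittest output.
    import subprocess

    subprocess.run(
        [
            "python3",
            "./devops/scripts/benchmarks/tests/test_integration.py",
            "TestE2E.test_record_and_replay",
            "-v",
        ],
        check=True,
    )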

devops/scripts/benchmarks/utils/utils.py

Lines changed: 1 addition & 4 deletions
@@ -104,10 +104,7 @@ def run(

         return result
     except subprocess.CalledProcessError as e:
-        if e.stdout and e.stdout.decode().strip():
-            log.error(e.stdout.decode())
-        if e.stderr and e.stderr.decode().strip():
-            log.error(e.stderr.decode())
+        log.error(f"{' '.join(e.cmd)}\nSTDOUT:{e.stdout.decode().strip()}\nSTDERR:{e.stderr.decode().strip()}")
         raise
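
A failed command is now reported as a single log entry that leads with the command line itself, followed by the captured stdout and stderr. A quick illustration of the resulting message shape (assuming output was captured, so e.stdout and e.stderr are bytes):

    import subprocess

    try:
        subprocess.run(
            ["python3", "-c", "import sys; print('out'); sys.exit(1)"],
            capture_output=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"{' '.join(e.cmd)}\nSTDOUT:{e.stdout.decode().strip()}\nSTDERR:{e.stderr.decode().strip()}")
    # python3 -c import sys; print('out'); sys.exit(1)
    # STDOUT:out
    # STDERR: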