Skip to content

Commit 5c4ddd5

Browse files
Enable launcher benchmarks in CI for XPU (#5568)
Addresses #5028

---------

Co-authored-by: Anatoly Myachev <[email protected]>
1 parent 983aa34 commit 5c4ddd5

File tree

3 files changed

+43
-11
lines changed

3 files changed

+43
-11
lines changed

.github/workflows/third-party-benchmarks.yml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -223,6 +223,18 @@ jobs:
223223
--max-new-tokens $MAX_NEW_TOKENS \
224224
--batch-size $BATCH_SIZE
225225
226+
- name: Run launch microbenchmark tests
227+
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'launch_micro_benchmarks')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'launch_micro_benchmarks') }}
228+
run: |
229+
source scripts/capture-hw-details.sh
230+
python python/test/microbenchmark/launch_overhead.py --reports $REPORTS
231+
232+
python benchmarks/third_party/vllm/transform_results.py $REPORTS/launch_overhead_results.csv $REPORTS/launch_overhead-report.csv \
233+
--tag $TAG \
234+
--bgroup overhead \
235+
--benchmark launch-overhead \
236+
--param_cols="input_type"
237+
226238
- name: Upload benchmark reports
227239
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
228240
uses: actions/upload-artifact@v5

benchmarks/third_party/vllm/transform_results.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ def serialize_params(row):
4646

4747
dfs = []
4848
for compiler_name in compilers:
49-
for value_name in ['TFlops', 'GB/s']:
49+
for value_name in ['TFlops', 'GB/s', 'time_us']:
5050
col = f'{compiler_name}-{value_name}'
5151
if col not in df.columns:
5252
continue

python/test/microbenchmark/launch_overhead.py

Lines changed: 30 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,10 @@
22
Original code by @bertmaher; profiling added by @apgoucher
33
"""
44

5+
import argparse
56
import cProfile
7+
import csv
8+
import os
69
import pstats
710
import time
811

@@ -42,11 +45,11 @@ def nop_args(
4245
def do_bench_walltime(fn):
4346
print("Compiling...")
4447
fn()
45-
torch.cuda.synchronize()
48+
torch.xpu.synchronize()
4649

4750
for _ in range(1000):
4851
fn()
49-
torch.cuda.synchronize()
52+
torch.xpu.synchronize()
5053

5154
n_repeat = 10000
5255

@@ -55,11 +58,11 @@ def do_bench_walltime(fn):
5558
for _ in range(25):
5659
print("Running %d benchmarking iterations..." % n_repeat)
5760
# Benchmark
58-
torch.cuda.synchronize()
61+
torch.xpu.synchronize()
5962
start_time = time.time()
6063
for _ in range(n_repeat):
6164
fn()
62-
torch.cuda.synchronize()
65+
torch.xpu.synchronize()
6366
end_time = time.time()
6467
wall_time_ms = (end_time - start_time) * 1e3 / n_repeat
6568
mses.append(wall_time_ms)
@@ -71,19 +74,19 @@ def do_bench_walltime(fn):
7174
profile.enable()
7275
for _ in range(n_repeat):
7376
fn()
74-
torch.cuda.synchronize()
77+
torch.xpu.synchronize()
7578
profile.disable()
7679
stats = pstats.Stats(profile)
7780
stats.sort_stats("time")
7881
stats.print_stats()
7982
return mses
8083

8184

82-
def main(use_tensor_desc: bool):
85+
def main(use_tensor_desc: bool, reports_dir: str = None):
8386
if use_tensor_desc:
84-
targs = [TensorDescriptor.from_tensor(torch.zeros(1, 16, device="cuda"), block_shape=[1, 16]) for _ in range(5)]
87+
targs = [TensorDescriptor.from_tensor(torch.zeros(1, 16, device="xpu"), block_shape=[1, 16]) for _ in range(5)]
8588
else:
86-
targs = [torch.zeros(1, device="cuda") for _ in range(5)]
89+
targs = [torch.zeros(1, device="xpu") for _ in range(5)]
8790
ncargs = [0, 1, 1024, 2**31 - 1, 2**64 - 1, False, True, None, (16, 16)]
8891
cargs = [32, False, True, 0, 64]
8992

@@ -94,9 +97,26 @@ def main(use_tensor_desc: bool):
9497
print(usecs)
9598
print(sorted(usecs)[len(usecs) >> 1])
9699

100+
if reports_dir:
101+
os.makedirs(reports_dir, exist_ok=True)
102+
csv_path = os.path.join(reports_dir, "launch_overhead_results.csv")
103+
file_exists = os.path.exists(csv_path)
104+
105+
with open(csv_path, "a", newline="") as csvfile:
106+
writer = csv.writer(csvfile)
107+
if not file_exists:
108+
writer.writerow(["input_type", "triton-time_us"])
109+
110+
input_type = "TensorDescriptor" if use_tensor_desc else "Tensor"
111+
writer.writerow([input_type, round(sorted(usecs)[len(usecs) >> 1], 2)])
112+
97113

98114
if __name__ == "__main__":
115+
parser = argparse.ArgumentParser(description="Benchmark launch overhead for Triton kernels")
116+
parser.add_argument("--reports", type=str, default=None, help="Path to directory for CSV reports")
117+
args = parser.parse_args()
118+
99119
print("launch overhead of kernel with Tensor inputs")
100-
main(use_tensor_desc=False)
120+
main(use_tensor_desc=False, reports_dir=args.reports)
101121
print("launch overhead of kernel with TensorDescriptor inputs")
102-
main(use_tensor_desc=True)
122+
main(use_tensor_desc=True, reports_dir=args.reports)

0 commit comments

Comments
 (0)