Skip to content

Commit c752cce

Browse files
authored
[SYCL][Bench] extend compute-benchmark suite (#18222)
Add multithreaded benchmarks with copy offload disabled to track any performance regressions in the copy offload feature.
1 parent 07649c3 commit c752cce

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

devops/scripts/benchmarks/benches/compute.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,11 @@ def benchmarks(self) -> list[Benchmark]:
196196
# Add UR-specific benchmarks
197197
if options.ur is not None:
198198
benches += [
199-
MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1),
200-
MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1),
201-
MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0),
199+
MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1, 1),
200+
MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1, 1),
201+
MemcpyExecute(self, 100, 4, 102400, 10, 1, 1, 0, 1),
202+
MemcpyExecute(self, 100, 4, 102400, 10, 1, 1, 0, 0),
203+
MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0, 1),
202204
UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256, "Both"),
203205
UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256 * 1024, "Both"),
204206
UsmBatchMemoryAllocation(self, RUNTIMES.UR, "Device", 128, 256, "Both"),
@@ -538,6 +540,7 @@ def __init__(
538540
srcUSM,
539541
dstUSM,
540542
useEvent,
543+
useCopyOffload,
541544
):
542545
self.numOpsPerThread = numOpsPerThread
543546
self.numThreads = numThreads
@@ -546,22 +549,31 @@ def __init__(
546549
self.srcUSM = srcUSM
547550
self.dstUSM = dstUSM
548551
self.useEvents = useEvent
552+
self.useCopyOffload = useCopyOffload
549553
super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute")
550554

555+
def extra_env_vars(self) -> dict:
556+
if not self.useCopyOffload:
557+
return {"UR_L0_V2_FORCE_DISABLE_COPY_OFFLOAD": "1"}
558+
else:
559+
return {}
560+
551561
def name(self):
552562
return (
553563
f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}"
554564
+ (" without events" if not self.useEvents else "")
565+
+ (" without copy offload" if not self.useCopyOffload else "")
555566
)
556567

557568
def description(self) -> str:
558569
src_type = "device" if self.srcUSM == 1 else "host"
559570
dst_type = "device" if self.dstUSM == 1 else "host"
560571
events = "with" if self.useEvents else "without"
572+
copy_offload = "with" if self.useCopyOffload else "without"
561573
return (
562574
f"Measures multithreaded memory copy performance with {self.numThreads} threads "
563575
f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes "
564-
f"from {src_type} to {dst_type} memory {events} events."
576+
f"from {src_type} to {dst_type} memory {events} events {copy_offload} driver copy offload."
565577
)
566578

567579
def get_tags(self):

0 commit comments

Comments
 (0)