@@ -196,9 +196,11 @@ def benchmarks(self) -> list[Benchmark]:
196
196
# Add UR-specific benchmarks
197
197
if options .ur is not None :
198
198
benches += [
199
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 ),
200
- MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 ),
201
- MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 ),
199
+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 , 1 ),
200
+ MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 , 1 ),
201
+ MemcpyExecute (self , 100 , 4 , 102400 , 10 , 1 , 1 , 0 , 1 ),
202
+ MemcpyExecute (self , 100 , 4 , 102400 , 10 , 1 , 1 , 0 , 0 ),
203
+ MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 , 1 ),
202
204
UsmMemoryAllocation (self , RUNTIMES .UR , "Device" , 256 , "Both" ),
203
205
UsmMemoryAllocation (self , RUNTIMES .UR , "Device" , 256 * 1024 , "Both" ),
204
206
UsmBatchMemoryAllocation (self , RUNTIMES .UR , "Device" , 128 , 256 , "Both" ),
@@ -538,6 +540,7 @@ def __init__(
538
540
srcUSM ,
539
541
dstUSM ,
540
542
useEvent ,
543
+ useCopyOffload ,
541
544
):
542
545
self .numOpsPerThread = numOpsPerThread
543
546
self .numThreads = numThreads
@@ -546,22 +549,31 @@ def __init__(
546
549
self .srcUSM = srcUSM
547
550
self .dstUSM = dstUSM
548
551
self .useEvents = useEvent
552
+ self .useCopyOffload = useCopyOffload
549
553
super ().__init__ (bench , "multithread_benchmark_ur" , "MemcpyExecute" )
550
554
555
def extra_env_vars(self) -> dict:
    """Return the extra environment variables for this benchmark.

    Returns:
        An empty dict when ``self.useCopyOffload`` is truthy; otherwise a
        dict setting ``UR_L0_V2_FORCE_DISABLE_COPY_OFFLOAD`` to ``"1"``.
    """
    # NOTE(review): presumably the variable tells the L0 v2 adapter to skip
    # copy offload -- confirm against the adapter documentation.
    if self.useCopyOffload:
        return {}
    return {"UR_L0_V2_FORCE_DISABLE_COPY_OFFLOAD": "1"}
560
+
551
561
def name(self):
    """Build the benchmark's name from its configuration.

    The base name lists the operations per thread, thread count, allocation
    size and the source/destination USM values. A suffix is appended for
    each of ``useEvents`` / ``useCopyOffload`` that is falsy.
    """
    base = f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread} , numThreads:{self.numThreads} , allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM} "

    # Only the disabled features are called out in the name.
    suffixes = []
    if not self.useEvents:
        suffixes.append(" without events")
    if not self.useCopyOffload:
        suffixes.append(" without copy offload")

    return base + "".join(suffixes)
556
567
557
568
def description(self) -> str:
    """Return a one-sentence, human-readable summary of this benchmark.

    The sentence reports the thread count, operations per thread, allocation
    size, the source and destination memory kinds, and whether events and
    driver copy offload are in use.
    """

    def memory_kind(usm_flag):
        # A USM value of exactly 1 is reported as device memory; any other
        # value is reported as host memory.
        return "device" if usm_flag == 1 else "host"

    def with_or_without(flag):
        return "with" if flag else "without"

    source_kind = memory_kind(self.srcUSM)
    dest_kind = memory_kind(self.dstUSM)
    events_word = with_or_without(self.useEvents)
    offload_word = with_or_without(self.useCopyOffload)

    fragments = [
        f"Measures multithreaded memory copy performance with {self.numThreads} threads ",
        f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes ",
        f"from {source_kind} to {dest_kind} memory {events_word} events {offload_word} driver copy offload .",
    ]
    return "".join(fragments)
566
578
567
579
def get_tags (self ):
0 commit comments