Parameterize the SubmitKernel benchmark by kernel execution time.

SubmitKernel gains a KernelExecTime constructor argument (default 1) that is
forwarded to the benchmark binary as --KernelExecTime, and benchmarks() now
instantiates every SubmitKernel variant with KernelExecTime values 1 and 20.
The value is appended to name(), display_name() and explicit_group() only
when it differs from 1, so the strings produced for KernelExecTime=1 are
unchanged. description() always reports the configured value.

diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py
index 34d0b21cd4896..f9a94e3806873 100644
--- a/devops/scripts/benchmarks/benches/compute.py
+++ b/devops/scripts/benchmarks/benches/compute.py
@@ -149,15 +149,17 @@ def benchmarks(self) -> list[Benchmark]:
             for in_order_queue in [0, 1]:
                 for measure_completion in [0, 1]:
                     for use_events in [0, 1]:
-                        benches.append(
-                            SubmitKernel(
-                                self,
-                                runtime,
-                                in_order_queue,
-                                measure_completion,
-                                use_events,
+                        for kernel_exec_time in [1, 20]:
+                            benches.append(
+                                SubmitKernel(
+                                    self,
+                                    runtime,
+                                    in_order_queue,
+                                    measure_completion,
+                                    use_events,
+                                    kernel_exec_time,
+                                )
                             )
-                        )
 
         # Add SinKernelGraph benchmarks
         for runtime in self.enabled_runtimes():
@@ -332,11 +334,20 @@ def teardown(self):
 
 
 class SubmitKernel(ComputeBenchmark):
-    def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents=0):
+    def __init__(
+        self,
+        bench,
+        runtime: RUNTIMES,
+        ioq,
+        MeasureCompletion=0,
+        UseEvents=0,
+        KernelExecTime=1,
+    ):
         self.ioq = ioq
         self.runtime = runtime
         self.MeasureCompletion = MeasureCompletion
         self.UseEvents = UseEvents
+        self.KernelExecTime = KernelExecTime
         self.NumKernels = 10
         super().__init__(
             bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
@@ -353,7 +364,11 @@ def name(self):
         # to match the existing already stored results
         events_str = " not using events" if not self.UseEvents else ""
 
-        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}"
+        kernel_exec_time_str = (
+            f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
+        )
+
+        return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
 
     def display_name(self) -> str:
         order = "in order" if self.ioq else "out of order"
@@ -362,6 +377,8 @@ def display_name(self) -> str:
             info.append("with measure completion")
         if self.UseEvents:
             info.append("using events")
+        if self.KernelExecTime != 1:
+            info.append(f"KernelExecTime={self.KernelExecTime}")
         additional_info = f" {' '.join(info)}" if info else ""
         return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}"
 
@@ -373,7 +390,11 @@ def explicit_group(self):
         # to match the existing already stored results
         events_str = " not using events" if not self.UseEvents else ""
 
-        return f"SubmitKernel {order}{completion_str}{events_str}"
+        kernel_exec_time_str = (
+            f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
+        )
+
+        return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
 
     def description(self) -> str:
         order = "in-order" if self.ioq else "out-of-order"
@@ -386,6 +407,7 @@ def description(self) -> str:
         return (
             f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
             f"Runs {self.NumKernels} simple kernels with minimal execution time to isolate API overhead from kernel execution time."
+            f" Each kernel executes for approximately {self.KernelExecTime} microseconds."
         )
 
     def range(self) -> tuple[float, float]:
@@ -398,7 +420,7 @@ def bin_args(self) -> list[str]:
             "--iterations=100000",
             "--Profiling=0",
             f"--NumKernels={self.NumKernels}",
-            "--KernelExecTime=1",
+            f"--KernelExecTime={self.KernelExecTime}",
             f"--UseEvents={self.UseEvents}",
         ]
 