@@ -154,12 +154,18 @@ def additional_metadata(self) -> dict[str, BenchmarkMetadata]:
154
154
for completion in ["" , " with completion" ]:
155
155
for events in ["" , " using events" ]:
156
156
for num_kernels in self .submit_graph_num_kernels :
157
- group_name = f"SubmitGraph { order } { completion } { events } , { num_kernels } kernels"
158
- metadata [group_name ] = BenchmarkMetadata (
159
- type = "group" ,
160
- tags = base_metadata .tags ,
161
- )
162
-
157
+ for host_tasks in ["" , " use host tasks" ]:
158
+ group_name = f"SubmitGraph { order } { completion } { events } { host_tasks } , { num_kernels } kernels"
159
+ metadata [group_name ] = BenchmarkMetadata (
160
+ type = "group" ,
161
+ tags = base_metadata .tags ,
162
+ )
163
+ # CPU count variants
164
+ cpu_count_group = f"{ group_name } , CPU count"
165
+ metadata [cpu_count_group ] = BenchmarkMetadata (
166
+ type = "group" ,
167
+ tags = base_metadata .tags ,
168
+ )
163
169
return metadata
164
170
165
171
def benchmarks (self ) -> list [Benchmark ]:
@@ -1088,6 +1094,22 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
1088
1094
bin_args .append (f"--profilerType={ self .profiler_type .value } " )
1089
1095
return bin_args
1090
1096
1097
def get_metadata(self) -> dict[str, BenchmarkMetadata]:
    """Return the inherited metadata plus an entry for the CPU count variant.

    The parent implementation supplies the base mapping. The entry stored
    under ``self.name()`` is deep-copied so the variant can be relabelled
    without touching the original entry, and the copy is registered under
    the key ``self.name() + " CPU count"``.

    Returns:
        The mapping from ``super().get_metadata()`` with one additional
        key for the CPU count variant.

    Raises:
        KeyError: If the parent mapping has no entry for ``self.name()``.
    """
    entries = super().get_metadata()

    variant_key = self.name() + " CPU count"
    # Deep copy so relabelling the variant leaves the base entry unchanged.
    variant = copy.deepcopy(entries[self.name()])

    variant.display_name = self.display_name() + ", CPU count"
    # An empty/falsy explicit group stays empty instead of gaining a suffix.
    if self.explicit_group():
        variant.explicit_group = self.explicit_group() + ", CPU count"
    else:
        variant.explicit_group = ""

    entries[variant_key] = variant
    return entries
1112
+
1091
1113
1092
1114
class UllsEmptyKernel (ComputeBenchmark ):
1093
1115
def __init__ (
0 commit comments