@@ -164,12 +164,18 @@ def additional_metadata(self) -> dict[str, BenchmarkMetadata]:
164
164
for completion in ["" , " with completion" ]:
165
165
for events in ["" , " using events" ]:
166
166
for num_kernels in self .submit_graph_num_kernels :
167
- group_name = f"SubmitGraph { order } { completion } { events } , { num_kernels } kernels"
168
- metadata [group_name ] = BenchmarkMetadata (
169
- type = "group" ,
170
- tags = base_metadata .tags ,
171
- )
172
-
167
+ for host_tasks in ["" , " use host tasks" ]:
168
+ group_name = f"SubmitGraph { order } { completion } { events } { host_tasks } , { num_kernels } kernels"
169
+ metadata [group_name ] = BenchmarkMetadata (
170
+ type = "group" ,
171
+ tags = base_metadata .tags ,
172
+ )
173
+ # CPU count variants
174
+ cpu_count_group = f"{ group_name } , CPU count"
175
+ metadata [cpu_count_group ] = BenchmarkMetadata (
176
+ type = "group" ,
177
+ tags = base_metadata .tags ,
178
+ )
173
179
return metadata
174
180
175
181
def benchmarks (self ) -> list [Benchmark ]:
@@ -1098,6 +1104,22 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
1098
1104
bin_args .append (f"--profilerType={ self .profiler_type .value } " )
1099
1105
return bin_args
1100
1106
1107
def get_metadata(self) -> dict[str, BenchmarkMetadata]:
    """Return the inherited metadata plus a "CPU count" twin of this benchmark's entry.

    The mapping produced by the parent class is extended with one extra
    entry, keyed by ``self.name() + " CPU count"``.  That entry is a deep
    copy of the metadata stored under ``self.name()``, with two fields
    overridden:

    * ``display_name`` becomes ``self.display_name() + ", CPU count"``.
    * ``explicit_group`` becomes ``self.explicit_group() + ", CPU count"``
      when the benchmark has a non-empty explicit group, otherwise ``""``.

    Returns:
        The parent's metadata mapping with the additional entry inserted.

    Raises:
        KeyError: if the parent's mapping has no entry under ``self.name()``.
    """
    entries = super().get_metadata()

    # Deep-copy so that overriding fields on the variant cannot leak back
    # into the entry the parent produced for this benchmark.
    variant_key = self.name() + " CPU count"
    variant = copy.deepcopy(entries[self.name()])

    shown_as = self.display_name() + ", CPU count"
    if self.explicit_group():
        group = self.explicit_group() + ", CPU count"
    else:
        # Ungrouped benchmarks get an ungrouped variant as well.
        group = ""

    variant.display_name = shown_as
    variant.explicit_group = group
    entries[variant_key] = variant

    return entries
1122
+
1101
1123
1102
1124
class UllsEmptyKernel (ComputeBenchmark ):
1103
1125
def __init__ (
0 commit comments