@@ -241,14 +241,17 @@ def pick_comm_bw_(trace_data, comm_bw_data):
241241 ]
242242 for evt in nccl_events :
243243 knl_name = evt ["name" ][: evt ["name" ].index ("(" )]
244+ coll_name = evt ["args" ]["Collective name" ]
244245 data_size = _calculate_event_data_size (evt )
245246 ranks_count = evt ["args" ]["Group size" ]
246247
247248 ranks = _parse_ranks (evt ["args" ]["Process Group Ranks" ], ranks_count )
248249 pg_id = int (evt ["args" ]["Process Group Name" ])
249250 pg = (* ranks , pg_id ) if ranks and rank == min (ranks ) else None
250251
251- comm_bw_data [(knl_name , data_size , ranks_count )].append (
252+ # TODO: calculation of unbalanced all2all bw needs to be improved
253+ # all2all is implemented by single ncclDevKernel_SendRecv() in NCCL
254+ comm_bw_data [(knl_name , coll_name , data_size , ranks_count )].append (
252255 [
253256 evt ["dur" ],
254257 evt ["args" ]["algbw (GB/sec)" ],
@@ -331,25 +334,25 @@ def analyze_profiler_trace(trace_dir: str, report_dir: str):
331334 )
332335
333336 f .write (
334- f'\n { " " :>70s } |{ " " :>5s} |{ "AVG." :^19s} |{ "p01" :^8s} |{ "p50" :^8s} |{ "p90" :^8s} |{ "p99" :^8s} |\n '
337+ f'\n { " " :>86s } |{ " " :>5s} |{ "AVG." :^19s} |{ "p01" :^8s} |{ "p50" :^8s} |{ "p90" :^8s} |{ "p99" :^8s} |\n '
335338 )
336339
337340 f .write (
338- f'{ "kernel" :>50s} { "size" :>12s} { "#rks" :>6s} |{ "#pgs" :>5s} |{ " dur" :>10s} '
341+ f'{ "kernel" :>50s} { "coll" :>15s } { " size" :>12s} { "#rks" :>6s} |{ "#pgs" :>5s} |{ " dur" :>10s} '
339342 )
340343 for _ in range (5 ): # average, p01, p50, p90, p99
341344 f .write (f'{ " busbw" :>8s} |' )
342345 f .write ("\n " )
343346
344347 f .write (
345- f'{ " " :>50s } { " (B)" :>12s} { " " :>6s} |{ " " :>5s} |{ " (ms)" :>10s} '
348+ f'{ " " :>66s } { " (B)" :>12s} { " " :>6s} |{ " " :>5s} |{ " (ms)" :>10s} '
346349 )
347350 for _ in range (5 ): # average, p01, p50, p90, p99
348351 f .write (f'{ "(GB/s)" :>8s} |' )
349352 f .write ("\n " )
350353
351354 for k , v in comm_bw_summary .items ():
352- f .write (f"{ k [0 ]:>50s} { k [1 ]:>12d } { k [2 ]:>6d} |{ v [0 ]:>5d} |{ v [1 ]/ 1e3 :>10.3f} " )
355+ f .write (f"{ k [0 ]:>50s} { k [1 ]:>15s } { k [2 ]:>12d } { k [ 3 ]:>6d} |{ v [0 ]:>5d} |{ v [1 ]/ 1e3 :>10.3f} " )
353356 for i in range (2 , len (v )):
354357 f .write (f"{ v [i ]:>8.2f} |" )
355358 f .write ("\n " )
0 commit comments