Skip to content

Commit 263ac16

Browse files
committed
refine profiler report to differentiate SendReceive in all2all
1 parent 3fe56f5 commit 263ac16

File tree

1 file changed

+8
-5
lines changed

1 file changed

+8
-5
lines changed

et_replay/comm/profiler_trace_analysis.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -241,14 +241,17 @@ def pick_comm_bw_(trace_data, comm_bw_data):
241241
]
242242
for evt in nccl_events:
243243
knl_name = evt["name"][: evt["name"].index("(")]
244+
coll_name = evt["args"]["Collective name"]
244245
data_size = _calculate_event_data_size(evt)
245246
ranks_count = evt["args"]["Group size"]
246247

247248
ranks = _parse_ranks(evt["args"]["Process Group Ranks"], ranks_count)
248249
pg_id = int(evt["args"]["Process Group Name"])
249250
pg = (*ranks, pg_id) if ranks and rank == min(ranks) else None
250251

251-
comm_bw_data[(knl_name, data_size, ranks_count)].append(
252+
# TODO: calculation of unbalanced all2all bw needs to be improved
253+
# all2all is implemented by single ncclDevKernel_SendRecv() in NCCL
254+
comm_bw_data[(knl_name, coll_name, data_size, ranks_count)].append(
252255
[
253256
evt["dur"],
254257
evt["args"]["algbw (GB/sec)"],
@@ -331,25 +334,25 @@ def analyze_profiler_trace(trace_dir: str, report_dir: str):
331334
)
332335

333336
f.write(
334-
f'\n{" ":>70s}|{" ":>5s}|{"AVG.":^19s}|{"p01":^8s}|{"p50":^8s}|{"p90":^8s}|{"p99":^8s}|\n'
337+
f'\n{" ":>86s}|{" ":>5s}|{"AVG.":^19s}|{"p01":^8s}|{"p50":^8s}|{"p90":^8s}|{"p99":^8s}|\n'
335338
)
336339

337340
f.write(
338-
f'{"kernel":>50s} {"size":>12s} {"#rks":>6s}|{"#pgs":>5s}|{" dur":>10s} '
341+
f'{"kernel":>50s} {"coll":>15s} {"size":>12s} {"#rks":>6s}|{"#pgs":>5s}|{" dur":>10s} '
339342
)
340343
for _ in range(5): # average, p01, p50, p90, p99
341344
f.write(f'{" busbw":>8s}|')
342345
f.write("\n")
343346

344347
f.write(
345-
f'{" ":>50s} {" (B)":>12s} {" ":>6s}|{" ":>5s}|{" (ms)":>10s} '
348+
f'{" ":>66s} {" (B)":>12s} {" ":>6s}|{" ":>5s}|{" (ms)":>10s} '
346349
)
347350
for _ in range(5): # average, p01, p50, p90, p99
348351
f.write(f'{"(GB/s)":>8s}|')
349352
f.write("\n")
350353

351354
for k, v in comm_bw_summary.items():
352-
f.write(f"{k[0]:>50s} {k[1]:>12d} {k[2]:>6d}|{v[0]:>5d}|{v[1]/1e3:>10.3f} ")
355+
f.write(f"{k[0]:>50s} {k[1]:>15s} {k[2]:>12d} {k[3]:>6d}|{v[0]:>5d}|{v[1]/1e3:>10.3f} ")
353356
for i in range(2, len(v)):
354357
f.write(f"{v[i]:>8.2f}|")
355358
f.write("\n")

0 commit comments

Comments
 (0)