Skip to content

Commit 7685201

Browse files
authored
[MISC] Name torch profiler trace files as instance_id + rank_id so that the profiler results of each rank can be merged (#25867)
Signed-off-by: wang.yuqi <noooop@126.com>
1 parent 82e64c7 commit 7685201

File tree

3 files changed

+8
-2
lines changed

3 files changed

+8
-2
lines changed

vllm/config/vllm.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import hashlib
66
import json
77
import os
8+
import time
89
from contextlib import contextmanager
910
from dataclasses import field, replace
1011
from functools import lru_cache
@@ -270,6 +271,9 @@ def with_hf_config(
270271
def __post_init__(self):
271272
"""Verify configs are valid & consistent with each other."""
272273

274+
# Unique per-instance ID; workers combine it with their rank to name torch profiler traces.
275+
self.instance_id = f"{time.time_ns()}"
276+
273277
self.try_verify_and_update_config()
274278

275279
if self.model_config is not None:

vllm/v1/worker/gpu_worker.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def __init__(
7979
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
8080
if envs.VLLM_TORCH_PROFILER_DIR:
8181
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
82+
worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
8283
logger.info(
8384
"Profiling enabled. Traces will be saved to: %s",
8485
torch_profiler_trace_dir,
@@ -101,7 +102,7 @@ def __init__(
101102
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
102103
with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
103104
on_trace_ready=torch.profiler.tensorboard_trace_handler(
104-
torch_profiler_trace_dir, use_gzip=True
105+
torch_profiler_trace_dir, worker_name=worker_name, use_gzip=True
105106
),
106107
)
107108
else:

vllm/v1/worker/xpu_worker.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def __init__(
3939
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
4040
if envs.VLLM_TORCH_PROFILER_DIR:
4141
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
42+
worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
4243
logger.info(
4344
"Profiling enabled. Traces will be saved to: %s",
4445
torch_profiler_trace_dir,
@@ -61,7 +62,7 @@ def __init__(
6162
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
6263
with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
6364
on_trace_ready=torch.profiler.tensorboard_trace_handler(
64-
torch_profiler_trace_dir, use_gzip=True
65+
torch_profiler_trace_dir, worker_name=worker_name, use_gzip=True
6566
),
6667
)
6768
else:

0 commit comments

Comments
 (0)