Skip to content

Commit 3aed917

Browse files
waltforme authored and shreyankg committed
[Misc] Accurately capture the time of loading weights (vllm-project#14063)
Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
1 parent 44bfb06 commit 3aed917

File tree

3 files changed

+13
-2
lines changed

3 files changed

+13
-2
lines changed

vllm/model_executor/model_loader/loader.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import itertools
1111
import math
1212
import os
13+
import time
1314
import warnings
1415
from abc import ABC, abstractmethod
1516
from contextlib import contextmanager
@@ -216,6 +217,9 @@ class Source:
216217
allow_patterns_overrides: Optional[list[str]] = None
217218
"""If defined, weights will load exclusively using these patterns."""
218219

220+
counter_before_loading_weights: float = 0.0
221+
counter_after_loading_weights: float = 0.0
222+
219223
def __init__(self, load_config: LoadConfig):
220224
super().__init__(load_config)
221225
if load_config.model_loader_extra_config:
@@ -368,6 +372,8 @@ def _xla_weights_iterator(iterator: Generator):
368372

369373
weights_iterator = _xla_weights_iterator(weights_iterator)
370374

375+
if self.counter_before_loading_weights == 0.0:
376+
self.counter_before_loading_weights = time.perf_counter()
371377
# Apply the prefix.
372378
return ((source.prefix + name, tensor)
373379
for (name, tensor) in weights_iterator)
@@ -412,6 +418,11 @@ def load_model(self, vllm_config: VllmConfig) -> nn.Module:
412418
weights_to_load = {name for name, _ in model.named_parameters()}
413419
loaded_weights = model.load_weights(
414420
self._get_all_weights(model_config, model))
421+
self.counter_after_loading_weights = time.perf_counter()
422+
logger.info(
423+
"Loading weights took %.2f seconds",
424+
self.counter_after_loading_weights -
425+
self.counter_before_loading_weights)
415426
# We only enable strict check for non-quantized models
416427
# that have loaded weights tracking currently.
417428
if model_config.quantization is None and loaded_weights is not None:

vllm/v1/worker/gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ def load_model(self) -> None:
10611061
self.device)
10621062
time_after_load = time.perf_counter()
10631063
self.model_memory_usage = m.consumed_memory
1064-
logger.info("Loading model weights took %.4f GB and %.6f seconds",
1064+
logger.info("Model loading took %.4f GB and %.6f seconds",
10651065
self.model_memory_usage / float(2**30),
10661066
time_after_load - time_before_load)
10671067

vllm/worker/model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1114,7 +1114,7 @@ def load_model(self) -> None:
11141114
time_after_load = time.perf_counter()
11151115

11161116
self.model_memory_usage = m.consumed_memory
1117-
logger.info("Loading model weights took %.4f GB and %.6f seconds",
1117+
logger.info("Model loading took %.4f GB and %.6f seconds",
11181118
self.model_memory_usage / float(2**30),
11191119
time_after_load - time_before_load)
11201120

0 commit comments

Comments
 (0)