Skip to content

Commit 5913eb8

Browse files
committed
[BugFix] Fix --disable-log-stats in V1 server mode
Also sum GPU blocks across DP ranks when reporting the num_gpu_blocks metric.

Signed-off-by: Nick Hill <nhill@redhat.com>
1 parent 182f40e commit 5913eb8

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

vllm/v1/engine/async_llm.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -120,8 +120,9 @@ def __init__(
120120
executor_class=executor_class,
121121
log_stats=self.log_stats,
122122
)
123-
for stat_logger in self.stat_loggers[0]:
124-
stat_logger.log_engine_initialized()
123+
if self.stat_loggers:
124+
for stat_logger in self.stat_loggers[0]:
125+
stat_logger.log_engine_initialized()
125126
self.output_handler: Optional[asyncio.Task] = None
126127
try:
127128
# Start output handler eagerly if we are in the asyncio eventloop.

vllm/v1/engine/core_client.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -442,9 +442,10 @@ def _wait_for_engine_startup(self):
442442
logger.info("Core engine process %d ready.", eng_id)
443443
identities.discard(eng_id)
444444
# Setup KV cache config with initialization state from
445-
# engine core process.
446-
self.vllm_config.cache_config.num_gpu_blocks = message_dict[
447-
'num_gpu_blocks']
445+
# engine core process. Sum values from all engines in DP case.
446+
num_gpu_blocks = self.vllm_config.cache_config.num_gpu_blocks or 0
447+
num_gpu_blocks += message_dict['num_gpu_blocks']
448+
self.vllm_config.cache_config.num_gpu_blocks = num_gpu_blocks
448449

449450
def _init_core_engines(
450451
self,

0 commit comments

Comments
 (0)