diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index 14ce820cc39e..00ceb7d3d0c4 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -120,8 +120,9 @@ def __init__(
             executor_class=executor_class,
             log_stats=self.log_stats,
         )
-        for stat_logger in self.stat_loggers[0]:
-            stat_logger.log_engine_initialized()
+        if self.stat_loggers:
+            for stat_logger in self.stat_loggers[0]:
+                stat_logger.log_engine_initialized()
         self.output_handler: Optional[asyncio.Task] = None
         try:
             # Start output handler eagerly if we are in the asyncio eventloop.
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index 0d5d92f72537..91a0a75a3081 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -442,9 +442,10 @@ def _wait_for_engine_startup(self):
                 logger.info("Core engine process %d ready.", eng_id)
                 identities.discard(eng_id)
                 # Setup KV cache config with initialization state from
-                # engine core process.
-                self.vllm_config.cache_config.num_gpu_blocks = message_dict[
-                    'num_gpu_blocks']
+                # engine core process. Sum values from all engines in DP case.
+                num_gpu_blocks = self.vllm_config.cache_config.num_gpu_blocks or 0
+                num_gpu_blocks += message_dict['num_gpu_blocks']
+                self.vllm_config.cache_config.num_gpu_blocks = num_gpu_blocks
 
     def _init_core_engines(
         self,