diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index f7e38c0e6b948..5886ebc24253a 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -838,7 +838,7 @@ def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]: # Tracing self.do_tracing(scheduler_outputs) - if not request_outputs: + if not self.has_unfinished_requests(): # Stop the execute model loop in parallel workers until there are # more requests to process. This avoids waiting indefinitely in # torch.distributed ops which may otherwise timeout, and unblocks