From 6f0348cbec36e023b3428672db95eb812cf0e69c Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Mon, 1 Jul 2024 16:17:58 -0700
Subject: [PATCH] [BugFix] Ensure worker model loop is always stopped at the right time (#5987)

---
 vllm/engine/llm_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index f7e38c0e6b948..5886ebc24253a 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -838,7 +838,7 @@ def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
         # Tracing
         self.do_tracing(scheduler_outputs)
 
-        if not request_outputs:
+        if not self.has_unfinished_requests():
             # Stop the execute model loop in parallel workers until there are
             # more requests to process. This avoids waiting indefinitely in
             # torch.distributed ops which may otherwise timeout, and unblocks