From 6f0348cbec36e023b3428672db95eb812cf0e69c Mon Sep 17 00:00:00 2001
From: Nick Hill <nickhill@us.ibm.com>
Date: Mon, 1 Jul 2024 16:17:58 -0700
Subject: [PATCH] [BugFix] Ensure worker model loop is always stopped at the
 right time (#5987)

---
 vllm/engine/llm_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index f7e38c0e6b948..5886ebc24253a 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -838,7 +838,7 @@ def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
         # Tracing
         self.do_tracing(scheduler_outputs)
 
-        if not request_outputs:
+        if not self.has_unfinished_requests():
             # Stop the execute model loop in parallel workers until there are
             # more requests to process. This avoids waiting indefinitely in
             # torch.distributed ops which may otherwise timeout, and unblocks