Skip to content

Commit cba9f52

Browse files
jinqinn authored and fhl2000 committed
[Misc] Configurable timeout for execute_model RPC calls via env var (vllm-project#19544)
Signed-off-by: jinqinn <goodqinjin@163.com>
Signed-off-by: fhl <2410591650@qq.com>
1 parent 6dc86aa commit cba9f52

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

vllm/envs.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@
130130
VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS: int = 1
131131
VLLM_SLEEP_WHEN_IDLE: bool = False
132132
VLLM_MQ_MAX_CHUNK_BYTES_MB: int = 16
133+
VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS: int = 300
133134
VLLM_KV_CACHE_LAYOUT: Optional[str] = None
134135
VLLM_COMPUTE_NANS_IN_LOGITS: bool = False
135136

@@ -897,6 +898,11 @@ def get_vllm_port() -> Optional[int]:
897898
"VLLM_MQ_MAX_CHUNK_BYTES_MB":
898899
lambda: int(os.getenv("VLLM_MQ_MAX_CHUNK_BYTES_MB", "16")),
899900

901+
# Timeout in seconds for execute_model RPC calls in multiprocessing
902+
# executor (only applies when TP > 1).
903+
"VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS":
904+
lambda: int(os.getenv("VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS", "300")),
905+
900906
# KV Cache layout used throughout vllm.
901907
# Some common values are:
902908
# - NHD

vllm/v1/executor/multiproc_executor.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,6 @@
3737

3838
logger = init_logger(__name__)
3939

40-
POLLING_TIMEOUT_MS = 5000
41-
POLLING_TIMEOUT_S = POLLING_TIMEOUT_MS // 1000
42-
43-
EXECUTE_MODEL_TIMEOUT_S = 300
44-
4540

4641
class MultiprocExecutor(Executor):
4742

@@ -160,12 +155,12 @@ def execute_model(
160155
self,
161156
scheduler_output,
162157
) -> Union[ModelRunnerOutput, Future[ModelRunnerOutput]]:
163-
(output, ) = self.collective_rpc("execute_model",
164-
args=(scheduler_output, ),
165-
unique_reply_rank=self.output_rank,
166-
non_block=self.max_concurrent_batches
167-
> 1,
168-
timeout=EXECUTE_MODEL_TIMEOUT_S)
158+
(output, ) = self.collective_rpc(
159+
"execute_model",
160+
args=(scheduler_output, ),
161+
unique_reply_rank=self.output_rank,
162+
non_block=self.max_concurrent_batches > 1,
163+
timeout=envs.VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS)
169164
return output
170165

171166
def collective_rpc(self,

0 commit comments

Comments
 (0)