From 47f5e03b5b9fc719b7e5ee00cbd6d1e79627f105 Mon Sep 17 00:00:00 2001
From: Kevin Lin <42618777+kevin314@users.noreply.github.com>
Date: Mon, 16 Sep 2024 15:56:28 -0500
Subject: [PATCH] [Bugfix] Bind api server port before starting engine (#8491)

---
 vllm/entrypoints/openai/api_server.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index b50fc6a265f8d..3d1d832986c1e 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -5,6 +5,7 @@
 import os
 import re
 import signal
+import socket
 import tempfile
 from argparse import Namespace
 from contextlib import asynccontextmanager
@@ -525,6 +526,9 @@ async def run_server(args, **uvicorn_kwargs) -> None:
     logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
+    temp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    temp_socket.bind(("", args.port))
+
     def signal_handler(*_) -> None:
         # Interrupt server on sigterm while initializing
         raise KeyboardInterrupt("terminated")
@@ -541,6 +545,8 @@ def signal_handler(*_) -> None:
         model_config = await async_engine_client.get_model_config()
         init_app_state(async_engine_client, model_config, app.state, args)
 
+        temp_socket.close()
+
         shutdown_task = await serve_http(
             app,
             limit_concurrency=async_engine_client.limit_concurrency,