diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 3d1d832986c1e..b50fc6a265f8d 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -5,7 +5,6 @@
 import os
 import re
 import signal
-import socket
 import tempfile
 from argparse import Namespace
 from contextlib import asynccontextmanager
@@ -526,9 +525,6 @@ async def run_server(args, **uvicorn_kwargs) -> None:
     logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
-    temp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    temp_socket.bind(("", args.port))
-
     def signal_handler(*_) -> None:
         # Interrupt server on sigterm while initializing
         raise KeyboardInterrupt("terminated")
@@ -545,8 +541,6 @@ def signal_handler(*_) -> None:
     model_config = await async_engine_client.get_model_config()
     init_app_state(async_engine_client, model_config, app.state, args)
 
-    temp_socket.close()
-
     shutdown_task = await serve_http(
         app,
         limit_concurrency=async_engine_client.limit_concurrency,