From f714b51e27ec4d1cd04c67545856100baf28930d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?=
Date: Tue, 17 Sep 2024 16:36:43 +0200
Subject: [PATCH] [Bugfix] fix server startup for embedding models/in-process
 frontend

https://github.com/vllm-project/vllm/pull/8491#issuecomment-2355950899
---
 vllm/entrypoints/openai/api_server.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 5078a2654eb22..f76789d4b3a17 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -526,8 +526,11 @@ async def run_server(args, **uvicorn_kwargs) -> None:
     logger.info("vLLM API server version %s", VLLM_VERSION)
     logger.info("args: %s", args)
 
-    temp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    temp_socket.bind(("", args.port))
+    # workaround to make sure that we bind the port before the engine is set up.
+    # This avoids race conditions with ray.
+    # see https://github.com/vllm-project/vllm/issues/8204
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    sock.bind(("", args.port))
 
     def signal_handler(*_) -> None:
         # Interrupt server on sigterm while initializing
@@ -541,8 +544,6 @@ def signal_handler(*_) -> None:
         model_config = await engine_client.get_model_config()
         init_app_state(engine_client, model_config, app.state, args)
 
-        temp_socket.close()
-
         shutdown_task = await serve_http(
             app,
             host=args.host,
@@ -553,6 +554,7 @@ def signal_handler(*_) -> None:
             ssl_certfile=args.ssl_certfile,
             ssl_ca_certs=args.ssl_ca_certs,
             ssl_cert_reqs=args.ssl_cert_reqs,
+            fd=sock.fileno(),
             **uvicorn_kwargs,
         )
 
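
For readers outside the vLLM codebase, here is a minimal standalone sketch of the pattern this patch applies: bind the listening socket before any slow initialization, then hand the already-bound file descriptor to uvicorn so it does not re-bind the port. It assumes a uvicorn version whose `run()`/`Config` accept an `fd` argument; the FastAPI `app` and the port below are placeholders, not vLLM's actual wiring.

import socket

import uvicorn
from fastapi import FastAPI

app = FastAPI()  # placeholder; vLLM builds its app elsewhere


def main(port: int = 8000) -> None:
    # Bind the port up front, before any expensive setup (engine / Ray
    # workers), so no other process can claim it in the meantime.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(("", port))

    # ... expensive initialization would happen here ...

    # Hand the already-bound socket to uvicorn through its file
    # descriptor instead of letting uvicorn bind host/port itself.
    # uvicorn duplicates the fd (via socket.fromfd), so `sock` must
    # stay open until the server has started.
    uvicorn.run(app, fd=sock.fileno())


if __name__ == "__main__":
    main()

Note that the patch also drops the old `temp_socket.close()` call: closing the socket before uvicorn rebinds the port would reopen exactly the race window this change is meant to close. Keeping the socket alive and passing `fd=sock.fileno()` through `serve_http` (whose keyword arguments end up in `uvicorn.Config`) means the port is held continuously from startup to serving.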