Skip to content

Commit 578087e

Browse files
authored
[Frontend] Pass pre-created socket to uvicorn (#13113)
1 parent fa253f1 commit 578087e

File tree

3 files changed

+17
-6
lines changed

3 files changed

+17
-6
lines changed

vllm/entrypoints/api_server.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ async def run_server(args: Namespace,
127127

128128
shutdown_task = await serve_http(
129129
app,
130+
sock=None,
130131
host=args.host,
131132
port=args.port,
132133
log_level=args.log_level,

vllm/entrypoints/launcher.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22

33
import asyncio
44
import signal
5+
import socket
56
from http import HTTPStatus
6-
from typing import Any
7+
from typing import Any, Optional
78

89
import uvicorn
910
from fastapi import FastAPI, Request, Response
@@ -17,7 +18,8 @@
1718
logger = init_logger(__name__)
1819

1920

20-
async def serve_http(app: FastAPI, **uvicorn_kwargs: Any):
21+
async def serve_http(app: FastAPI, sock: Optional[socket.socket],
22+
**uvicorn_kwargs: Any):
2123
logger.info("Available routes are:")
2224
for route in app.routes:
2325
methods = getattr(route, "methods", None)
@@ -34,7 +36,8 @@ async def serve_http(app: FastAPI, **uvicorn_kwargs: Any):
3436

3537
loop = asyncio.get_running_loop()
3638

37-
server_task = loop.create_task(server.serve())
39+
server_task = loop.create_task(
40+
server.serve(sockets=[sock] if sock else None))
3841

3942
def signal_handler() -> None:
4043
# prevents the uvicorn signal handler from exiting early

vllm/entrypoints/openai/api_server.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import re
1111
import signal
1212
import socket
13-
import sys
1413
import tempfile
1514
import uuid
1615
from argparse import Namespace
@@ -831,6 +830,7 @@ def create_server_socket(addr: Tuple[str, int]) -> socket.socket:
831830

832831
sock = socket.socket(family=family, type=socket.SOCK_STREAM)
833832
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
833+
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
834834
sock.bind(addr)
835835

836836
return sock
@@ -878,8 +878,17 @@ def signal_handler(*_) -> None:
878878
model_config = await engine_client.get_model_config()
879879
await init_app_state(engine_client, model_config, app.state, args)
880880

881+
def _listen_addr(a: str) -> str:
882+
if is_valid_ipv6_address(a):
883+
return '[' + a + ']'
884+
return a or "0.0.0.0"
885+
886+
logger.info("Starting vLLM API server on http://%s:%d",
887+
_listen_addr(sock_addr[0]), sock_addr[1])
888+
881889
shutdown_task = await serve_http(
882890
app,
891+
sock=sock,
883892
host=args.host,
884893
port=args.port,
885894
log_level=args.uvicorn_log_level,
@@ -888,8 +897,6 @@ def signal_handler(*_) -> None:
888897
ssl_certfile=args.ssl_certfile,
889898
ssl_ca_certs=args.ssl_ca_certs,
890899
ssl_cert_reqs=args.ssl_cert_reqs,
891-
# Workaround to work on macOS
892-
fd=sock.fileno() if sys.platform.startswith("darwin") else None,
893900
**uvicorn_kwargs,
894901
)
895902

0 commit comments

Comments
 (0)