Skip to content

Commit df44ffc

Browse files
committed
Add a "/server_info" endpoint to api_server for retrieving the vllm_config.
Signed-off-by: Xihui Cang <xihuicang@gmail.com>
1 parent dc1b4a6 commit df44ffc

File tree

1 file changed

+27
-1
lines changed

1 file changed

+27
-1
lines changed

vllm/entrypoints/openai/api_server.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import asyncio
44
import atexit
5+
import dataclasses
56
import gc
67
import importlib
78
import inspect
@@ -30,7 +31,7 @@
3031
from typing_extensions import assert_never
3132

3233
import vllm.envs as envs
33-
from vllm.config import ModelConfig
34+
from vllm.config import ModelConfig, VllmConfig
3435
from vllm.engine.arg_utils import AsyncEngineArgs
3536
from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore
3637
from vllm.engine.multiprocessing.client import MQLLMEngineClient
@@ -104,6 +105,20 @@
104105
_running_tasks: set[asyncio.Task] = set()
105106

106107

# Process-wide state shared by the API server's route handlers.
@dataclasses.dataclass
class _GlobalState:
    # The engine's resolved VllmConfig, captured at server startup.
    vllmconfig: VllmConfig


# Populated once via set_global_state() during engine construction;
# None until the server has built its engine client.
_global_state: Optional[_GlobalState] = None


def set_global_state(global_state: _GlobalState):
    """Install *global_state* as the module-level server state.

    Called during engine client construction so route handlers
    (e.g. the /server_info endpoint) can read the active config.
    """
    global _global_state
    _global_state = global_state
107122
@asynccontextmanager
108123
async def lifespan(app: FastAPI):
109124
try:
@@ -165,6 +180,7 @@ async def build_async_engine_client_from_engine_args(
165180
usage_context = UsageContext.OPENAI_API_SERVER
166181
vllm_config = engine_args.create_engine_config(usage_context=usage_context)
167182

183+
set_global_state(_GlobalState(vllmconfig=vllm_config))
168184
# V1 AsyncLLM.
169185
if envs.VLLM_USE_V1:
170186
if disable_frontend_multiprocessing:
@@ -327,6 +343,7 @@ def mount_metrics(app: FastAPI):
327343
"/load",
328344
"/ping",
329345
"/version",
346+
"/server_info",
330347
],
331348
registry=registry,
332349
).add().instrument(app).expose(app)
@@ -462,6 +479,15 @@ async def show_version():
462479
return JSONResponse(content=ver)
463480

464481

@router.get("/server_info")
async def show_server_info():
    """Return the server's vLLM configuration as a JSON payload.

    Falls back to a placeholder message when the global state has not
    been initialized yet (i.e. set_global_state() was never called).
    """
    config_repr = ("Vllm Config not available" if _global_state is None
                   else str(_global_state.vllmconfig))
    return JSONResponse(content={"vllm_config": config_repr})
490+
465491
@router.post("/v1/chat/completions",
466492
dependencies=[Depends(validate_json_request)])
467493
@with_cancellation

0 commit comments

Comments
 (0)