|
2 | 2 |
|
3 | 3 | import asyncio |
4 | 4 | import atexit |
| 5 | +import dataclasses |
5 | 6 | import gc |
6 | 7 | import importlib |
7 | 8 | import inspect |
|
30 | 31 | from typing_extensions import assert_never |
31 | 32 |
|
32 | 33 | import vllm.envs as envs |
33 | | -from vllm.config import ModelConfig |
| 34 | +from vllm.config import ModelConfig, VllmConfig |
34 | 35 | from vllm.engine.arg_utils import AsyncEngineArgs |
35 | 36 | from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore |
36 | 37 | from vllm.engine.multiprocessing.client import MQLLMEngineClient |
|
104 | 105 | _running_tasks: set[asyncio.Task] = set() |
105 | 106 |
|
106 | 107 |
|
# Store global states
@dataclasses.dataclass
class _GlobalState:
    # Snapshot of the fully-resolved engine/server configuration, captured
    # once at startup (see build_async_engine_client_from_engine_args) and
    # exposed read-only through the /server_info endpoint.
    # NOTE(review): name should be `vllm_config` per PEP 8, but it is
    # referenced elsewhere in this file — rename in a coordinated change.
    vllmconfig: VllmConfig


# Module-level singleton; None until set_global_state() runs during
# engine-client construction.
_global_state: Optional[_GlobalState] = None
| 116 | + |
def set_global_state(global_state: _GlobalState) -> None:
    """Install *global_state* as the module-wide singleton.

    Called once while building the async engine client so request
    handlers (e.g. /server_info) can read the resolved VllmConfig.
    """
    global _global_state
    _global_state = global_state
| 121 | + |
107 | 122 | @asynccontextmanager |
108 | 123 | async def lifespan(app: FastAPI): |
109 | 124 | try: |
@@ -165,6 +180,7 @@ async def build_async_engine_client_from_engine_args( |
165 | 180 | usage_context = UsageContext.OPENAI_API_SERVER |
166 | 181 | vllm_config = engine_args.create_engine_config(usage_context=usage_context) |
167 | 182 |
|
| 183 | + set_global_state(_GlobalState(vllmconfig=vllm_config)) |
168 | 184 | # V1 AsyncLLM. |
169 | 185 | if envs.VLLM_USE_V1: |
170 | 186 | if disable_frontend_multiprocessing: |
@@ -327,6 +343,7 @@ def mount_metrics(app: FastAPI): |
327 | 343 | "/load", |
328 | 344 | "/ping", |
329 | 345 | "/version", |
| 346 | + "/server_info", |
330 | 347 | ], |
331 | 348 | registry=registry, |
332 | 349 | ).add().instrument(app).expose(app) |
@@ -462,6 +479,15 @@ async def show_version(): |
462 | 479 | return JSONResponse(content=ver) |
463 | 480 |
|
464 | 481 |
|
@router.get("/server_info")
async def show_server_info():
    """Expose the server's resolved vLLM configuration as JSON.

    Falls back to a placeholder string when the engine has not yet
    published its configuration via set_global_state().
    """
    state = _global_state
    config_repr = ("Vllm Config not available"
                   if state is None else str(state.vllmconfig))
    return JSONResponse(content={"vllm_config": config_repr})
| 490 | + |
465 | 491 | @router.post("/v1/chat/completions", |
466 | 492 | dependencies=[Depends(validate_json_request)]) |
467 | 493 | @with_cancellation |
|
0 commit comments