
Commit 41973c3

Initialize the vLLM config in init_app_state; if envs.VLLM_SERVER_DEV_MODE is 1, add a "/server_info" endpoint in api_server.
Signed-off-by: Xihui Cang <xihuicang@gmail.com>
1 parent 1a72372 commit 41973c3

5 files changed (+24, -26 lines)
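For context, a minimal client sketch for the new endpoint. The host, the port, and the server having been started with VLLM_SERVER_DEV_MODE=1 are assumptions for illustration, not part of this commit.

    import json
    import urllib.request

    # Query the dev-mode endpoint; per this commit, the handler returns
    # {"vllm_config": str(app.state.vllm_config)} as JSON.
    with urllib.request.urlopen("http://localhost:8000/server_info") as resp:
        server_info = json.load(resp)

    print(server_info["vllm_config"])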

vllm/engine/async_llm_engine.py

Lines changed: 4 additions & 0 deletions
@@ -1171,6 +1171,10 @@ async def get_model_config(self) -> ModelConfig:
         """Get the model configuration of the vLLM engine."""
         return self.engine.get_model_config()
 
+    async def get_vllm_config(self) -> VllmConfig:
+        """Get the vllm configuration of the vLLM engine."""
+        return self.engine.get_vllm_config()
+
     async def get_parallel_config(self) -> ParallelConfig:
         """Get the parallel configuration of the vLLM engine."""
         return self.engine.get_parallel_config()

vllm/engine/llm_engine.py

Lines changed: 4 additions & 0 deletions
@@ -918,6 +918,10 @@ def get_model_config(self) -> ModelConfig:
         """Gets the model configuration."""
         return self.model_config
 
+    def get_vllm_config(self) -> VllmConfig:
+        """Gets the vllm configuration."""
+        return self.vllm_config
+
     def get_parallel_config(self) -> ParallelConfig:
         """Gets the parallel configuration."""
         return self.parallel_config
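Both engine classes now expose the same accessor. A hedged usage sketch for the synchronous engine follows; the model name and construction path are placeholders, not part of this diff.

    from vllm import EngineArgs, LLMEngine

    # Build a small engine and read back its full VllmConfig
    # through the accessor added in this commit.
    engine = LLMEngine.from_engine_args(EngineArgs(model="facebook/opt-125m"))
    vllm_config = engine.get_vllm_config()
    print(vllm_config.model_config.model)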

vllm/engine/multiprocessing/client.py

Lines changed: 4 additions & 0 deletions
@@ -93,6 +93,7 @@ def __init__(self, ipc_path: str, engine_config: VllmConfig,
         self._errored_with: Optional[BaseException] = None
 
         # Get the configs.
+        self.vllm_config = engine_config
         self.model_config = engine_config.model_config
         self.decoding_config = engine_config.decoding_config
 
@@ -383,6 +384,9 @@ async def get_decoding_config(self) -> DecodingConfig:
     async def get_model_config(self) -> ModelConfig:
         return self.model_config
 
+    async def get_vllm_config(self) -> VllmConfig:
+        return self.vllm_config
+
     async def is_tracing_enabled(self) -> bool:
         return self.tracing_flag
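Because the client caches the whole engine_config at construction, get_vllm_config() resolves locally instead of making an IPC round-trip to the engine process. An illustrative stand-in for that pattern; the class and field names below are hypothetical, not vLLM's.

    from dataclasses import dataclass

    @dataclass
    class FakeModelConfig:
        model: str

    @dataclass
    class FakeVllmConfig:
        model_config: FakeModelConfig

    class ConfigCachingClient:
        """Stand-in for the caching pattern used by MQLLMEngineClient."""

        def __init__(self, engine_config: FakeVllmConfig):
            self.vllm_config = engine_config               # cached once at construction
            self.model_config = engine_config.model_config

        async def get_vllm_config(self) -> FakeVllmConfig:
            return self.vllm_config                        # local, no IPC round-trip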

vllm/entrypoints/openai/api_server.py

Lines changed: 8 additions & 25 deletions
@@ -2,7 +2,6 @@
 
 import asyncio
 import atexit
-import dataclasses
 import gc
 import importlib
 import inspect
@@ -105,20 +104,6 @@
 _running_tasks: set[asyncio.Task] = set()
 
 
-# Store global states
-@dataclasses.dataclass
-class _GlobalState:
-    vllmconfig: VllmConfig
-
-
-_global_state: Optional[_GlobalState] = None
-
-
-def set_global_state(global_state: _GlobalState):
-    global _global_state
-    _global_state = global_state
-
-
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     try:
@@ -180,7 +165,6 @@ async def build_async_engine_client_from_engine_args(
     usage_context = UsageContext.OPENAI_API_SERVER
     vllm_config = engine_args.create_engine_config(usage_context=usage_context)
 
-    set_global_state(_GlobalState(vllmconfig=vllm_config))
     # V1 AsyncLLM.
     if envs.VLLM_USE_V1:
         if disable_frontend_multiprocessing:
@@ -744,13 +728,10 @@ async def is_sleeping(raw_request: Request):
         logger.info("check whether the engine is sleeping")
         is_sleeping = await engine_client(raw_request).is_sleeping()
         return JSONResponse(content={"is_sleeping": is_sleeping})
-
+
     @router.get("/server_info")
-    async def show_server_info():
-        if _global_state is None:
-            server_info = {"vllm_config": "Vllm Config not available"}
-        else:
-            server_info = {"vllm_config": str(_global_state.vllmconfig)}
+    async def show_server_info(raw_request: Request):
+        server_info = {"vllm_config": str(raw_request.app.state.vllm_config)}
         return JSONResponse(content=server_info)
 
 
@@ -919,7 +900,7 @@ async def log_response(request: Request, call_next):
 
 async def init_app_state(
     engine_client: EngineClient,
-    model_config: ModelConfig,
+    vllm_config: VllmConfig,
     state: State,
     args: Namespace,
 ) -> None:
@@ -940,6 +921,8 @@
 
     state.engine_client = engine_client
     state.log_stats = not args.disable_log_stats
+    state.vllm_config = vllm_config
+    model_config = vllm_config.model_config
 
     resolved_chat_template = load_chat_template(args.chat_template)
     if resolved_chat_template is not None:
@@ -1094,8 +1077,8 @@ def signal_handler(*_) -> None:
     async with build_async_engine_client(args) as engine_client:
         app = build_app(args)
 
-        model_config = await engine_client.get_model_config()
-        await init_app_state(engine_client, model_config, app.state, args)
+        vllm_config = await engine_client.get_vllm_config()
+        await init_app_state(engine_client, vllm_config, app.state, args)
 
         def _listen_addr(a: str) -> str:
             if is_valid_ipv6_address(a):
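The api_server change replaces a module-level global with FastAPI's per-application state: init_app_state stores the config on app.state, and the handler reads it back through raw_request.app.state. A self-contained sketch of that pattern in plain FastAPI; the stand-in config string is illustrative, not vLLM's.

    from fastapi import FastAPI, Request
    from fastapi.responses import JSONResponse

    app = FastAPI()
    # In vLLM this assignment happens in init_app_state; here we stub the value.
    app.state.vllm_config = "model='facebook/opt-125m', dtype='float16'"

    @app.get("/server_info")
    async def show_server_info(raw_request: Request):
        # Read from app.state instead of a module-level global, so the
        # value is scoped to this application instance.
        return JSONResponse(
            content={"vllm_config": str(raw_request.app.state.vllm_config)})

    # Serve with e.g.: uvicorn <module>:app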

vllm/v1/engine/async_llm.py

Lines changed: 4 additions & 1 deletion
@@ -64,7 +64,7 @@ def __init__(
         assert start_engine_loop
 
         self.model_config = vllm_config.model_config
-
+        self.vllm_config = vllm_config
         self.log_requests = log_requests
         self.log_stats = log_stats
 
@@ -379,6 +379,9 @@ def encode(
     ):
         raise ValueError("Not Supported on V1 yet.")
 
+    async def get_vllm_config(self) -> VllmConfig:
+        return self.vllm_config
+
     async def get_model_config(self) -> ModelConfig:
         return self.model_config
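V1's AsyncLLM mirrors the V0 accessor. A hedged async usage sketch; the construction path and model name are placeholders, not part of this diff.

    import asyncio

    from vllm import AsyncEngineArgs
    from vllm.v1.engine.async_llm import AsyncLLM

    async def main() -> None:
        # Build a V1 async engine and read its full config via the new accessor.
        engine = AsyncLLM.from_engine_args(
            AsyncEngineArgs(model="facebook/opt-125m"))
        vllm_config = await engine.get_vllm_config()
        print(vllm_config.model_config.model)

    asyncio.run(main())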
