22
33import asyncio
44import atexit
5- import dataclasses
65import gc
76import importlib
87import inspect
105104_running_tasks : set [asyncio .Task ] = set ()
106105
107106
108- # Store global states
109- @dataclasses .dataclass
110- class _GlobalState :
111- vllmconfig : VllmConfig
112-
113-
114- _global_state : Optional [_GlobalState ] = None
115-
116-
117- def set_global_state (global_state : _GlobalState ):
118- global _global_state
119- _global_state = global_state
120-
121-
122107@asynccontextmanager
123108async def lifespan (app : FastAPI ):
124109 try :
@@ -180,7 +165,6 @@ async def build_async_engine_client_from_engine_args(
180165 usage_context = UsageContext .OPENAI_API_SERVER
181166 vllm_config = engine_args .create_engine_config (usage_context = usage_context )
182167
183- set_global_state (_GlobalState (vllmconfig = vllm_config ))
184168 # V1 AsyncLLM.
185169 if envs .VLLM_USE_V1 :
186170 if disable_frontend_multiprocessing :
@@ -744,13 +728,10 @@ async def is_sleeping(raw_request: Request):
744728 logger .info ("check whether the engine is sleeping" )
745729 is_sleeping = await engine_client (raw_request ).is_sleeping ()
746730 return JSONResponse (content = {"is_sleeping" : is_sleeping })
747-
731+
748732 @router .get ("/server_info" )
749- async def show_server_info ():
750- if _global_state is None :
751- server_info = {"vllm_config" : "Vllm Config not available" }
752- else :
753- server_info = {"vllm_config" : str (_global_state .vllmconfig )}
733+ async def show_server_info (raw_request : Request ):
734+ server_info = {"vllm_config" : str (raw_request .app .state .vllm_config )}
754735 return JSONResponse (content = server_info )
755736
756737
@@ -919,7 +900,7 @@ async def log_response(request: Request, call_next):
919900
920901async def init_app_state (
921902 engine_client : EngineClient ,
922- model_config : ModelConfig ,
903+ vllm_config : VllmConfig ,
923904 state : State ,
924905 args : Namespace ,
925906) -> None :
@@ -940,6 +921,8 @@ async def init_app_state(
940921
941922 state .engine_client = engine_client
942923 state .log_stats = not args .disable_log_stats
924+ state .vllm_config = vllm_config
925+ model_config = vllm_config .model_config
943926
944927 resolved_chat_template = load_chat_template (args .chat_template )
945928 if resolved_chat_template is not None :
@@ -1094,8 +1077,8 @@ def signal_handler(*_) -> None:
10941077 async with build_async_engine_client (args ) as engine_client :
10951078 app = build_app (args )
10961079
1097- model_config = await engine_client .get_model_config ()
1098- await init_app_state (engine_client , model_config , app .state , args )
1080+ vllm_config = await engine_client .get_vllm_config ()
1081+ await init_app_state (engine_client , vllm_config , app .state , args )
10991082
11001083 def _listen_addr (a : str ) -> str :
11011084 if is_valid_ipv6_address (a ):
0 commit comments