File tree Expand file tree Collapse file tree 4 files changed +14
-11
lines changed Expand file tree Collapse file tree 4 files changed +14
-11
lines changed Original file line number Diff line number Diff line change 88from  fastapi .responses  import  JSONResponse , Response 
99
1010import  vllm .entrypoints .api_server 
11+ import  vllm .envs  as  envs 
1112from  vllm .engine .arg_utils  import  AsyncEngineArgs 
1213from  vllm .engine .async_llm_engine  import  AsyncLLMEngine 
1314from  vllm .utils  import  FlexibleArgumentParser 
@@ -46,9 +47,8 @@ def stats() -> Response:
4647    engine_args  =  AsyncEngineArgs .from_cli_args (args )
4748    engine  =  AsyncLLMEngineWithStats .from_engine_args (engine_args )
4849    vllm .entrypoints .api_server .engine  =  engine 
49-     uvicorn .run (
50-         app ,
51-         host = args .host ,
52-         port = args .port ,
53-         log_level = "debug" ,
54-         timeout_keep_alive = vllm .entrypoints .api_server .TIMEOUT_KEEP_ALIVE )
50+     uvicorn .run (app ,
51+                 host = args .host ,
52+                 port = args .port ,
53+                 log_level = "debug" ,
54+                 timeout_keep_alive = envs .VLLM_HTTP_TIMEOUT_KEEP_ALIVE )
Original file line number Diff line number Diff line change 1717from  fastapi  import  FastAPI , Request 
1818from  fastapi .responses  import  JSONResponse , Response , StreamingResponse 
1919
20+ import  vllm .envs  as  envs 
2021from  vllm .engine .arg_utils  import  AsyncEngineArgs 
2122from  vllm .engine .async_llm_engine  import  AsyncLLMEngine 
2223from  vllm .entrypoints .launcher  import  serve_http 
2930
3031logger  =  init_logger ("vllm.entrypoints.api_server" )
3132
32- TIMEOUT_KEEP_ALIVE  =  5   # seconds. 
3333app  =  FastAPI ()
3434engine  =  None 
3535
@@ -134,7 +134,7 @@ async def run_server(args: Namespace,
134134        host = args .host ,
135135        port = args .port ,
136136        log_level = args .log_level ,
137-         timeout_keep_alive = TIMEOUT_KEEP_ALIVE ,
137+         timeout_keep_alive = envs . VLLM_HTTP_TIMEOUT_KEEP_ALIVE ,
138138        ssl_keyfile = args .ssl_keyfile ,
139139        ssl_certfile = args .ssl_certfile ,
140140        ssl_ca_certs = args .ssl_ca_certs ,
Original file line number Diff line number Diff line change 103103from  vllm .v1 .metrics .prometheus  import  get_prometheus_registry 
104104from  vllm .version  import  __version__  as  VLLM_VERSION 
105105
106- TIMEOUT_KEEP_ALIVE  =  5   # seconds 
107- 
108106prometheus_multiproc_dir : tempfile .TemporaryDirectory 
109107
110108# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765) 
@@ -1360,7 +1358,7 @@ async def run_server_worker(listen_address,
13601358            # NOTE: When the 'disable_uvicorn_access_log' value is True, 
13611359            # no access log will be output. 
13621360            access_log = not  args .disable_uvicorn_access_log ,
1363-             timeout_keep_alive = TIMEOUT_KEEP_ALIVE ,
1361+             timeout_keep_alive = envs . VLLM_HTTP_TIMEOUT_KEEP_ALIVE ,
13641362            ssl_keyfile = args .ssl_keyfile ,
13651363            ssl_certfile = args .ssl_certfile ,
13661364            ssl_ca_certs = args .ssl_ca_certs ,
Original file line number Diff line number Diff line change 7171    VERBOSE : bool  =  False 
7272    VLLM_ALLOW_LONG_MAX_MODEL_LEN : bool  =  False 
7373    VLLM_RPC_TIMEOUT : int  =  10000   # ms 
74+     VLLM_HTTP_TIMEOUT_KEEP_ALIVE : int  =  5   # seconds 
7475    VLLM_PLUGINS : Optional [list [str ]] =  None 
7576    VLLM_LORA_RESOLVER_CACHE_DIR : Optional [str ] =  None 
7677    VLLM_TORCH_PROFILER_DIR : Optional [str ] =  None 
@@ -557,6 +558,10 @@ def get_vllm_port() -> Optional[int]:
557558    "VLLM_RPC_TIMEOUT" :
558559    lambda : int (os .getenv ("VLLM_RPC_TIMEOUT" , "10000" )),
559560
561+     # Timeout in seconds for keeping HTTP connections alive in API server 
562+     "VLLM_HTTP_TIMEOUT_KEEP_ALIVE" :
563+     lambda : int (os .environ .get ("VLLM_HTTP_TIMEOUT_KEEP_ALIVE" , "5" )),
564+ 
560565    # a list of plugin names to load, separated by commas. 
561566    # if this is not set, it means all plugins will be loaded 
562567    # if this is set to an empty string, no plugins will be loaded 
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments