11import atexit
22import os
3- from typing import List , Union
3+ from typing import List , Optional
44
55import msgspec
66import zmq
1010from vllm .utils import get_open_zmq_ipc_path , kill_process_tree
1111from vllm .v1 .engine import (EngineCoreOutput , EngineCoreOutputs ,
1212 EngineCoreProfile , EngineCoreRequest ,
13- EngineCoreRequestType )
14- from vllm .v1 .engine .core import EngineCore , EngineCoreProc
13+ EngineCoreRequestType , EngineCoreRequestUnion )
14+ from vllm .v1 .engine .core import (EngineCore , EngineCoreProc ,
15+ EngineCoreProcHandle )
1516from vllm .v1 .serial_utils import PickleEncoder
1617
1718logger = init_logger (__name__ )
@@ -59,7 +60,7 @@ def get_output(self) -> List[EngineCoreOutput]:
def add_request(self, request: EngineCoreRequest) -> None:
    """Accept ``request`` for processing.

    This definition provides no behavior of its own: calling it always
    raises ``NotImplementedError``.
    """
    raise NotImplementedError
6162
def profile(self, is_start: bool = True) -> None:
    """Synchronous profiling entry point.

    Args:
        is_start: Profiling flag; presumably ``True`` starts and ``False``
            stops profiling (confirm against the engine core).

    Raises:
        NotImplementedError: Always; this definition has no behavior.
    """
    raise NotImplementedError
6465
6566 def abort_requests (self , request_ids : List [str ]) -> None :
@@ -71,6 +72,9 @@ async def get_output_async(self) -> List[EngineCoreOutput]:
async def add_request_async(self, request: EngineCoreRequest) -> None:
    """Coroutine counterpart of ``add_request``.

    This definition provides no behavior of its own: awaiting it always
    raises ``NotImplementedError``.
    """
    raise NotImplementedError
7374
async def profile_async(self, is_start: bool = True) -> None:
    """Coroutine counterpart of ``profile``.

    Args:
        is_start: Profiling flag; presumably ``True`` starts and ``False``
            stops profiling (confirm against the engine core).

    Raises:
        NotImplementedError: Always, once the coroutine is awaited.
    """
    raise NotImplementedError
77+
async def abort_requests_async(self, request_ids: List[str]) -> None:
    """Coroutine entry point for aborting the requests in ``request_ids``.

    Raises:
        NotImplementedError: Always, once the coroutine is awaited.
    """
    raise NotImplementedError
7680
@@ -105,7 +109,7 @@ def shutdown(self):
def __del__(self):
    """Finalizer: delegate cleanup to ``self.shutdown()``."""
    self.shutdown()
107111
def profile(self, is_start: bool = True) -> None:
    """Forward the profiling flag to the in-process engine core.

    Args:
        is_start: Passed unchanged, positionally, to
            ``self.engine_core.profile``.
    """
    core = self.engine_core
    core.profile(is_start)
110114
111115
@@ -133,7 +137,10 @@ def __init__(
133137 self .decoder = msgspec .msgpack .Decoder (EngineCoreOutputs )
134138
135139 # ZMQ setup.
136- self .ctx = (zmq .asyncio .Context () if asyncio_mode else zmq .Context ())
140+ if asyncio_mode :
141+ self .ctx = zmq .asyncio .Context ()
142+ else :
143+ self .ctx = zmq .Context () # type: ignore[attr-defined]
137144
138145 # Path for IPC.
139146 ready_path = get_open_zmq_ipc_path ()
@@ -149,11 +156,13 @@ def __init__(
149156 self .input_socket .bind (input_path )
150157
151158 # Start EngineCore in background process.
159+ self .proc_handle : Optional [EngineCoreProcHandle ]
152160 self .proc_handle = EngineCoreProc .make_engine_core_process (
153161 * args ,
154- input_path = input_path ,
155- output_path = output_path ,
156- ready_path = ready_path ,
162+ input_path =
163+ input_path , # type: ignore[misc] # MyPy incorrectly flags duplicate keywords
164+ output_path = output_path , # type: ignore[misc]
165+ ready_path = ready_path , # type: ignore[misc]
157166 ** kwargs ,
158167 )
159168 atexit .register (self .shutdown )
@@ -204,10 +213,8 @@ def get_output(self) -> List[EngineCoreOutput]:
204213 engine_core_outputs = self .decoder .decode (frame .buffer ).outputs
205214 return engine_core_outputs
206215
207- def _send_input (
208- self , request_type : EngineCoreRequestType ,
209- request : Union [EngineCoreRequest , EngineCoreProfile ,
210- List [str ]]) -> None :
216+ def _send_input (self , request_type : EngineCoreRequestType ,
217+ request : EngineCoreRequestUnion ) -> None :
211218
212219 # (RequestType, SerializedRequest)
213220 msg = (request_type .value , self .encoder .encode (request ))
@@ -219,7 +226,7 @@ def add_request(self, request: EngineCoreRequest) -> None:
def abort_requests(self, request_ids: List[str]) -> None:
    """Send an ABORT message for ``request_ids`` via ``self._send_input``.

    An empty list is skipped, mirroring the ``len(request_ids) > 0`` guard
    in ``abort_requests_async``, so nothing is encoded or sent when there
    is nothing to abort.

    Args:
        request_ids: IDs of the requests to abort.
    """
    if not request_ids:
        return
    self._send_input(EngineCoreRequestType.ABORT, request_ids)
221228
def profile(self, is_start: bool = True) -> None:
    """Send a PROFILE message via ``self._send_input``.

    Args:
        is_start: Wrapped in an ``EngineCoreProfile`` and used as the
            message payload.
    """
    profile_request = EngineCoreProfile(is_start)
    self._send_input(EngineCoreRequestType.PROFILE, profile_request)
225232
@@ -237,10 +244,8 @@ async def get_output_async(self) -> List[EngineCoreOutput]:
237244
238245 return engine_core_outputs
239246
240- async def _send_input (
241- self , request_type : EngineCoreRequestType ,
242- request : Union [EngineCoreRequest , EngineCoreProfile ,
243- List [str ]]) -> None :
247+ async def _send_input (self , request_type : EngineCoreRequestType ,
248+ request : EngineCoreRequestUnion ) -> None :
244249
245250 msg = (request_type .value , self .encoder .encode (request ))
246251 await self .input_socket .send_multipart (msg , copy = False )
@@ -252,6 +257,6 @@ async def abort_requests_async(self, request_ids: List[str]) -> None:
252257 if len (request_ids ) > 0 :
253258 await self ._send_input (EngineCoreRequestType .ABORT , request_ids )
254259
async def profile_async(self, is_start: bool = True) -> None:
    """Send a PROFILE message by awaiting ``self._send_input``.

    Args:
        is_start: Wrapped in an ``EngineCoreProfile`` and used as the
            message payload.
    """
    profile_request = EngineCoreProfile(is_start)
    await self._send_input(EngineCoreRequestType.PROFILE, profile_request)
0 commit comments