@@ -81,10 +81,7 @@ def run_busy_loop(self):
8181 self .execute_dummy_batch ()
8282
8383
84- def run_engine_core_dplb (* args ,
85- dp_rank : int = 0 ,
86- local_dp_rank : int = 0 ,
87- ** kwargs ):
84+ def run_engine_core (* args , dp_rank : int = 0 , local_dp_rank : int = 0 , ** kwargs ):
8885 """Launch EngineCore busy loop in background process."""
8986
9087 # Signal handler used for graceful termination.
@@ -188,62 +185,7 @@ def _update_from_kv_xfer_finished(self,
188185 req_id )
189186
190187
191- def run_engine_core (* args , dp_rank : int = 0 , local_dp_rank : int = 0 , ** kwargs ):
192- """Launch EngineCore busy loop in background process."""
193-
194- # Signal handler used for graceful termination.
195- # SystemExit exception is only raised once to allow this and worker
196- # processes to terminate without error
197- shutdown_requested = False
198-
199- # Ensure we can serialize transformer config after spawning
200- maybe_register_config_serialize_by_value ()
201-
202- def signal_handler (signum , frame ):
203- nonlocal shutdown_requested
204- if not shutdown_requested :
205- shutdown_requested = True
206- raise SystemExit ()
207-
208- # Either SIGTERM or SIGINT will terminate the engine_core
209- signal .signal (signal .SIGTERM , signal_handler )
210- signal .signal (signal .SIGINT , signal_handler )
211-
212- engine_core : Optional [EngineCoreProc ] = None
213- try :
214- parallel_config : ParallelConfig = kwargs ["vllm_config" ].parallel_config
215- if parallel_config .data_parallel_size > 1 or dp_rank > 0 :
216- # Set data parallel rank for this engine process.
217- parallel_config .data_parallel_rank = dp_rank
218- parallel_config .data_parallel_rank_local = local_dp_rank
219- engine_core = DPEngineCoreProc (* args , ** kwargs )
220- else :
221- engine_core = EngineCoreProc (* args , ** kwargs )
222-
223- engine_core .scheduler .finish_requests = types .MethodType (
224- finish_requests , engine_core .scheduler )
225- engine_core .scheduler ._update_from_kv_xfer_finished = types .MethodType (
226- _update_from_kv_xfer_finished , engine_core .scheduler )
227- engine_core .run_busy_loop ()
228-
229- except SystemExit :
230- logger .debug ("EngineCore exiting." )
231- raise
232- except Exception as e :
233- if engine_core is None :
234- logger .exception ("EngineCore failed to start." )
235- else :
236- logger .exception ("EngineCore encountered a fatal error." )
237- engine_core ._send_engine_dead ()
238- raise e
239- finally :
240- if engine_core is not None :
241- engine_core .shutdown ()
242-
243-
244188# Apply this patch only if the external data parallelism is enabled
245189if vllm_ascend_envs .VLLM_ASCEND_EXTERNAL_DP_LB_ENABLED :
246190 # Patch the EngineCoreClient to use the custom make_async_mp_client
247- EngineCoreProc .run_engine_core = run_engine_core_dplb # type: ignore[attr-defined]
248- else :
249191 EngineCoreProc .run_engine_core = run_engine_core # type: ignore[attr-defined]
0 commit comments