@@ -84,8 +84,12 @@ def signal_handler():
8484
8585 if config .is_prefill_worker :
8686 await init_prefill (runtime , config )
87+ logger .debug ("init_prefill completed" )
8788 else :
8889 await init (runtime , config )
90+ logger .debug ("init completed" )
91+
92+ logger .debug ("Worker function completed, exiting..." )
8993
9094
9195def setup_vllm_engine (config , stat_logger = None ):
@@ -147,6 +151,7 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
147151 )
148152
149153 try :
154+ logger .debug ("Starting serve_endpoint for prefill worker" )
150155 await asyncio .gather (
151156 # for prefill, we want to shut down the engine after all prefill requests are finished because
152157 # (temp reason): we don't support re-routing prefill requests
@@ -161,10 +166,12 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
161166 handler .clear_kv_blocks , metrics_labels = [("model" , config .model )]
162167 ),
163168 )
169+ logger .debug ("serve_endpoint completed for prefill worker" )
164170 except Exception as e :
165171 logger .error (f"Failed to serve endpoints: { e } " )
166172 raise
167173 finally :
174+ logger .debug ("Cleaning up prefill worker" )
168175 handler .cleanup ()
169176
170177
@@ -254,22 +261,25 @@ async def init(runtime: DistributedRuntime, config: Config):
254261 )
255262
256263 try :
264+ logger .debug ("Starting serve_endpoint for decode worker" )
257265 await asyncio .gather (
258266 # for decode, we want to transfer the in-flight requests to other decode engines,
259267 # because waiting for them to finish can take a long time for long OSLs
260268 generate_endpoint .serve_endpoint (
261269 handler .generate ,
262- graceful_shutdown = False ,
270+ graceful_shutdown = config . migration_limit <= 0 ,
263271 metrics_labels = [("model" , config .model )],
264272 ),
265273 clear_endpoint .serve_endpoint (
266274 handler .clear_kv_blocks , metrics_labels = [("model" , config .model )]
267275 ),
268276 )
277+ logger .debug ("serve_endpoint completed for decode worker" )
269278 except Exception as e :
270279 logger .error (f"Failed to serve endpoints: { e } " )
271280 raise
272281 finally :
282+ logger .debug ("Cleaning up decode worker" )
273283 # Cleanup background tasks
274284 handler .cleanup ()
275285
0 commit comments