@@ -336,36 +336,34 @@ def build_connector_meta(
336336
337337 cached_reqs = scheduler_output .scheduled_cached_reqs
338338 for i , req_id in enumerate (cached_reqs .req_ids ):
339+ resumed_from_preemption = cached_reqs .resumed_from_preemption [i ]
340+ if not resumed_from_preemption or req_id not in self ._requests_need_load :
341+ continue
342+
339343 num_computed_tokens = cached_reqs .num_computed_tokens [i ]
340344 num_new_tokens = scheduler_output .num_scheduled_tokens [req_id ]
341345 new_block_ids = cached_reqs .new_block_ids [i ]
342- resumed_from_preemption = cached_reqs .resumed_from_preemption [i ]
343-
344- # NOTE(rob): here we rely on the resumed requests being
345- # the first N requests in the list scheduled_cache_reqs.
346- if not resumed_from_preemption :
347- break
348- if req_id in self ._requests_need_load :
349- # NOTE(rob): cached_req_data does not have the full
350- # list of token ids (only new tokens). So we look it
351- # up in the actual request object.
352- request = self ._requests_need_load [req_id ]
353- total_tokens = num_computed_tokens + num_new_tokens
354- token_ids = request .all_token_ids [:total_tokens ]
355-
356- # NOTE(rob): For resumed req, new_block_ids is all
357- # of the block_ids for the request.
358- assert new_block_ids is not None
359- block_ids = new_block_ids [0 ]
360346
361- meta .add_request (
362- token_ids = token_ids ,
363- block_ids = block_ids ,
364- block_size = self ._block_size ,
365- is_store = False ,
366- mm_hashes = [f .identifier for f in request .mm_features ],
367- )
368- total_need_load += 1
347+ # NOTE(rob): cached_req_data does not have the full
348+ # list of token ids (only new tokens). So we look it
349+ # up in the actual request object.
350+ request = self ._requests_need_load [req_id ]
351+ total_tokens = num_computed_tokens + num_new_tokens
352+ token_ids = request .all_token_ids [:total_tokens ]
353+
354+ # NOTE(rob): For resumed req, new_block_ids is all
355+ # of the block_ids for the request.
356+ assert new_block_ids is not None
357+ block_ids = new_block_ids [0 ]
358+
359+ meta .add_request (
360+ token_ids = token_ids ,
361+ block_ids = block_ids ,
362+ block_size = self ._block_size ,
363+ is_store = False ,
364+ mm_hashes = [f .identifier for f in request .mm_features ],
365+ )
366+ total_need_load += 1
369367
370368 assert total_need_load == len (self ._requests_need_load )
371369 self ._requests_need_load .clear ()
0 commit comments