Skip to content

Commit f092db6

Browse files
njhill authored and rtourgeman committed
[BugFix] Fix handling of resumed reqs in SharedStorageConnector (vllm-project#27719)
Signed-off-by: Nick Hill <nhill@redhat.com>
1 parent d9d94d2 commit f092db6

File tree

1 file changed

+24
-26
lines changed

1 file changed

+24
-26
lines changed

vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -336,36 +336,34 @@ def build_connector_meta(
336336

337337
cached_reqs = scheduler_output.scheduled_cached_reqs
338338
for i, req_id in enumerate(cached_reqs.req_ids):
339+
resumed_from_preemption = cached_reqs.resumed_from_preemption[i]
340+
if not resumed_from_preemption or req_id not in self._requests_need_load:
341+
continue
342+
339343
num_computed_tokens = cached_reqs.num_computed_tokens[i]
340344
num_new_tokens = scheduler_output.num_scheduled_tokens[req_id]
341345
new_block_ids = cached_reqs.new_block_ids[i]
342-
resumed_from_preemption = cached_reqs.resumed_from_preemption[i]
343-
344-
# NOTE(rob): here we rely on the resumed requests being
345-
# the first N requests in the list scheduled_cache_reqs.
346-
if not resumed_from_preemption:
347-
break
348-
if req_id in self._requests_need_load:
349-
# NOTE(rob): cached_req_data does not have the full
350-
# list of token ids (only new tokens). So we look it
351-
# up in the actual request object.
352-
request = self._requests_need_load[req_id]
353-
total_tokens = num_computed_tokens + num_new_tokens
354-
token_ids = request.all_token_ids[:total_tokens]
355-
356-
# NOTE(rob): For resumed req, new_block_ids is all
357-
# of the block_ids for the request.
358-
assert new_block_ids is not None
359-
block_ids = new_block_ids[0]
360346

361-
meta.add_request(
362-
token_ids=token_ids,
363-
block_ids=block_ids,
364-
block_size=self._block_size,
365-
is_store=False,
366-
mm_hashes=[f.identifier for f in request.mm_features],
367-
)
368-
total_need_load += 1
347+
# NOTE(rob): cached_req_data does not have the full
348+
# list of token ids (only new tokens). So we look it
349+
# up in the actual request object.
350+
request = self._requests_need_load[req_id]
351+
total_tokens = num_computed_tokens + num_new_tokens
352+
token_ids = request.all_token_ids[:total_tokens]
353+
354+
# NOTE(rob): For resumed req, new_block_ids is all
355+
# of the block_ids for the request.
356+
assert new_block_ids is not None
357+
block_ids = new_block_ids[0]
358+
359+
meta.add_request(
360+
token_ids=token_ids,
361+
block_ids=block_ids,
362+
block_size=self._block_size,
363+
is_store=False,
364+
mm_hashes=[f.identifier for f in request.mm_features],
365+
)
366+
total_need_load += 1
369367

370368
assert total_need_load == len(self._requests_need_load)
371369
self._requests_need_load.clear()

0 commit comments

Comments
 (0)