Skip to content

Commit e00cb76

Browse files
njhill authored and NickLucche committed
have KVCacheManager return empty blocks for nonexistent requests
Signed-off-by: Nick Hill <nhill@redhat.com>
1 parent 12a2c06 commit e00cb76

File tree

3 files changed

+10
-13
lines changed

3 files changed

+10
-13
lines changed

vllm/v1/core/kv_cache_coordinator.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,7 @@ def get_blocks(self, request_id: str) -> list[list[KVCacheBlock]]:
166166
return [
167167
manager.req_to_blocks[request_id]
168168
for manager in self.single_type_managers
169+
if request_id in manager.req_to_blocks
169170
]
170171

171172
@abstractmethod

vllm/v1/core/kv_cache_manager.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -388,3 +388,4 @@ def cache_blocks(self, request: Request, block_hashes: list[BlockHash],
388388
def create_empty_block_list(self) -> KVCacheBlocks:
389389
"""Creates a new KVCacheBlocks instance with no blocks."""
390390
return KVCacheBlocks([[] for _ in range(self.num_kv_cache_groups)])
391+

vllm/v1/core/sched/scheduler.py

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,9 @@ def __init__(
7676
# KV Connector pushes/pull of remote KVs for P/D and offloading.
7777
self.connector = None
7878
if self.vllm_config.kv_transfer_config is not None:
79+
assert len(self.kv_cache_config.kv_cache_groups) == 1, (
80+
"Multiple KV cache groups are not currently supported "
81+
"with KV connectors")
7982
self.connector = KVConnectorFactory.create_connector_v1(
8083
config=self.vllm_config, role=KVConnectorRole.SCHEDULER)
8184

@@ -985,16 +988,8 @@ def _connector_finished(
985988
"""
986989
if self.connector is None:
987990
return False, None
988-
assert len(self.kv_cache_config.kv_cache_groups
989-
) == 1, "KV connector only supports one KV cache group now"
990-
if (request.status == RequestStatus.FINISHED_ABORTED and \
991-
request.request_id not in
992-
self.kv_cache_manager.single_type_manager.req_to_blocks):
993-
# Ensure empty blocks ids are passed to respect connector interface
994-
block_ids = KVCacheBlocks.create_empty().get_block_ids()[0]
995-
else:
996-
block_ids = self.kv_cache_manager.get_block_ids(
997-
request.request_id)[0]
991+
992+
(block_ids, ) = self.kv_cache_manager.get_block_ids(request.request_id)
998993
return self.connector.request_finished(request, block_ids)
999994

1000995
def _update_waiting_for_remote_kv(self, request: Request) -> bool:
@@ -1009,12 +1004,12 @@ def _update_waiting_for_remote_kv(self, request: Request) -> bool:
10091004
and the request state will be moved back to WAITING from
10101005
WAITING_FOR_REMOTE_KV.
10111006
"""
1007+
assert self.connector is not None
10121008
if request.request_id not in self.finished_recving_kv_req_ids:
10131009
return False
1014-
assert len(self.kv_cache_config.kv_cache_groups
1015-
) == 1, "KV connector only supports one KV cache group now"
1010+
10161011
# Now that the blocks are ready, actually cache them.
1017-
block_ids = self.kv_cache_manager.get_block_ids(request.request_id)[0]
1012+
(block_ids, ) = self.kv_cache_manager.get_block_ids(request.request_id)
10181013
num_computed_tokens = len(block_ids) * self.block_size
10191014
# Handle the case where the number of request tokens is less than one block.
10201015
num_computed_tokens = min(num_computed_tokens, request.num_tokens)

0 commit comments

Comments
 (0)