@@ -76,6 +76,9 @@ def __init__(
7676        # KV Connector pushes/pulls remote KVs for P/D and offloading.
7777        self .connector  =  None 
7878        if  self .vllm_config .kv_transfer_config  is  not None :
79+             assert  len (self .kv_cache_config .kv_cache_groups ) ==  1 , (
80+                 "Multiple KV cache groups are not currently supported " 
81+                 "with KV connectors" )
7982            self .connector  =  KVConnectorFactory .create_connector_v1 (
8083                config = self .vllm_config , role = KVConnectorRole .SCHEDULER )
8184
@@ -985,9 +988,8 @@ def _connector_finished(
985988        """ 
986989        if  self .connector  is  None :
987990            return  False , None 
988-         assert  len (self .kv_cache_config .kv_cache_groups 
989-                    ) ==  1 , "KV connector only supports one KV cache group now" 
990-         block_ids  =  self .kv_cache_manager .get_block_ids (request .request_id )[0 ]
991+ 
992+         (block_ids , ) =  self .kv_cache_manager .get_block_ids (request .request_id )
991993        return  self .connector .request_finished (request , block_ids )
992994
993995    def  _update_waiting_for_remote_kv (self , request : Request ) ->  bool :
@@ -1002,12 +1004,12 @@ def _update_waiting_for_remote_kv(self, request: Request) -> bool:
10021004        and the request state will be moved back to WAITING from 
10031005        WAITING_FOR_REMOTE_KV. 
10041006        """ 
1007+         assert  self .connector  is  not None 
10051008        if  request .request_id  not  in self .finished_recving_kv_req_ids :
10061009            return  False 
1007-         assert  len (self .kv_cache_config .kv_cache_groups 
1008-                    ) ==  1 , "KV connector only supports one KV cache group now" 
1010+ 
10091011        # Now that the blocks are ready, actually cache them. 
1010-         block_ids   =  self .kv_cache_manager .get_block_ids (request .request_id )[ 0 ] 
1012+         ( block_ids , )  =  self .kv_cache_manager .get_block_ids (request .request_id )
10111013        num_computed_tokens  =  len (block_ids ) *  self .block_size 
10121014        # Handle the case where the number of request tokens is less than one block.
10131015        num_computed_tokens  =  min (num_computed_tokens , request .num_tokens )
0 commit comments