3 files changed: +4 -5 lines changed
@@ -30,7 +30,7 @@ def __init__(
3030 caching_hash_algo : str = "builtin" ,
3131 num_preallocate_tokens : int = 64 ,
3232 log_stats : bool = False ,
33- connector : "KVConnectorBase_V1" = None ,
33+ connector : Optional [ "KVConnectorBase_V1" ] = None ,
3434 ) -> None :
3535 assert len (kv_cache_config .kv_cache_groups ) == 1 , (
3636 "KVCacheManager does not support hybrid models with more than 1 "
@@ -124,5 +124,5 @@ class SchedulerOutput:
124124 # the bitmask for the whole batch
125125 grammar_bitmask : Optional [npt .NDArray [np .int32 ]]
126126
127- # the connector metadata
127+ # KV Cache Connector metadata.
128128 kv_connector_metadata : Optional [KVConnectorMetadata ] = None
@@ -66,11 +66,10 @@ def __init__(
6666 # Create KVConnector for the Scheduler. Note that each Worker
6767 # will have a corresponding KVConnector with Role=WORKER.
6868 # The KV Connector pushes/pulls remote KVs for P/D and offloading.
69+ self .connector = None
6970 if self .vllm_config .kv_transfer_config is not None :
70- self .connector = KVConnectorFactory .create_connector (
71+ self .connector = KVConnectorFactory .create_connector_v1 (
7172 config = self .vllm_config , role = KVConnectorRole .SCHEDULER )
72- else :
73- self .connector = None
7473
7574 num_gpu_blocks = self .cache_config .num_gpu_blocks
7675 assert isinstance (num_gpu_blocks , int ) and num_gpu_blocks > 0
You can’t perform that action at this time.
0 commit comments