@@ -899,6 +899,7 @@ def test_kv_connector_basic():
899899 scheduler = create_scheduler (
900900 enable_prefix_caching = True ,
901901 use_kv_connector = True ,
902+ disable_hybrid_kv_cache_manager = True ,
902903 )
903904 NUM_TOTAL_BLOCKS = scheduler .kv_cache_manager .block_pool .get_num_free_blocks ()
904905 BLOCK_SIZE = scheduler .cache_config .block_size
@@ -1024,6 +1025,7 @@ def test_external_prefix_cache_metrics():
10241025 scheduler = create_scheduler (
10251026 enable_prefix_caching = False ,
10261027 use_kv_connector = True ,
1028+ disable_hybrid_kv_cache_manager = True ,
10271029 )
10281030
10291031 # Mock connector to simulate a partial external cache hit
@@ -1088,6 +1090,7 @@ def test_kv_connector_unable_to_allocate():
10881090 use_kv_connector = True ,
10891091 block_size = BLOCK_SIZE ,
10901092 num_blocks = NUM_BLOCKS ,
1093+ disable_hybrid_kv_cache_manager = True ,
10911094 )
10921095 NUM_MATCHED_NEW_TOKENS = BLOCK_SIZE * 2
10931096 scheduler .connector .get_num_new_matched_tokens = Mock (name = "method" )
@@ -1171,6 +1174,7 @@ def test_kv_connector_handles_preemption():
11711174 use_kv_connector = True ,
11721175 block_size = BLOCK_SIZE ,
11731176 num_blocks = NUM_BLOCKS ,
1177+ disable_hybrid_kv_cache_manager = True ,
11741178 )
11751179
11761180 NUM_MATCHED_NEW_TOKENS = BLOCK_SIZE
@@ -1387,6 +1391,7 @@ def create_scheduler_with_priority(
13871391 block_size : int = 16 ,
13881392 max_model_len : int | None = None ,
13891393 num_speculative_tokens : int | None = None ,
1394+ disable_hybrid_kv_cache_manager : bool = False ,
13901395) -> Scheduler :
13911396 """Create scheduler with priority policy enabled.
13921397
@@ -1411,6 +1416,7 @@ def create_scheduler_with_priority(
14111416 disable_chunked_mm_input = disable_chunked_mm_input ,
14121417 enable_chunked_prefill = True ,
14131418 policy = "priority" , # Enable priority scheduling
1419+ disable_hybrid_kv_cache_manager = disable_hybrid_kv_cache_manager ,
14141420 )
14151421 model_config = ModelConfig (
14161422 model = model ,
@@ -2018,6 +2024,7 @@ def test_priority_scheduling_preemption_and_resumption_when_out_of_kv():
20182024 num_blocks = 5 , # Can hold 64 tokens (first block is null)
20192025 block_size = 16 , # Standard block size
20202026 use_kv_connector = True ,
2027+ disable_hybrid_kv_cache_manager = True ,
20212028 )
20222029
20232030 # Create a request and schedule it
0 commit comments