@@ -98,11 +98,7 @@ def create_scheduler(
9898    )
9999    kv_cache_config  =  KVCacheConfig (
100100        num_blocks = num_blocks ,  # A large number of blocks to hold all requests 
101-         ** ({
102-             "tensors" : {}
103-         } if  vllm_version_is ("0.9.0" ) else  {
104-             "kv_cache_tensors" : []
105-         }),
101+         kv_cache_tensors = [],
106102        kv_cache_groups = [
107103            KVCacheGroupSpec (['layer' ],
108104                             FullAttentionSpec (block_size , 1 , 1 , torch .float32 ,
@@ -145,8 +141,8 @@ def create_requests(num_requests: int,
145141            multi_modal_hashes = None ,
146142            eos_token_id = EOS_TOKEN_ID ,
147143            ** ({
148-                 "arrival_time " : 0.0 
149-             } if  vllm_version_is ("0.9.0 " ) else  {}),
144+                 "pooling_params " : None 
145+             } if  not   vllm_version_is ("0.9.1 " ) else  {}),
150146        )
151147        requests .append (request )
152148    return  requests 
@@ -262,7 +258,9 @@ def test_schedule_concurrent_partial_requests(enable_prefix_caching: bool):
262258        spec_token_ids = None ,
263259        logprobs = None ,
264260        prompt_logprobs_dict = {},
265-     )
261+         ** ({
262+             "pooler_output" : []
263+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
266264    scheduler .update_from_output (output , model_runner_output )
267265
268266    # Schedule the next step. All three requests are running. 
@@ -286,7 +284,10 @@ def test_schedule_concurrent_partial_requests(enable_prefix_caching: bool):
286284        spec_token_ids = None ,
287285        logprobs = None ,
288286        prompt_logprobs_dict = {},
289-     )
287+         ** ({
288+             "pooler_output" : []
289+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
290+ 
290291    scheduler .update_from_output (output1 , model_runner_output )
291292    output2  =  scheduler .schedule ()
292293    assert  len (scheduler .running ) ==  3 
@@ -337,7 +338,10 @@ def test_stop_via_update_from_output():
337338                            11 ]],  # First request hits EOS, second continues 
338339        spec_token_ids = None ,
339340        logprobs = None ,
340-         prompt_logprobs_dict = {})
341+         prompt_logprobs_dict = {},
342+         ** ({
343+             "pooler_output" : []
344+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
341345
342346    scheduler .update_from_output (scheduler_output , model_output )
343347
@@ -385,7 +389,10 @@ def test_stop_via_update_from_output():
385389                           [13 , 14 ]],  # First request hits stop token 
386390        spec_token_ids = None ,
387391        logprobs = None ,
388-         prompt_logprobs_dict = {})
392+         prompt_logprobs_dict = {},
393+         ** ({
394+             "pooler_output" : []
395+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
389396
390397    scheduler .update_from_output (scheduler_output , model_output )
391398
@@ -432,7 +439,10 @@ def test_stop_via_update_from_output():
432439                           [13 ]],  # First request exceeds max_tokens 
433440        spec_token_ids = None ,
434441        logprobs = None ,
435-         prompt_logprobs_dict = {})
442+         prompt_logprobs_dict = {},
443+         ** ({
444+             "pooler_output" : []
445+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
436446
437447    scheduler .update_from_output (scheduler_output , model_output )
438448
@@ -474,7 +484,10 @@ def test_stop_via_update_from_output():
474484        sampled_token_ids = [[EOS_TOKEN_ID , 10 , 11 ]],
475485        spec_token_ids = None ,
476486        logprobs = None ,
477-         prompt_logprobs_dict = {})
487+         prompt_logprobs_dict = {},
488+         ** ({
489+             "pooler_output" : []
490+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
478491
479492    scheduler .update_from_output (scheduler_output , model_output )
480493
@@ -524,7 +537,10 @@ def test_schedule_concurrent_batches(enable_prefix_caching: Optional[bool],
524537        spec_token_ids = None ,
525538        logprobs = None ,
526539        prompt_logprobs_dict = {},
527-     )
540+         ** ({
541+             "pooler_output" : []
542+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
543+ 
528544    scheduler .update_from_output (scheduler_output0 , model_runner_output )
529545
530546    # Schedule the next step. 
@@ -541,7 +557,10 @@ def test_schedule_concurrent_batches(enable_prefix_caching: Optional[bool],
541557        spec_token_ids = None ,
542558        logprobs = None ,
543559        prompt_logprobs_dict = {},
544-     )
560+         ** ({
561+             "pooler_output" : []
562+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
563+ 
545564    scheduler .update_from_output (scheduler_output1 , model_runner_output )
546565
547566
@@ -565,8 +584,6 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
565584    1. Speculated tokens get scheduled correctly 
566585    2. Spec decoding stats properly count number of draft and accepted tokens 
567586    """ 
568-     if  vllm_version_is ("0.9.0" ):
569-         return 
570587    num_spec_tokens  =  max (1 , max (len (t ) for  t  in  spec_tokens ))
571588    scheduler  =  create_scheduler (num_speculative_tokens = num_spec_tokens )
572589    requests  =  create_requests (num_requests = len (spec_tokens ), num_tokens = 1 )
@@ -593,7 +610,10 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
593610        spec_token_ids = spec_tokens ,
594611        logprobs = None ,
595612        prompt_logprobs_dict = {},
596-     )
613+         ** ({
614+             "pooler_output" : []
615+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
616+ 
597617    engine_core_outputs  =  scheduler .update_from_output (output ,
598618                                                       model_runner_output )
599619
@@ -632,7 +652,10 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
632652        spec_token_ids = None ,
633653        logprobs = None ,
634654        prompt_logprobs_dict = {},
635-     )
655+         ** ({
656+             "pooler_output" : []
657+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
658+ 
636659    engine_core_outputs  =  scheduler .update_from_output (output ,
637660                                                       model_runner_output )
638661
@@ -727,7 +750,9 @@ def make_output(scheduler: AscendScheduler):
727750        spec_token_ids = None ,
728751        logprobs = None ,
729752        prompt_logprobs_dict = {},
730-     )
753+         ** ({
754+             "pooler_output" : []
755+         } if  not  vllm_version_is ("0.9.1" ) else  {}))
731756
732757
733758def  assert_scheduler_empty (scheduler : AscendScheduler ):
@@ -744,11 +769,10 @@ def assert_scheduler_empty(scheduler: AscendScheduler):
744769    assert  len (scheduler .encoder_cache_manager .cached ) ==  0 
745770
746771    # KVCache Manager. 
747-     if  not  vllm_version_is ("0.9.0" ):
748-         assert  len (scheduler .kv_cache_manager .coordinator .
749-                    single_type_managers [0 ].req_to_blocks ) ==  0 
750-         assert  len (scheduler .kv_cache_manager .coordinator .
751-                    single_type_managers [0 ].num_cached_block ) ==  0 
772+     assert  len (scheduler .kv_cache_manager .coordinator .single_type_managers [0 ].
773+                req_to_blocks ) ==  0 
774+     assert  len (scheduler .kv_cache_manager .coordinator .single_type_managers [0 ].
775+                num_cached_block ) ==  0 
752776    assert  len (scheduler .kv_cache_manager .req_to_block_hashes ) ==  0 
753777    num_free_blocks  =  (
754778        scheduler .kv_cache_manager .block_pool .free_block_queue .num_free_blocks )
@@ -789,4 +813,4 @@ def test_memory_leak():
789813        scheduler .update_from_output (scheduler_output , model_runner_output )
790814
791815    # Confirm no memory leak. 
792-     assert_scheduler_empty (scheduler )
816+     assert_scheduler_empty (scheduler )
0 commit comments