@@ -98,11 +98,7 @@ def create_scheduler(
9898 )
9999 kv_cache_config = KVCacheConfig (
100100 num_blocks = num_blocks , # A large number of blocks to hold all requests
101- ** ({
102- "tensors" : {}
103- } if vllm_version_is ("0.9.0" ) else {
104- "kv_cache_tensors" : []
105- }),
101+ kv_cache_tensors = [],
106102 kv_cache_groups = [
107103 KVCacheGroupSpec (['layer' ],
108104 FullAttentionSpec (block_size , 1 , 1 , torch .float32 ,
@@ -145,8 +141,8 @@ def create_requests(num_requests: int,
145141 multi_modal_hashes = None ,
146142 eos_token_id = EOS_TOKEN_ID ,
147143 ** ({
148- "arrival_time" : 0.0
149- } if vllm_version_is ("0.9.0" ) else {}),
144+ "pooling_params" : None
145+ } if not vllm_version_is ("0.9.1" ) else {}),
150146 )
151147 requests .append (request )
152148 return requests
@@ -262,7 +258,9 @@ def test_schedule_concurrent_partial_requests(enable_prefix_caching: bool):
262258 spec_token_ids = None ,
263259 logprobs = None ,
264260 prompt_logprobs_dict = {},
265- )
261+ ** ({
262+ "pooler_output" : []
263+ } if not vllm_version_is ("0.9.1" ) else {}))
266264 scheduler .update_from_output (output , model_runner_output )
267265
268266 # Schedule the next step. All three requests are running.
@@ -286,7 +284,10 @@ def test_schedule_concurrent_partial_requests(enable_prefix_caching: bool):
286284 spec_token_ids = None ,
287285 logprobs = None ,
288286 prompt_logprobs_dict = {},
289- )
287+ ** ({
288+ "pooler_output" : []
289+ } if not vllm_version_is ("0.9.1" ) else {}))
290+
290291 scheduler .update_from_output (output1 , model_runner_output )
291292 output2 = scheduler .schedule ()
292293 assert len (scheduler .running ) == 3
@@ -337,7 +338,10 @@ def test_stop_via_update_from_output():
337338 11 ]], # First request hits EOS, second continues
338339 spec_token_ids = None ,
339340 logprobs = None ,
340- prompt_logprobs_dict = {})
341+ prompt_logprobs_dict = {},
342+ ** ({
343+ "pooler_output" : []
344+ } if not vllm_version_is ("0.9.1" ) else {}))
341345
342346 scheduler .update_from_output (scheduler_output , model_output )
343347
@@ -385,7 +389,10 @@ def test_stop_via_update_from_output():
385389 [13 , 14 ]], # First request hits stop token
386390 spec_token_ids = None ,
387391 logprobs = None ,
388- prompt_logprobs_dict = {})
392+ prompt_logprobs_dict = {},
393+ ** ({
394+ "pooler_output" : []
395+ } if not vllm_version_is ("0.9.1" ) else {}))
389396
390397 scheduler .update_from_output (scheduler_output , model_output )
391398
@@ -432,7 +439,10 @@ def test_stop_via_update_from_output():
432439 [13 ]], # First request exceeds max_tokens
433440 spec_token_ids = None ,
434441 logprobs = None ,
435- prompt_logprobs_dict = {})
442+ prompt_logprobs_dict = {},
443+ ** ({
444+ "pooler_output" : []
445+ } if not vllm_version_is ("0.9.1" ) else {}))
436446
437447 scheduler .update_from_output (scheduler_output , model_output )
438448
@@ -474,7 +484,10 @@ def test_stop_via_update_from_output():
474484 sampled_token_ids = [[EOS_TOKEN_ID , 10 , 11 ]],
475485 spec_token_ids = None ,
476486 logprobs = None ,
477- prompt_logprobs_dict = {})
487+ prompt_logprobs_dict = {},
488+ ** ({
489+ "pooler_output" : []
490+ } if not vllm_version_is ("0.9.1" ) else {}))
478491
479492 scheduler .update_from_output (scheduler_output , model_output )
480493
@@ -524,7 +537,10 @@ def test_schedule_concurrent_batches(enable_prefix_caching: Optional[bool],
524537 spec_token_ids = None ,
525538 logprobs = None ,
526539 prompt_logprobs_dict = {},
527- )
540+ ** ({
541+ "pooler_output" : []
542+ } if not vllm_version_is ("0.9.1" ) else {}))
543+
528544 scheduler .update_from_output (scheduler_output0 , model_runner_output )
529545
530546 # Schedule the next step.
@@ -541,7 +557,10 @@ def test_schedule_concurrent_batches(enable_prefix_caching: Optional[bool],
541557 spec_token_ids = None ,
542558 logprobs = None ,
543559 prompt_logprobs_dict = {},
544- )
560+ ** ({
561+ "pooler_output" : []
562+ } if not vllm_version_is ("0.9.1" ) else {}))
563+
545564 scheduler .update_from_output (scheduler_output1 , model_runner_output )
546565
547566
@@ -565,8 +584,6 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
565584 1. Speculated tokens get scheduled correctly
566585 2. Spec decoding stats properly count number of draft and accepted tokens
567586 """
568- if vllm_version_is ("0.9.0" ):
569- return
570587 num_spec_tokens = max (1 , max (len (t ) for t in spec_tokens ))
571588 scheduler = create_scheduler (num_speculative_tokens = num_spec_tokens )
572589 requests = create_requests (num_requests = len (spec_tokens ), num_tokens = 1 )
@@ -593,7 +610,10 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
593610 spec_token_ids = spec_tokens ,
594611 logprobs = None ,
595612 prompt_logprobs_dict = {},
596- )
613+ ** ({
614+ "pooler_output" : []
615+ } if not vllm_version_is ("0.9.1" ) else {}))
616+
597617 engine_core_outputs = scheduler .update_from_output (output ,
598618 model_runner_output )
599619
@@ -632,7 +652,10 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
632652 spec_token_ids = None ,
633653 logprobs = None ,
634654 prompt_logprobs_dict = {},
635- )
655+ ** ({
656+ "pooler_output" : []
657+ } if not vllm_version_is ("0.9.1" ) else {}))
658+
636659 engine_core_outputs = scheduler .update_from_output (output ,
637660 model_runner_output )
638661
@@ -727,7 +750,9 @@ def make_output(scheduler: AscendScheduler):
727750 spec_token_ids = None ,
728751 logprobs = None ,
729752 prompt_logprobs_dict = {},
730- )
753+ ** ({
754+ "pooler_output" : []
755+ } if not vllm_version_is ("0.9.1" ) else {}))
731756
732757
733758def assert_scheduler_empty (scheduler : AscendScheduler ):
@@ -744,11 +769,10 @@ def assert_scheduler_empty(scheduler: AscendScheduler):
744769 assert len (scheduler .encoder_cache_manager .cached ) == 0
745770
746771 # KVCache Manager.
747- if not vllm_version_is ("0.9.0" ):
748- assert len (scheduler .kv_cache_manager .coordinator .
749- single_type_managers [0 ].req_to_blocks ) == 0
750- assert len (scheduler .kv_cache_manager .coordinator .
751- single_type_managers [0 ].num_cached_block ) == 0
772+ assert len (scheduler .kv_cache_manager .coordinator .single_type_managers [0 ].
773+ req_to_blocks ) == 0
774+ assert len (scheduler .kv_cache_manager .coordinator .single_type_managers [0 ].
775+ num_cached_block ) == 0
752776 assert len (scheduler .kv_cache_manager .req_to_block_hashes ) == 0
753777 num_free_blocks = (
754778 scheduler .kv_cache_manager .block_pool .free_block_queue .num_free_blocks )
@@ -789,4 +813,4 @@ def test_memory_leak():
789813 scheduler .update_from_output (scheduler_output , model_runner_output )
790814
791815 # Confirm no memory leak.
792- assert_scheduler_empty (scheduler )
816+ assert_scheduler_empty (scheduler )