@@ -43,6 +43,7 @@ def test_basic_lifecycle():
4343    # STEP (1): Prefill. 
4444    # (1a): schedule() 
4545    scheduler_output  =  scheduler .schedule ()
46+     assert  len (scheduler .requests ) ==  1 
4647    assert  len (scheduler .running ) ==  1 
4748    assert  len (scheduler_output .scheduled_new_reqs ) ==  1 
4849
@@ -67,6 +68,7 @@ def test_basic_lifecycle():
6768    assert  len (scheduler .waiting ) ==  0 
6869
6970    # ... but blocks should not be freed. 
71+     assert  len (scheduler .requests ) ==  1 
7072    blocks  =  scheduler .kv_cache_manager .coordinator .single_type_managers [
7173        0 
7274    ].req_to_blocks [request_id ]
@@ -76,6 +78,7 @@ def test_basic_lifecycle():
7678    # STEP (2): Send Finished to PB. 
7779    # (2a): schedule() - pass finished request to PB. 
7880    scheduler_output  =  scheduler .schedule ()
81+     assert  len (scheduler .requests ) ==  1 
7982    assert  len (scheduler .running ) ==  0 
8083    assert  len (scheduler_output .finished_req_ids ) ==  1 
8184    assert  request_id  in  scheduler_output .finished_req_ids 
@@ -92,6 +95,7 @@ def test_basic_lifecycle():
9295    # STEP (3): Finished sending. 
9396    # (3a): schedule() - pass finished request to PB. 
9497    scheduler_output  =  scheduler .schedule ()
98+     assert  len (scheduler .requests ) ==  1 
9599    assert  len (scheduler .running ) ==  0 
96100    assert  len (scheduler_output .finished_req_ids ) ==  0 
97101    assert  len (scheduler_output .scheduled_new_reqs ) ==  0 
@@ -133,6 +137,7 @@ def test_short_prompt_lifecycle():
133137    # STEP (1): Prefill. 
134138    # (1a): schedule() 
135139    scheduler_output  =  scheduler .schedule ()
140+     assert  len (scheduler .requests ) ==  1 
136141    assert  len (scheduler .running ) ==  1 
137142    assert  len (scheduler_output .scheduled_new_reqs ) ==  1 
138143
@@ -178,7 +183,7 @@ def test_prefix_cache_lifecycle():
178183        reqs = [request_normal ], use_eos = True 
179184    )
180185    scheduler .update_from_output (scheduler_output , model_runner_output )
181-     scheduler .schedule ()
186+     scheduler_output   =   scheduler .schedule ()
182187    scheduler .update_from_output (scheduler_output , EMPTY_MODEL_RUNNER_OUTPUT )
183188
184189    ##################### 
@@ -213,3 +218,45 @@ def test_prefix_cache_lifecycle():
213218    )
214219    scheduler .update_from_output (scheduler_output , model_runner_output )
215220    assert_scheduler_empty (scheduler )
221+ 
222+ 
def test_abort_during_kv_transfer():
    """Abort a remote-decode request while its KV transfer is outstanding.

    The request is driven through two schedule/update cycles, after which the
    scheduler is expected to still track it. It is then aborted and must
    remain tracked. Only once a ``finished_sending`` notification for the
    request is reported should the scheduler end up empty.
    """
    config = create_vllm_config()
    scheduler = create_scheduler(config)

    # Prompt spans two full blocks plus half of a third one.
    block_size = config.cache_config.block_size
    full_block_count = 2
    prompt_len = int(block_size * (full_block_count + 0.5))

    request = create_request(
        request_id=1,
        block_size=block_size,
        num_tokens=prompt_len,
        do_remote_decode=True,
    )
    scheduler.add_request(request)

    # First cycle: schedule the request and feed back its runner output.
    sched_out = scheduler.schedule()
    runner_out = create_model_runner_output(reqs=[request])
    scheduler.update_from_output(sched_out, runner_out)

    # Second cycle: nothing new is reported by the model runner.
    sched_out = scheduler.schedule()
    scheduler.update_from_output(sched_out, EMPTY_MODEL_RUNNER_OUTPUT)

    # The scheduler must still be holding on to the request at this point.
    assert len(scheduler.requests) == 1

    # Aborting must not drop the request either.
    scheduler.finish_requests(
        [request.request_id], RequestStatus.FINISHED_ABORTED
    )
    assert len(scheduler.requests) == 1

    # Report the send as finished; work on a copy so the shared empty output
    # object is not mutated.
    sched_out = scheduler.schedule()
    runner_out = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
    runner_out.kv_connector_output = KVConnectorOutput(
        finished_sending=[request.request_id]
    )
    scheduler.update_from_output(sched_out, runner_out)
    assert_scheduler_empty(scheduler)
0 commit comments