@@ -43,6 +43,7 @@ def test_basic_lifecycle():
4343 # STEP (1): Prefill.
4444 # (1a): schedule()
4545 scheduler_output = scheduler .schedule ()
46+ assert len (scheduler .requests ) == 1
4647 assert len (scheduler .running ) == 1
4748 assert len (scheduler_output .scheduled_new_reqs ) == 1
4849
@@ -67,6 +68,7 @@ def test_basic_lifecycle():
6768 assert len (scheduler .waiting ) == 0
6869
6970 # ... but blocks should not be freed.
71+ assert len (scheduler .requests ) == 1
7072 blocks = scheduler .kv_cache_manager .coordinator .single_type_managers [
7173 0
7274 ].req_to_blocks [request_id ]
@@ -76,6 +78,7 @@ def test_basic_lifecycle():
7678 # STEP (2): Send Finished to PB.
7779 # (2a): schedule() - pass finished request to PB.
7880 scheduler_output = scheduler .schedule ()
81+ assert len (scheduler .requests ) == 1
7982 assert len (scheduler .running ) == 0
8083 assert len (scheduler_output .finished_req_ids ) == 1
8184 assert request_id in scheduler_output .finished_req_ids
@@ -92,6 +95,7 @@ def test_basic_lifecycle():
9295 # STEP (3): Finished sending.
9396 # (3a): schedule() - pass finished request to PB.
9497 scheduler_output = scheduler .schedule ()
98+ assert len (scheduler .requests ) == 1
9599 assert len (scheduler .running ) == 0
96100 assert len (scheduler_output .finished_req_ids ) == 0
97101 assert len (scheduler_output .scheduled_new_reqs ) == 0
@@ -133,6 +137,7 @@ def test_short_prompt_lifecycle():
133137 # STEP (1): Prefill.
134138 # (1a): schedule()
135139 scheduler_output = scheduler .schedule ()
140+ assert len (scheduler .requests ) == 1
136141 assert len (scheduler .running ) == 1
137142 assert len (scheduler_output .scheduled_new_reqs ) == 1
138143
@@ -178,7 +183,7 @@ def test_prefix_cache_lifecycle():
178183 reqs = [request_normal ], use_eos = True
179184 )
180185 scheduler .update_from_output (scheduler_output , model_runner_output )
181- scheduler .schedule ()
186+ scheduler_output = scheduler .schedule ()
182187 scheduler .update_from_output (scheduler_output , EMPTY_MODEL_RUNNER_OUTPUT )
183188
184189 #####################
@@ -213,3 +218,45 @@ def test_prefix_cache_lifecycle():
213218 )
214219 scheduler .update_from_output (scheduler_output , model_runner_output )
215220 assert_scheduler_empty (scheduler )
221+
222+
def test_abort_during_kv_transfer():
    """Abort a remote-decode request while its KV transfer is still pending.

    The test checks that the scheduler keeps tracking the request (one entry
    in ``scheduler.requests``) both before and after the abort, and that the
    scheduler is only reported empty once a ``finished_sending`` notification
    for that request has been processed.
    """
    config = create_vllm_config()
    scheduler = create_scheduler(config)

    # Size the prompt to cover two full blocks plus half of another one.
    block_size = config.cache_config.block_size
    full_block_count = 2
    prompt_len = int(block_size * (full_block_count + 0.5))

    request = create_request(
        request_id=1,
        block_size=block_size,
        num_tokens=prompt_len,
        do_remote_decode=True,
    )
    scheduler.add_request(request)

    # First step: schedule the request and feed back a model runner output
    # built for it.
    scheduler.update_from_output(
        scheduler.schedule(),
        create_model_runner_output(reqs=[request]),
    )
    # Second step: schedule again and feed back an empty model runner output.
    scheduler.update_from_output(scheduler.schedule(), EMPTY_MODEL_RUNNER_OUTPUT)

    # The request must still be tracked: its blocks are expected to be held
    # until the transfer completes.
    assert len(scheduler.requests) == 1

    # Aborting at this point must not drop the request either.
    req_id = request.request_id
    scheduler.finish_requests([req_id], RequestStatus.FINISHED_ABORTED)
    assert len(scheduler.requests) == 1

    # Report the request as finished sending through the KV connector output;
    # the shared empty output is deep-copied so it is not mutated.
    final_step = scheduler.schedule()
    sending_done = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT)
    sending_done.kv_connector_output = KVConnectorOutput(finished_sending=[req_id])
    scheduler.update_from_output(final_step, sending_done)
    assert_scheduler_empty(scheduler)
0 commit comments