From c3756256cacb58810060d1391a9986d74a6251d0 Mon Sep 17 00:00:00 2001 From: snowcharm Date: Mon, 21 Apr 2025 15:54:52 +0800 Subject: [PATCH 1/2] optimize _update_states Signed-off-by: snowcharm --- vllm/v1/worker/gpu_model_runner.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 7c88ecc31d02..ace599e9e12a 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -454,15 +454,11 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None: # Add the new or resumed requests to the persistent batch. # The smaller empty indices are filled first. - removed_req_indices = sorted(removed_req_indices, reverse=True) + removed_req_indices.sort(reverse=True) for req_id in req_ids_to_add: req_state = self.requests[req_id] - if removed_req_indices: - # Fill the empty index. - req_index = removed_req_indices.pop() - else: - # Append to the end. - req_index = None + req_index = removed_req_indices.pop( + ) if removed_req_indices else None self.input_batch.add_request(req_state, req_index) # Condense the batched states if there are empty indices. 
From a82913a54696a848f26e7da116bbe20faf23d44b Mon Sep 17 00:00:00 2001 From: snowcharm Date: Tue, 22 Apr 2025 11:29:33 +0800 Subject: [PATCH 2/2] revert the change for better clarity Signed-off-by: snowcharm --- vllm/v1/worker/gpu_model_runner.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index ace599e9e12a..1d093ba0582a 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -457,8 +457,12 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None: removed_req_indices.sort(reverse=True) for req_id in req_ids_to_add: req_state = self.requests[req_id] - req_index = removed_req_indices.pop( - ) if removed_req_indices else None + if removed_req_indices: + # Fill the empty index. + req_index = removed_req_indices.pop() + else: + # Append to the end. + req_index = None self.input_batch.add_request(req_state, req_index) # Condense the batched states if there are empty indices.