From c3756256cacb58810060d1391a9986d74a6251d0 Mon Sep 17 00:00:00 2001 From: snowcharm Date: Mon, 21 Apr 2025 15:54:52 +0800 Subject: [PATCH 1/2] optimize _update_states Signed-off-by: snowcharm --- vllm/v1/worker/gpu_model_runner.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 7c88ecc31d02..ace599e9e12a 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -454,15 +454,11 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None: # Add the new or resumed requests to the persistent batch. # The smaller empty indices are filled first. - removed_req_indices = sorted(removed_req_indices, reverse=True) + removed_req_indices.sort(reverse=True) for req_id in req_ids_to_add: req_state = self.requests[req_id] - if removed_req_indices: - # Fill the empty index. - req_index = removed_req_indices.pop() - else: - # Append to the end. - req_index = None + req_index = removed_req_indices.pop( + ) if removed_req_indices else None self.input_batch.add_request(req_state, req_index) # Condense the batched states if there are empty indices. 
From a82913a54696a848f26e7da116bbe20faf23d44b Mon Sep 17 00:00:00 2001 From: snowcharm Date: Tue, 22 Apr 2025 11:29:33 +0800 Subject: [PATCH 2/2] revert the change for better clarity Signed-off-by: snowcharm --- vllm/v1/worker/gpu_model_runner.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index ace599e9e12a..1d093ba0582a 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -457,8 +457,12 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None: removed_req_indices.sort(reverse=True) for req_id in req_ids_to_add: req_state = self.requests[req_id] - req_index = removed_req_indices.pop( - ) if removed_req_indices else None + if removed_req_indices: + # Fill the empty index. + req_index = removed_req_indices.pop() + else: + # Append to the end. + req_index = None self.input_batch.add_request(req_state, req_index) # Condense the batched states if there are empty indices.