diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index 08368b7d99ef..0349155cdf93 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -278,6 +278,7 @@ def schedule(self) -> SchedulerOutput: token_budget += num_scheduled_tokens[preempted_req.request_id] req_to_new_blocks.pop(preempted_req.request_id) num_scheduled_tokens.pop(preempted_req.request_id) + req_index -= 1 else: preempted_req = self.running.pop()