Commit b0ccfc5

[Bugfix][V1] GPUModelRunner._update_states should return True when there is a finished request in batch (#13126)
1 parent ba59b78 commit b0ccfc5

File tree

2 files changed: +238 -1 lines changed
Lines changed: 236 additions & 0 deletions
@@ -0,0 +1,236 @@
# SPDX-License-Identifier: Apache-2.0
import pytest

from vllm.config import CacheConfig, ModelConfig, SchedulerConfig, VllmConfig
from vllm.sampling_params import SamplingParams
from vllm.v1.core.scheduler_output import (CachedRequestData, NewRequestData,
                                            SchedulerOutput)
from vllm.v1.worker.gpu_model_runner import GPUModelRunner


@pytest.fixture
def model_runner():
    scheduler_config = SchedulerConfig(
        max_num_seqs=10,
        max_num_batched_tokens=512,
        max_model_len=512,
    )
    model_config = ModelConfig(
        model="facebook/opt-125m",
        task="generate",
        tokenizer="facebook/opt-125m",
        tokenizer_mode="auto",
        trust_remote_code=True,
        dtype="float16",
        seed=42,
    )
    cache_config = CacheConfig(
        block_size=16,
        gpu_memory_utilization=0.9,
        swap_space=0,
        cache_dtype="auto",
    )
    vllm_config = VllmConfig(
        model_config=model_config,
        cache_config=cache_config,
        scheduler_config=scheduler_config,
    )

    device = "cuda"
    return GPUModelRunner(vllm_config, device)


def _schedule_new_request(*req_ids: str) -> SchedulerOutput:
    new_reqs = []
    num_scheduled_tokens = {}
    total_num_scheduled_tokens = 0
    for req_id in req_ids:
        new_reqs.append(
            NewRequestData(
                req_id=req_id,
                prompt_token_ids=[1, 2, 3],
                prompt="test",
                mm_inputs=[],
                mm_hashes=[],
                mm_positions=[],
                sampling_params=SamplingParams(),
                block_ids=[0],
                num_computed_tokens=0,
                lora_request=None,
            ))
        num_scheduled_tokens[req_id] = 3
        total_num_scheduled_tokens += num_scheduled_tokens[req_id]

    return SchedulerOutput(
        scheduled_new_reqs=new_reqs,
        scheduled_cached_reqs=[],
        num_scheduled_tokens=num_scheduled_tokens,
        total_num_scheduled_tokens=total_num_scheduled_tokens,
        scheduled_encoder_inputs={},
        num_common_prefix_blocks=0,
        finished_req_ids=set(),
        free_encoder_input_ids=[],
    )


def _is_req_scheduled(model_runner, req_id: str) -> bool:
    return req_id in model_runner.input_batch.req_id_to_index


def _is_req_added(model_runner, req_id: str) -> bool:
    return req_id in model_runner.requests


def test_update_states_new_request(model_runner):
    req_id = "req_0"

    # new req
    scheduler_output = _schedule_new_request(req_id)

    batch_changed = model_runner._update_states(scheduler_output)
    assert batch_changed is True
    assert _is_req_added(model_runner, req_id)
    assert _is_req_scheduled(model_runner, req_id)


def test_update_states_request_finished(model_runner):
    req_id = "req_0"

    # new req
    scheduler_output = _schedule_new_request(req_id)

    model_runner._update_states(scheduler_output)
    assert _is_req_added(model_runner, req_id)
    assert _is_req_scheduled(model_runner, req_id)

    # finish req
    scheduler_output = SchedulerOutput(
        scheduled_new_reqs=[],
        scheduled_cached_reqs=[],
        num_scheduled_tokens={},
        total_num_scheduled_tokens=0,
        scheduled_encoder_inputs={},
        num_common_prefix_blocks=0,
        finished_req_ids={req_id},
        free_encoder_input_ids=[],
    )

    batch_changed = model_runner._update_states(scheduler_output)
    assert batch_changed is True
    assert not _is_req_added(model_runner, req_id)
    assert not _is_req_scheduled(model_runner, req_id)


def test_update_states_request_resumed(model_runner):
    req_id = "req_0"

    # new req
    scheduler_output = _schedule_new_request(req_id)

    model_runner._update_states(scheduler_output)
    assert _is_req_added(model_runner, req_id)
    assert _is_req_scheduled(model_runner, req_id)

    # unschedule req
    scheduler_output = SchedulerOutput(
        scheduled_new_reqs=[],
        scheduled_cached_reqs=[],
        num_scheduled_tokens={},
        total_num_scheduled_tokens=0,
        scheduled_encoder_inputs={},
        num_common_prefix_blocks=0,
        finished_req_ids={},
        free_encoder_input_ids=[],
    )

    model_runner._update_states(scheduler_output)
    assert _is_req_added(model_runner, req_id)
    assert not _is_req_scheduled(model_runner, req_id)

    # resume req
    cached_req_data = CachedRequestData(
        req_id=req_id,
        resumed_from_preemption=False,
        new_block_ids=[],
        num_computed_tokens=0,
    )

    scheduler_output = SchedulerOutput(
        scheduled_new_reqs=[],
        scheduled_cached_reqs=[cached_req_data],
        num_scheduled_tokens={req_id: 1},
        total_num_scheduled_tokens=1,
        scheduled_encoder_inputs={},
        num_common_prefix_blocks=0,
        finished_req_ids=set(),
        free_encoder_input_ids=[],
    )

    batch_changed = model_runner._update_states(scheduler_output)
    assert batch_changed is True
    assert _is_req_added(model_runner, req_id)
    assert _is_req_scheduled(model_runner, req_id)


def test_update_states_no_changes(model_runner):
    req_id = "req_0"

    # new req
    scheduler_output = _schedule_new_request(req_id)

    model_runner._update_states(scheduler_output)
    assert _is_req_added(model_runner, req_id)
    assert _is_req_scheduled(model_runner, req_id)

    # schedule req
    scheduler_output = SchedulerOutput(
        scheduled_new_reqs=[],
        scheduled_cached_reqs=[],
        num_scheduled_tokens={req_id: 1},
        total_num_scheduled_tokens=1,
        scheduled_encoder_inputs={},
        num_common_prefix_blocks=0,
        finished_req_ids=set(),
        free_encoder_input_ids=[],
    )

    batch_changed = model_runner._update_states(scheduler_output)
    assert batch_changed is False
    assert _is_req_added(model_runner, req_id)
    assert _is_req_scheduled(model_runner, req_id)


def test_update_states_request_unscheduled(model_runner):
    req_ids = ("req_0", "req_1")

    # new reqs
    scheduler_output = _schedule_new_request(*req_ids)

    model_runner._update_states(scheduler_output)

    assert _is_req_added(model_runner, req_ids[0])
    assert _is_req_scheduled(model_runner, req_ids[0])

    assert _is_req_added(model_runner, req_ids[1])
    assert _is_req_scheduled(model_runner, req_ids[1])

    # unschedule req_1
    scheduler_output = SchedulerOutput(
        scheduled_new_reqs=[],
        scheduled_cached_reqs=[],
        num_scheduled_tokens={req_ids[0]: 1},
        total_num_scheduled_tokens=1,
        scheduled_encoder_inputs={},
        num_common_prefix_blocks=0,
        finished_req_ids=set(),
        free_encoder_input_ids=[],
    )

    batch_changed = model_runner._update_states(scheduler_output)
    assert batch_changed is True

    assert _is_req_added(model_runner, req_ids[0])
    assert _is_req_scheduled(model_runner, req_ids[0])

    assert _is_req_added(model_runner, req_ids[1])
    assert not _is_req_scheduled(model_runner, req_ids[1])

vllm/v1/worker/gpu_model_runner.py

Lines changed: 2 additions & 1 deletion
@@ -363,7 +363,8 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> bool:
         # Condense the batched states if there are empty indices.
         if removed_req_indices:
             self.input_batch.condense(removed_req_indices)
-        return len(unscheduled_req_ids) > 0 or len(req_ids_to_add) > 0
+        return (len(unscheduled_req_ids) > 0 or len(req_ids_to_add) > 0
+                or len(scheduler_output.finished_req_ids) > 0)
 
     def _prepare_inputs(self, scheduler_output: "SchedulerOutput"):
         total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
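
Why the one-line change matters: _update_states reports whether the persistent batch differs from the previous step, and downstream code may rebuild per-batch state only when that flag is True. Before this fix, a step whose only change was a finished request returned False, so stale entries could be left behind. Below is a minimal, self-contained sketch of that invariant; TinyRunner and its methods are hypothetical illustrations, not the vLLM API.

# Hypothetical sketch of the consumer-side invariant this commit fixes;
# the names below are illustrative, not the vLLM API.
class TinyRunner:

    def __init__(self):
        self.active_reqs = []        # request ids currently in the batch
        self.cached_metadata = None  # per-batch state, rebuilt lazily

    def _update_states(self, new_reqs, finished_req_ids):
        self.active_reqs = [r for r in self.active_reqs
                            if r not in finished_req_ids]
        self.active_reqs.extend(new_reqs)
        # The point of the bugfix: finished requests alone must also count
        # as a batch change; otherwise the caller below keeps metadata that
        # still references the removed requests.
        return bool(new_reqs) or bool(finished_req_ids)

    def execute(self, new_reqs, finished_req_ids):
        batch_changed = self._update_states(new_reqs, finished_req_ids)
        if batch_changed or self.cached_metadata is None:
            self.cached_metadata = list(self.active_reqs)
        return self.cached_metadata


runner = TinyRunner()
assert runner.execute(["req_0", "req_1"], set()) == ["req_0", "req_1"]
# Without the finished_req_ids term in the return value, "req_0" would
# linger in cached_metadata after it finished.
assert runner.execute([], {"req_0"}) == ["req_1"]

The new test file above exercises the same paths (new, finished, resumed, unchanged, and unscheduled requests) against the real GPUModelRunner._update_states.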
