Skip to content

Commit f01a7e1

Browse files
Author: Varun Sundar Rabindranath (committed)
fixes
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
1 parent 3ccf486 commit f01a7e1

File tree

1 file changed

+7
-15
lines changed

1 file changed

+7
-15
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3401,30 +3401,26 @@ def _dummy_run(
34013401
with self.maybe_dummy_run_with_lora(
34023402
self.lora_config, num_scheduled_tokens, remove_lora
34033403
):
3404-
# Make sure padding doesn't exceed max_num_tokens
3405-
assert num_tokens_after_padding <= self.max_num_tokens
3404+
# Make sure padding doesn't exceed max_num_tokens
3405+
assert num_tokens_after_padding <= self.max_num_tokens
34063406
model_kwargs = self._init_model_kwargs(num_tokens_after_padding)
34073407
if self.supports_mm_inputs and not self.model_config.is_encoder_decoder:
34083408
input_ids = None
3409-
inputs_embeds = self.inputs_embeds.gpu[:
3410-
num_tokens_after_padding]
3409+
inputs_embeds = self.inputs_embeds.gpu[:num_tokens_after_padding]
34113410
model_kwargs = {
34123411
**model_kwargs,
34133412
**self._dummy_mm_kwargs(num_reqs),
34143413
}
34153414
elif self.enable_prompt_embeds:
34163415
input_ids = None
3417-
inputs_embeds = self.inputs_embeds.gpu[:
3418-
num_tokens_after_padding]
3419-
model_kwargs = self._init_model_kwargs(
3420-
num_tokens_after_padding)
3416+
inputs_embeds = self.inputs_embeds.gpu[:num_tokens_after_padding]
3417+
model_kwargs = self._init_model_kwargs(num_tokens_after_padding)
34213418
else:
34223419
input_ids = self.input_ids.gpu[:num_tokens_after_padding]
34233420
inputs_embeds = None
34243421

34253422
if self.uses_mrope:
3426-
positions = self.mrope_positions.gpu[:, :
3427-
num_tokens_after_padding]
3423+
positions = self.mrope_positions.gpu[:, :num_tokens_after_padding]
34283424
else:
34293425
positions = self.positions.gpu[:num_tokens_after_padding]
34303426

@@ -3441,12 +3437,8 @@ def _dummy_run(
34413437
)
34423438

34433439
intermediate_tensors = self.sync_and_slice_intermediate_tensors(
3444-
<<<<<<< HEAD
3445-
num_tokens, None, False
3440+
num_tokens_after_padding, None, False
34463441
)
3447-
=======
3448-
num_tokens_after_padding, None, False)
3449-
>>>>>>> f38a17972 (pad input buffers)
34503442

34513443
# filter out the valid batch descriptor
34523444
_cg_mode, batch_descriptor = (

0 commit comments

Comments (0)