1 parent 3edcca7 commit 3ccf486
vllm/v1/worker/gpu_model_runner.py
@@ -3401,6 +3401,8 @@ def _dummy_run(
         with self.maybe_dummy_run_with_lora(
             self.lora_config, num_scheduled_tokens, remove_lora
         ):
+            # Make sure padding doesn't exceed max_num_tokens
+            assert num_tokens_after_padding <= self.max_num_tokens
             model_kwargs = self._init_model_kwargs(num_tokens_after_padding)
             if self.supports_mm_inputs and not self.model_config.is_encoder_decoder:
                 input_ids = None
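
The added assert encodes a simple invariant: after the dummy-run token count is padded up to some granularity, it must still fit within the runner's max_num_tokens budget. Below is a minimal standalone sketch of that invariant, not vLLM code; pad_to_multiple, block_size, and the sample values are hypothetical stand-ins for whatever padding scheme the runner actually uses.

# Minimal standalone sketch (hypothetical names and values), illustrating
# the invariant `num_tokens_after_padding <= max_num_tokens` from the diff.
def pad_to_multiple(num_tokens: int, block_size: int) -> int:
    # Round num_tokens up to the nearest multiple of block_size.
    return ((num_tokens + block_size - 1) // block_size) * block_size

max_num_tokens = 8192  # assumed token budget for the model runner
block_size = 16        # assumed padding granularity

num_tokens_after_padding = pad_to_multiple(8190, block_size)
# Holds whenever the budget is itself a multiple of the padding granularity;
# a misconfigured setup trips this assert early instead of failing later
# with buffers sized beyond what any real batch could use.
assert num_tokens_after_padding <= max_num_tokens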