We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 226073e commit 1116b82 Copy full SHA for 1116b82
vllm/v1/worker/gpu_model_runner.py
@@ -2478,7 +2478,7 @@ def propose_draft_token_ids(sampled_token_ids):
2478
effective_drafter_max_model_len = (
2479
self.speculative_config.draft_model_config.max_model_len)
2480
input_fits_in_drafter = spec_decode_common_attn_metadata and (
2481
- spec_decode_common_attn_metadata.seq_lens.max() +
+ spec_decode_common_attn_metadata.max_seq_len +
2482
self.speculative_config.num_speculative_tokens
2483
<= effective_drafter_max_model_len)
2484
if use_padded_batch_for_eagle and input_fits_in_drafter:
0 commit comments