We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 20d93dc, commit 22fd4a6. Copy full SHA for 22fd4a6
vllm/config/__init__.py
@@ -2120,9 +2120,9 @@ def __post_init__(self):
2120
if self.num_speculative_tokens is None:
2121
self.num_speculative_tokens = max_num_speculative_tokens
2122
else:
2123
- assert self.num_speculative_tokens < max_num_speculative_tokens, (
+ assert self.num_speculative_tokens <= max_num_speculative_tokens, (
2124
"num_speculative_tokens should be None or must be less than or equal to the "
2125
- "max value in num_speculative_tokens_per_method.")
+ "max value in num_speculative_tokens_per_method.")
2126
2127
# Automatically configure the method for ngram when "model" is used
2128
# instead of "method"
0 commit comments