Update vllm/config/__init__.py

ekagra-ranjan · gemini-code-assist[bot] · ekagra-ranjan · commit 22fd4a6c3f72 · 2025-09-05T21:36:20.000Z
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com>
diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
@@ -2120,9 +2120,9 @@ def __post_init__(self):
             if self.num_speculative_tokens is None:
                 self.num_speculative_tokens = max_num_speculative_tokens
             else:
-                assert self.num_speculative_tokens < max_num_speculative_tokens, (
+                assert self.num_speculative_tokens <= max_num_speculative_tokens, (
                     "num_speculative_tokens should be None or must be less than or equal to the "
-                    "max value in num_speculative_tokens_per_method.") 
+                    "max value in num_speculative_tokens_per_method.")
 
         # Automatically configure the method for ngram when "model" is used
         # instead of "method"