diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index dc2bb3a52cac..f0c6b15b79da 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1325,7 +1325,7 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                                recommend_to_remove=False)
             return False
 
-        # Only Ngram speculative decoding so far.
+        # V1 supports N-gram, Medusa, and Eagle speculative decoding.
         is_ngram_enabled = False
         is_eagle_enabled = False
         is_medusa_enabled = False
@@ -1390,14 +1390,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                 _raise_or_fallback(feature_name=name,
                                    recommend_to_remove=False)
                 return False
-        # ngram is supported on V1, but off by default for now.
-        if is_ngram_enabled and _warn_or_fallback("ngram"):
-            return False
-
-        # Eagle is under development, so we don't support it yet.
-        if is_eagle_enabled and _warn_or_fallback("Eagle"):
-            return False
-
         # Non-[CUDA, TPU] may be supported on V1, but off by default for now.
         v0_hardware = not any(
            (current_platform.is_cuda(), current_platform.is_tpu()))
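
With these fallback checks removed, enabling N-gram or Eagle speculative decoding no longer forces a warning or a fallback to the V0 engine. Below is a minimal sketch of exercising the N-gram path on V1. It assumes the `speculative_config` dict form of the API and these key names (`method`, `num_speculative_tokens`, `prompt_lookup_max`), which may differ between vLLM versions; the model name is only an example.

```python
# Sketch only, not part of this PR. Key names in speculative_config are
# assumptions and may vary across vLLM versions.
import os

os.environ["VLLM_USE_V1"] = "1"  # opt into the V1 engine explicitly

from vllm import LLM, SamplingParams

llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # example model, any decoder works
    speculative_config={
        "method": "ngram",            # prompt-lookup (N-gram) drafting
        "num_speculative_tokens": 5,  # draft length per step
        "prompt_lookup_max": 4,       # longest n-gram matched against the prompt
    },
)

outputs = llm.generate(["The capital of France is"],
                       SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)
```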