1 change: 0 additions & 1 deletion .buildkite/test-pipeline.yaml
@@ -302,7 +302,6 @@ steps:
- pytest -v -s v1/metrics
- pytest -v -s v1/test_serial_utils.py
- pytest -v -s v1/test_utils.py
- pytest -v -s v1/test_oracle.py
- pytest -v -s v1/test_metrics_reader.py
# Integration test for streaming correctness (requires special branch).
- pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
79 changes: 0 additions & 79 deletions tests/v1/test_oracle.py

This file was deleted.

104 changes: 1 addition & 103 deletions vllm/engine/arg_utils.py
@@ -1131,15 +1131,7 @@ def create_engine_config(
)
model_config = self.create_model_config()

- # * If VLLM_USE_V1 is unset, we enable V1 for "supported features"
- # and fall back to V0 for experimental or unsupported features.
- # * If VLLM_USE_V1=1, we enable V1 for supported + experimental
- # features and raise error for unsupported features.
- # * If VLLM_USE_V1=0, we disable V1.
- use_v1 = False
- try_v1 = envs.VLLM_USE_V1 or not envs.is_set("VLLM_USE_V1")
- if try_v1 and self._is_v1_supported_oracle(model_config):
-     use_v1 = True
+ use_v1 = envs.VLLM_USE_V1 or not envs.is_set("VLLM_USE_V1")

# If user explicitly set VLLM_USE_V1, sanity check we respect it.
if envs.is_set("VLLM_USE_V1"):
@@ -1437,100 +1429,6 @@ def create_engine_config(

return config

def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
"""Oracle for whether to use V0 or V1 Engine by default."""

#############################################################
# Unsupported Feature Flags on V1.

if (self.logits_processor_pattern
!= EngineArgs.logits_processor_pattern):
_raise_or_fallback(feature_name="--logits-processor-pattern",
recommend_to_remove=False)
return False

# No Mamba or Encoder-Decoder so far.
if not model_config.is_v1_compatible:
_raise_or_fallback(feature_name=model_config.architectures,
recommend_to_remove=False)
return False

# No Concurrent Partial Prefills so far.
if (self.max_num_partial_prefills
!= SchedulerConfig.max_num_partial_prefills
or self.max_long_partial_prefills
!= SchedulerConfig.max_long_partial_prefills):
_raise_or_fallback(feature_name="Concurrent Partial Prefill",
recommend_to_remove=False)
return False

# V1 supports N-gram, Medusa, and Eagle speculative decoding.
if self.speculative_config is not None:
# speculative_config could still be a dict at this point
if isinstance(self.speculative_config, dict):
method = self.speculative_config.get("method", None)
else:
method = self.speculative_config.method

if method == "draft_model":
raise NotImplementedError(
"Draft model speculative decoding is not supported yet. "
"Please consider using other speculative decoding methods "
"such as ngram, medusa, eagle, or deepseek_mtp.")
Comment on lines -1467 to -1479

Member

Hmmm, I think _is_v1_supported_oracle is still useful to exclude some unsupported arguments here

Contributor Author

Maybe we could make an _is_config_supported_oracle, which doesn't dictate VLLM_USE_V1 but just throws errors for unsupported configurations.

Alternatively, it might be better if this check happened in the speculative-decoding initialization rather than at the engine level?

Member

> Maybe we could make an _is_config_supported_oracle, which doesn't dictate VLLM_USE_V1 but just throws errors for unsupported configurations.

Agree.

> Alternatively, it might be better if this check happened in the speculative-decoding initialization rather than at the engine level?

In fact, these arguments were only ever supported in v0 and haven't been implemented (some of them are deprecated) in v1. So I prefer to keep these checks here for clarity.

Contributor

+1, it's good to raise a NotImplementedError instead of removing the check entirely.
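
A minimal sketch of the `_is_config_supported_oracle` idea floated above (editor's illustration, not part of this PR): like the removed oracle it would live on EngineArgs, but instead of deciding between V0 and V1 it would only raise for arguments V1 has not implemented. The method name and the exact set of checks are assumptions, reusing the checks deleted in this diff:

```python
def _is_config_supported_oracle(self, model_config: ModelConfig) -> None:
    """Raise for arguments the V1 engine has not implemented, rather than
    silently falling back to V0. Sketch only; the name is hypothetical."""
    # Architectures without V1 support (e.g. some Mamba / encoder-decoder models).
    if not model_config.is_v1_compatible:
        raise NotImplementedError(
            f"Model architectures {model_config.architectures} are not "
            "supported on the V1 engine yet.")

    # Keep the draft-model check that this PR removes.
    if self.speculative_config is not None:
        spec = self.speculative_config
        # speculative_config could still be a dict at this point.
        method = spec.get("method") if isinstance(spec, dict) else spec.method
        if method == "draft_model":
            raise NotImplementedError(
                "Draft model speculative decoding is not supported yet. "
                "Please consider using other speculative decoding methods "
                "such as ngram, medusa, eagle, or deepseek_mtp.")
```

Called unconditionally from create_engine_config, this would keep the error messages without reintroducing the VLLM_USE_V1 fallback logic.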


V1_BACKENDS = [
"FLASH_ATTN_VLLM_V1",
"FLASH_ATTN",
"PALLAS",
"PALLAS_VLLM_V1",
"TRITON_ATTN_VLLM_V1",
"TRITON_MLA",
"CUTLASS_MLA",
"FLASHMLA",
"FLASHMLA_VLLM_V1",
"FLASH_ATTN_MLA",
"FLASHINFER",
"FLASHINFER_VLLM_V1",
"FLASHINFER_MLA",
"ROCM_AITER_MLA",
"TORCH_SDPA_VLLM_V1",
"FLEX_ATTENTION",
"TREE_ATTN",
"XFORMERS_VLLM_V1",
"ROCM_ATTN_VLLM_V1",
]
if (envs.is_set("VLLM_ATTENTION_BACKEND")
and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):
name = f"VLLM_ATTENTION_BACKEND={envs.VLLM_ATTENTION_BACKEND}"
_raise_or_fallback(feature_name=name, recommend_to_remove=True)
return False

#############################################################
# Experimental Features - allow users to opt in.

if self.pipeline_parallel_size > 1:
supports_pp = getattr(self.distributed_executor_backend,
'supports_pp', False)
if not supports_pp and self.distributed_executor_backend not in (
ParallelConfig.distributed_executor_backend, "ray", "mp",
"external_launcher"):
name = "Pipeline Parallelism without Ray distributed " \
"executor or multiprocessing executor or external " \
"launcher"
_raise_or_fallback(feature_name=name,
recommend_to_remove=False)
return False

if (current_platform.is_cpu()
and model_config.get_sliding_window() is not None):
_raise_or_fallback(feature_name="sliding window (CPU backend)",
recommend_to_remove=False)
return False

#############################################################

return True

def _set_default_args(self, usage_context: UsageContext,
model_config: ModelConfig) -> None:
"""Set Default Arguments for V1 Engine."""