From c332ed48ae143d0260dbf83f5ea906c8f17b95cc Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Thu, 25 Sep 2025 09:26:27 -0400
Subject: [PATCH 1/2] remove oracle

Signed-off-by: Matthew Bonanni
---
 .buildkite/test-pipeline.yaml |   1 -
 vllm/engine/arg_utils.py      | 104 +---------------------------------
 2 files changed, 1 insertion(+), 104 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 200ed344c4e8..74ca7b21ae06 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -302,7 +302,6 @@ steps:
   - pytest -v -s v1/metrics
   - pytest -v -s v1/test_serial_utils.py
   - pytest -v -s v1/test_utils.py
-  - pytest -v -s v1/test_oracle.py
   - pytest -v -s v1/test_metrics_reader.py
   # Integration test for streaming correctness (requires special branch).
   - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 3d48d2a0b22d..592fc2c98af5 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1131,15 +1131,7 @@ def create_engine_config(
         )
         model_config = self.create_model_config()
 
-        # * If VLLM_USE_V1 is unset, we enable V1 for "supported features"
-        #   and fall back to V0 for experimental or unsupported features.
-        # * If VLLM_USE_V1=1, we enable V1 for supported + experimental
-        #   features and raise error for unsupported features.
-        # * If VLLM_USE_V1=0, we disable V1.
-        use_v1 = False
-        try_v1 = envs.VLLM_USE_V1 or not envs.is_set("VLLM_USE_V1")
-        if try_v1 and self._is_v1_supported_oracle(model_config):
-            use_v1 = True
+        use_v1 = envs.VLLM_USE_V1 or not envs.is_set("VLLM_USE_V1")
 
         # If user explicitly set VLLM_USE_V1, sanity check we respect it.
         if envs.is_set("VLLM_USE_V1"):
@@ -1437,100 +1429,6 @@ def create_engine_config(
 
         return config
 
-    def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
-        """Oracle for whether to use V0 or V1 Engine by default."""
-
-        #############################################################
-        # Unsupported Feature Flags on V1.
-
-        if (self.logits_processor_pattern
-                != EngineArgs.logits_processor_pattern):
-            _raise_or_fallback(feature_name="--logits-processor-pattern",
-                               recommend_to_remove=False)
-            return False
-
-        # No Mamba or Encoder-Decoder so far.
-        if not model_config.is_v1_compatible:
-            _raise_or_fallback(feature_name=model_config.architectures,
-                               recommend_to_remove=False)
-            return False
-
-        # No Concurrent Partial Prefills so far.
-        if (self.max_num_partial_prefills
-                != SchedulerConfig.max_num_partial_prefills
-                or self.max_long_partial_prefills
-                != SchedulerConfig.max_long_partial_prefills):
-            _raise_or_fallback(feature_name="Concurrent Partial Prefill",
-                               recommend_to_remove=False)
-            return False
-
-        # V1 supports N-gram, Medusa, and Eagle speculative decoding.
-        if self.speculative_config is not None:
-            # speculative_config could still be a dict at this point
-            if isinstance(self.speculative_config, dict):
-                method = self.speculative_config.get("method", None)
-            else:
-                method = self.speculative_config.method
-
-            if method == "draft_model":
-                raise NotImplementedError(
-                    "Draft model speculative decoding is not supported yet. "
-                    "Please consider using other speculative decoding methods "
-                    "such as ngram, medusa, eagle, or deepseek_mtp.")
-
-        V1_BACKENDS = [
-            "FLASH_ATTN_VLLM_V1",
-            "FLASH_ATTN",
-            "PALLAS",
-            "PALLAS_VLLM_V1",
-            "TRITON_ATTN_VLLM_V1",
-            "TRITON_MLA",
-            "CUTLASS_MLA",
-            "FLASHMLA",
-            "FLASHMLA_VLLM_V1",
-            "FLASH_ATTN_MLA",
-            "FLASHINFER",
-            "FLASHINFER_VLLM_V1",
-            "FLASHINFER_MLA",
-            "ROCM_AITER_MLA",
-            "TORCH_SDPA_VLLM_V1",
-            "FLEX_ATTENTION",
-            "TREE_ATTN",
-            "XFORMERS_VLLM_V1",
-            "ROCM_ATTN_VLLM_V1",
-        ]
-        if (envs.is_set("VLLM_ATTENTION_BACKEND")
-                and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):
-            name = f"VLLM_ATTENTION_BACKEND={envs.VLLM_ATTENTION_BACKEND}"
-            _raise_or_fallback(feature_name=name, recommend_to_remove=True)
-            return False
-
-        #############################################################
-        # Experimental Features - allow users to opt in.
-
-        if self.pipeline_parallel_size > 1:
-            supports_pp = getattr(self.distributed_executor_backend,
-                                  'supports_pp', False)
-            if not supports_pp and self.distributed_executor_backend not in (
-                    ParallelConfig.distributed_executor_backend, "ray", "mp",
-                    "external_launcher"):
-                name = "Pipeline Parallelism without Ray distributed " \
-                    "executor or multiprocessing executor or external " \
-                    "launcher"
-                _raise_or_fallback(feature_name=name,
-                                   recommend_to_remove=False)
-                return False
-
-        if (current_platform.is_cpu()
-                and model_config.get_sliding_window() is not None):
-            _raise_or_fallback(feature_name="sliding window (CPU backend)",
-                               recommend_to_remove=False)
-            return False
-
-        #############################################################
-
-        return True
-
     def _set_default_args(self, usage_context: UsageContext,
                           model_config: ModelConfig) -> None:
         """Set Default Arguments for V1 Engine."""

From a43907de06a6f9a4d7a8568b510b40ba26b2bc32 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Thu, 25 Sep 2025 09:34:22 -0400
Subject: [PATCH 2/2] remove test_oracle.py

Signed-off-by: Matthew Bonanni
---
 tests/v1/test_oracle.py | 79 -----------------------------------------
 1 file changed, 79 deletions(-)
 delete mode 100644 tests/v1/test_oracle.py

diff --git a/tests/v1/test_oracle.py b/tests/v1/test_oracle.py
deleted file mode 100644
index f6b8a18dd7c2..000000000000
--- a/tests/v1/test_oracle.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import os
-
-import pytest
-
-import vllm.envs as envs
-from vllm import LLM
-from vllm.engine.arg_utils import AsyncEngineArgs
-
-MODEL = "meta-llama/Llama-3.2-1B-Instruct"
-
-
-def test_reject_bad_config(monkeypatch):
-    with monkeypatch.context() as m:
-        m.setenv("VLLM_USE_V1", "0")
-
-
-def test_unsupported_configs(monkeypatch):
-
-    with monkeypatch.context() as m:
-        m.setenv("VLLM_USE_V1", "1")
-
-        with pytest.raises(NotImplementedError):
-            AsyncEngineArgs(
-                model=MODEL,
-                speculative_config={
-                    "model": MODEL,
-                },
-            ).create_engine_config()
-
-
-def test_enable_by_default_fallback(monkeypatch):
-    with monkeypatch.context() as m:
-        if os.getenv("VLLM_USE_V1", None):
-            m.delenv("VLLM_USE_V1")
-
-        # Should default to V1 for supported config.
-        _ = AsyncEngineArgs(
-            model=MODEL,
-            enforce_eager=True,
-        ).create_engine_config()
-        assert envs.VLLM_USE_V1
-        m.delenv("VLLM_USE_V1")
-
-
-def test_v1_llm_by_default(monkeypatch):
-    with monkeypatch.context() as m:
-        if os.getenv("VLLM_USE_V1", None):
-            m.delenv("VLLM_USE_V1")
-
-        # Should default to V1 for supported config.
-        llm = LLM(MODEL, enforce_eager=True, enable_lora=True)
-        print(llm.generate("Hello my name is"))
-        assert hasattr(llm.llm_engine, "engine_core")
-        m.delenv("VLLM_USE_V1")
-
-
-def test_v1_attn_backend(monkeypatch):
-    with monkeypatch.context() as m:
-        if os.getenv("VLLM_USE_V1", None):
-            m.delenv("VLLM_USE_V1")
-        m.setenv("VLLM_ATTENTION_BACKEND", "XFORMERS")
-
-        # Fall back to V0.
-        _ = AsyncEngineArgs(model=MODEL).create_engine_config()
-        assert not envs.VLLM_USE_V1
-        m.delenv("VLLM_USE_V1")
-
-        # Reject if V1.
-        m.setenv("VLLM_USE_V1", "1")
-        with pytest.raises(NotImplementedError):
-            AsyncEngineArgs(model=MODEL).create_engine_config()
-        m.delenv("VLLM_USE_V1")
-
-        m.setenv("VLLM_ATTENTION_BACKEND", "FLASHMLA")
-        _ = AsyncEngineArgs(model=MODEL).create_engine_config()
-        assert envs.VLLM_USE_V1
-        m.delenv("VLLM_USE_V1")