fix_test_auto_prefix_cache_support

hl475 · hl475 · commit 426d0e04254c · 2025-10-02T11:08:34.000-07:00
Signed-off-by: Huamin Li &lt;3ericli@gmail.com&gt;
diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py
@@ -1917,7 +1917,7 @@ def test_priority_scheduling_preemption_when_out_of_kv():
 def test_chunked_prefill_disabled_for_encoder_decoder(
         enable_chunked_prefill: bool, is_encoder_decoder: bool,
         expect_enabled: bool) -> None:
-    """Validate that chunked prefill is appropriately disabled for 
+    """Validate that chunked prefill is appropriately disabled for
     encoder-decoder models."""
     scheduler_config = SchedulerConfig(
         enable_chunked_prefill=enable_chunked_prefill,
@@ -1942,7 +1942,7 @@ def test_chunked_prefill_disabled_for_encoder_decoder(
 def _validate_chunked_prefill_settings_for_encoder_decoder(
         scheduler_config: SchedulerConfig, is_encoder_decoder: bool,
         expect_enabled: bool) -> None:
-    """Validate chunked prefill settings in the scheduler config for 
+    """Validate chunked prefill settings in the scheduler config for
     encoder-decoder models."""
     assert scheduler_config.chunked_prefill_enabled is expect_enabled
     assert scheduler_config.enable_chunked_prefill is expect_enabled
@@ -1952,3 +1952,48 @@ def _validate_chunked_prefill_settings_for_encoder_decoder(
         assert scheduler_config.disable_chunked_mm_input is not expect_enabled
     if is_encoder_decoder and not expect_enabled:
         assert scheduler_config.long_prefill_token_threshold == 0
+
+
+@pytest.mark.parametrize(
+    ("enable_chunked_prefill", "apc", "is_encoder_decoder", "expect_cp",
+     "expect_apc"),
+    [
+        # (1) Default ON when unset and decoder-only (eligible)
+        (None, False, False, True, False),
+        # (2) APC implies CP for eligible decoder-only
+        (None, True, False, True, True),
+        (False, True, False, True, True),
+        # (3) Explicit CP=False and no APC → both off
+        (False, False, False, False, False),
+        # (4) Encoder-decoder stays off regardless of APC
+        (None, True, True, False, False),
+        (False, True, True, False, False),
+    ],
+)
+def test_chunked_prefill_resolution_and_apc_coupling(enable_chunked_prefill,
+                                                     apc, is_encoder_decoder,
+                                                     expect_cp,
+                                                     expect_apc) -> None:
+    """
+    Validate scheduler defaulting and APC↔CP coupling semantics:
+      - Unset CP defaults to ON for eligible decoder-only models.
+      - APC implies CP when eligible (choose a coherent config).
+      - Explicit CP=False without APC keeps both off.
+      - Encoder-decoder keeps CP/APC off regardless of APC.
+    """
+    # Pre-finalization: construct raw configs.
+    sched_cfg = SchedulerConfig(
+        enable_chunked_prefill=enable_chunked_prefill,
+        is_encoder_decoder=is_encoder_decoder,
+    )
+    cache_cfg = CacheConfig(enable_prefix_caching=apc)
+
+    # Post-finalization: pass through VllmConfig.
+    vcfg = VllmConfig(scheduler_config=sched_cfg, cache_config=cache_cfg)
+
+    # Verify resolved (post-init) semantics.
+    sc = vcfg.scheduler_config
+    cc = vcfg.cache_config
+    assert sc.chunked_prefill_enabled is expect_cp
+    assert sc.enable_chunked_prefill is expect_cp
+    assert cc.enable_prefix_caching is expect_apc
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
@@ -396,17 +396,9 @@ def __post_init__(self):
                         "try setting 'VLLM_WORKER_MULTIPROC_METHOD' "
                         "to 'spawn'.")
 
-        # Disable prefix caching only if chunked prefill is explicitly disabled
-        # (and not merely unset)
-        if (self.scheduler_config.chunked_prefill_enabled is False
-                or disable_chunked_prefill_reasons):
-            for reason in disable_chunked_prefill_reasons:
-                logger.info(reason)
-            self.scheduler_config.chunked_prefill_enabled = False
-            self.scheduler_config.long_prefill_token_threshold = 0
-
-            if self.cache_config is not None:
-                self.cache_config.enable_prefix_caching = False
+        # Finalize CP/APC based on flags, HF config (if present),
+        # and scheduler signals.
+        self._finalize_cp_apc(disable_chunked_prefill_reasons)
 
         if (self.kv_events_config is not None
                 and self.kv_events_config.enable_kv_cache_events
@@ -651,7 +643,7 @@ def try_verify_and_update_config(self):
                                  f"Model: {self.model_config.model}")
 
     def compile_debug_dump_path(self) -> Optional[Path]:
-        """Returns a rank-aware path for dumping 
+        """Returns a rank-aware path for dumping
         torch.compile debug information.
         """
         if self.compilation_config.debug_dump_path is None:
@@ -664,6 +656,83 @@ def compile_debug_dump_path(self) -> Optional[Path]:
         path = self.compilation_config.debug_dump_path / append_path
         return path
 
+    def _finalize_cp_apc(self,
+                         disable_chunked_prefill_reasons: list[str]) -> None:
+        """
+        Single source of truth for CP/APC finalization.
+
+        Semantics:
+        - INCOMPATIBLE (encoder–decoder or any collected blockers)
+          => disable CP & APC, set threshold=0.
+        - APC requested on eligible setup
+          => CP must be ON (APC ⇒ CP).
+        - CP knob unset (None) on eligible setup
+          => default CP=ON.
+        - Explicit CP=False and no APC request
+          => disable CP & APC, set threshold=0.
+
+        Notes:
+        * Encoder–decoder is detected via HF if present, otherwise via
+          scheduler InitVar signals.
+        * Robust when HF config is not yet attached (pooling fast paths).
+        """
+        # --- Detect encoder–decoder ---
+        hf_cfg = getattr(getattr(self, "model_config", None), "hf_config",
+                         None)
+        hf_is_encdec = (getattr(hf_cfg, "is_encoder_decoder", None) is True)
+
+        # Scheduler InitVar path (set in SchedulerConfig.__post_init__)
+        sched_looks_encdec = (
+            getattr(self.scheduler_config, "disable_chunked_mm_input", False)
+            and self.scheduler_config.enable_chunked_prefill is False
+            and self.scheduler_config.chunked_prefill_enabled is False
+            and getattr(self.scheduler_config, "long_prefill_token_threshold",
+                        None) == 0)
+
+        incompatible = (hf_is_encdec or sched_looks_encdec
+                        or bool(disable_chunked_prefill_reasons))
+
+        explicit_cp_off = (self.scheduler_config.enable_chunked_prefill
+                           is False)
+        apc_requested = (getattr(self, "cache_config", None) is not None
+                         and self.cache_config.enable_prefix_caching)
+
+        # --- (1) Authoritative disable: incompatible setups always disable ---
+        if incompatible:
+            for reason in disable_chunked_prefill_reasons:
+                logger.info(reason)
+            # Keep CP/APC off and set ED-safe threshold
+            self.scheduler_config.enable_chunked_prefill = False
+            self.scheduler_config.chunked_prefill_enabled = False
+            self.scheduler_config.long_prefill_token_threshold = 0
+            if self.cache_config is not None:
+                self.cache_config.enable_prefix_caching = False
+            return
+
+        # --- (2) APC ⇒ CP on eligible setups (override explicit CP=False) ---
+        if (apc_requested
+                and self.scheduler_config.enable_chunked_prefill is not True):
+            self.scheduler_config.enable_chunked_prefill = True
+            self.scheduler_config.chunked_prefill_enabled = True
+            return
+
+        # --- (3) Default CP=ON when knob is unset (eligible path) ---
+        if self.scheduler_config.enable_chunked_prefill is None:
+            self.scheduler_config.enable_chunked_prefill = True
+            self.scheduler_config.chunked_prefill_enabled = True
+            return
+
+        # --- (4) Respect explicit CP=False when APC is NOT requested ---
+        if explicit_cp_off and not apc_requested:
+            for reason in disable_chunked_prefill_reasons:
+                logger.info(reason)
+            self.scheduler_config.enable_chunked_prefill = False
+            self.scheduler_config.chunked_prefill_enabled = False
+            self.scheduler_config.long_prefill_token_threshold = 0
+            if self.cache_config is not None:
+                self.cache_config.enable_prefix_caching = False
+            return
+
     def __str__(self):
         return (
             f"model={self.model_config.model!r}, "