Skip to content

Commit 426d0e0

Browse files
committed
fix_test_auto_prefix_cache_support
Signed-off-by: Huamin Li <3ericli@gmail.com>
1 parent 1e50f1b commit 426d0e0

File tree

2 files changed

+128
-14
lines changed

2 files changed

+128
-14
lines changed

tests/v1/core/test_scheduler.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,7 +1917,7 @@ def test_priority_scheduling_preemption_when_out_of_kv():
19171917
def test_chunked_prefill_disabled_for_encoder_decoder(
19181918
enable_chunked_prefill: bool, is_encoder_decoder: bool,
19191919
expect_enabled: bool) -> None:
1920-
"""Validate that chunked prefill is appropriately disabled for
1920+
"""Validate that chunked prefill is appropriately disabled for
19211921
encoder-decoder models."""
19221922
scheduler_config = SchedulerConfig(
19231923
enable_chunked_prefill=enable_chunked_prefill,
@@ -1942,7 +1942,7 @@ def test_chunked_prefill_disabled_for_encoder_decoder(
19421942
def _validate_chunked_prefill_settings_for_encoder_decoder(
19431943
scheduler_config: SchedulerConfig, is_encoder_decoder: bool,
19441944
expect_enabled: bool) -> None:
1945-
"""Validate chunked prefill settings in the scheduler config for
1945+
"""Validate chunked prefill settings in the scheduler config for
19461946
encoder-decoder models."""
19471947
assert scheduler_config.chunked_prefill_enabled is expect_enabled
19481948
assert scheduler_config.enable_chunked_prefill is expect_enabled
@@ -1952,3 +1952,48 @@ def _validate_chunked_prefill_settings_for_encoder_decoder(
19521952
assert scheduler_config.disable_chunked_mm_input is not expect_enabled
19531953
if is_encoder_decoder and not expect_enabled:
19541954
assert scheduler_config.long_prefill_token_threshold == 0
1955+
1956+
1957+
@pytest.mark.parametrize(
    ("enable_chunked_prefill", "apc", "is_encoder_decoder", "expect_cp",
     "expect_apc"),
    [
        # Decoder-only, CP left unset, APC off: CP resolves to on.
        (None, False, False, True, False),
        # Decoder-only with APC on: CP ends up on, whether it was unset
        # or explicitly turned off.
        (None, True, False, True, True),
        (False, True, False, True, True),
        # Decoder-only, CP explicitly off, APC off: both remain off.
        (False, False, False, False, False),
        # Encoder-decoder: CP and APC both end up off even though APC
        # was requested.
        (None, True, True, False, False),
        (False, True, True, False, False),
    ],
)
def test_chunked_prefill_resolution_and_apc_coupling(enable_chunked_prefill,
                                                     apc, is_encoder_decoder,
                                                     expect_cp,
                                                     expect_apc) -> None:
    """Check the chunked-prefill (CP) / prefix-caching (APC) values that
    come out of ``VllmConfig`` construction.

    Each parametrized case supplies the raw CP flag, the raw APC flag and
    whether the model is encoder-decoder, together with the CP and APC
    values expected on the resulting ``VllmConfig``.
    """
    # Build the raw scheduler/cache configs and hand them straight to
    # VllmConfig; the assertions below read the values back from it.
    resolved = VllmConfig(
        scheduler_config=SchedulerConfig(
            enable_chunked_prefill=enable_chunked_prefill,
            is_encoder_decoder=is_encoder_decoder,
        ),
        cache_config=CacheConfig(enable_prefix_caching=apc),
    )

    scheduler = resolved.scheduler_config
    # Both CP attributes on the scheduler config must agree.
    assert scheduler.chunked_prefill_enabled is expect_cp
    assert scheduler.enable_chunked_prefill is expect_cp
    assert resolved.cache_config.enable_prefix_caching is expect_apc

vllm/config/vllm.py

Lines changed: 81 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -396,17 +396,9 @@ def __post_init__(self):
396396
"try setting 'VLLM_WORKER_MULTIPROC_METHOD' "
397397
"to 'spawn'.")
398398

399-
# Disable prefix caching only if chunked prefill is explicitly disabled
400-
# (and not merely unset)
401-
if (self.scheduler_config.chunked_prefill_enabled is False
402-
or disable_chunked_prefill_reasons):
403-
for reason in disable_chunked_prefill_reasons:
404-
logger.info(reason)
405-
self.scheduler_config.chunked_prefill_enabled = False
406-
self.scheduler_config.long_prefill_token_threshold = 0
407-
408-
if self.cache_config is not None:
409-
self.cache_config.enable_prefix_caching = False
399+
# Finalize CP/APC based on flags, HF config (if present),
400+
# and scheduler signals.
401+
self._finalize_cp_apc(disable_chunked_prefill_reasons)
410402

411403
if (self.kv_events_config is not None
412404
and self.kv_events_config.enable_kv_cache_events
@@ -651,7 +643,7 @@ def try_verify_and_update_config(self):
651643
f"Model: {self.model_config.model}")
652644

653645
def compile_debug_dump_path(self) -> Optional[Path]:
654-
"""Returns a rank-aware path for dumping
646+
"""Returns a rank-aware path for dumping
655647
torch.compile debug information.
656648
"""
657649
if self.compilation_config.debug_dump_path is None:
@@ -664,6 +656,83 @@ def compile_debug_dump_path(self) -> Optional[Path]:
664656
path = self.compilation_config.debug_dump_path / append_path
665657
return path
666658

659+
def _finalize_cp_apc(self,
                     disable_chunked_prefill_reasons: list[str]) -> None:
    """Reconcile chunked prefill (CP) and prefix caching (APC) settings.

    Mutates ``self.scheduler_config`` and, when present,
    ``self.cache_config`` in place.

    Resolution order:

    1. Incompatible setup (``model_config.hf_config.is_encoder_decoder``
       is ``True``, the scheduler config already carries the
       encoder-decoder signature checked below, or
       ``disable_chunked_prefill_reasons`` is non-empty): log each
       reason, turn CP and APC off and set
       ``long_prefill_token_threshold`` to 0.
    2. CP unset (``None``), or APC requested while CP is not ``True``:
       turn CP on. APC therefore overrides an explicit ``CP=False``.
    3. CP explicitly ``False`` (and, by elimination, APC not requested):
       turn CP and APC off and set the threshold to 0.
    4. Anything else (e.g. CP explicitly ``True``): leave the configs
       untouched.

    Args:
        disable_chunked_prefill_reasons: Messages describing why CP must
            be disabled. Any entry makes the setup incompatible; each one
            is logged at INFO level.
    """
    sched = self.scheduler_config
    # Read once through getattr so every branch tolerates a missing or
    # None cache_config in the same way.
    cache = getattr(self, "cache_config", None)

    # --- Detect encoder-decoder ---
    # model_config / hf_config may be absent, hence the chained getattr.
    hf_cfg = getattr(getattr(self, "model_config", None), "hf_config", None)
    hf_is_encdec = getattr(hf_cfg, "is_encoder_decoder", None) is True

    # Scheduler-side signal: all four fields must be in the "disabled"
    # state at once for the setup to be treated as encoder-decoder.
    sched_looks_encdec = (
        getattr(sched, "disable_chunked_mm_input", False)
        and sched.enable_chunked_prefill is False
        and sched.chunked_prefill_enabled is False
        and getattr(sched, "long_prefill_token_threshold", None) == 0)

    incompatible = bool(hf_is_encdec or sched_looks_encdec
                        or disable_chunked_prefill_reasons)
    apc_requested = cache is not None and bool(cache.enable_prefix_caching)
    cp_flag = sched.enable_chunked_prefill

    # --- Decide ---
    if incompatible:
        # The reasons list can only be non-empty on this path, so this is
        # the single place where it needs to be logged.
        for reason in disable_chunked_prefill_reasons:
            logger.info(reason)
        turn_on = False
    elif cp_flag is None or (apc_requested and cp_flag is not True):
        turn_on = True
    elif cp_flag is False:
        # Reaching here with CP=False implies APC was not requested.
        turn_on = False
    else:
        # Nothing to reconcile.
        return

    # --- Apply ---
    if turn_on:
        sched.enable_chunked_prefill = True
        sched.chunked_prefill_enabled = True
        return

    sched.enable_chunked_prefill = False
    sched.chunked_prefill_enabled = False
    sched.long_prefill_token_threshold = 0
    if cache is not None:
        cache.enable_prefix_caching = False
735+
667736
def __str__(self):
668737
return (
669738
f"model={self.model_config.model!r}, "

0 commit comments

Comments
 (0)