Skip to content

Commit 7d6b033

Browse files
authored
[CI Failure] fix_test_auto_prefix_cache_support (#26053)
Signed-off-by: Huamin Li <3ericli@gmail.com>
1 parent 7c2e91c commit 7d6b033

File tree

2 files changed

+14
-7
lines changed

2 files changed

+14
-7
lines changed

tests/v1/core/test_scheduler.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1917,7 +1917,7 @@ def test_priority_scheduling_preemption_when_out_of_kv():
19171917
def test_chunked_prefill_disabled_for_encoder_decoder(
19181918
enable_chunked_prefill: bool, is_encoder_decoder: bool,
19191919
expect_enabled: bool) -> None:
1920-
"""Validate that chunked prefill is appropriately disabled for
1920+
"""Validate that chunked prefill is appropriately disabled for
19211921
encoder-decoder models."""
19221922
scheduler_config = SchedulerConfig(
19231923
enable_chunked_prefill=enable_chunked_prefill,
@@ -1942,7 +1942,7 @@ def test_chunked_prefill_disabled_for_encoder_decoder(
19421942
def _validate_chunked_prefill_settings_for_encoder_decoder(
19431943
scheduler_config: SchedulerConfig, is_encoder_decoder: bool,
19441944
expect_enabled: bool) -> None:
1945-
"""Validate chunked prefill settings in the scheduler config for
1945+
"""Validate chunked prefill settings in the scheduler config for
19461946
encoder-decoder models."""
19471947
assert scheduler_config.chunked_prefill_enabled is expect_enabled
19481948
assert scheduler_config.enable_chunked_prefill is expect_enabled

vllm/config/vllm.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -396,10 +396,17 @@ def __post_init__(self):
396396
"try setting 'VLLM_WORKER_MULTIPROC_METHOD' "
397397
"to 'spawn'.")
398398

399-
# Disable prefix caching only if chunked prefill is explicitly disabled
400-
# (and not merely unset)
401-
if (self.scheduler_config.chunked_prefill_enabled is False
402-
or disable_chunked_prefill_reasons):
399+
# Final off-switch for CP/APC:
400+
# Disable for (a) collected blockers, (b) encoder–decoder, or
401+
# (c) explicit CP=False when APC wasn't requested.
402+
# Do NOT disable merely because the resolved CP flag is False.
403+
apc_requested = (self.cache_config is not None
404+
and self.cache_config.enable_prefix_caching)
405+
if (disable_chunked_prefill_reasons
406+
or (self.model_config is not None
407+
and self.model_config.is_encoder_decoder)
408+
or (self.scheduler_config.enable_chunked_prefill is False
409+
and not apc_requested)):
403410
for reason in disable_chunked_prefill_reasons:
404411
logger.info(reason)
405412
self.scheduler_config.chunked_prefill_enabled = False
@@ -668,7 +675,7 @@ def try_verify_and_update_config(self):
668675
f"Model: {self.model_config.model}")
669676

670677
def compile_debug_dump_path(self) -> Optional[Path]:
671-
"""Returns a rank-aware path for dumping
678+
"""Returns a rank-aware path for dumping
672679
torch.compile debug information.
673680
"""
674681
if self.compilation_config.debug_dump_path is None:

0 commit comments

Comments
 (0)