@@ -396,17 +396,9 @@ def __post_init__(self):
396396 "try setting 'VLLM_WORKER_MULTIPROC_METHOD' "
397397 "to 'spawn'." )
398398
399- # Disable prefix caching only if chunked prefill is explicitly disabled
400- # (and not merely unset)
401- if (self .scheduler_config .chunked_prefill_enabled is False
402- or disable_chunked_prefill_reasons ):
403- for reason in disable_chunked_prefill_reasons :
404- logger .info (reason )
405- self .scheduler_config .chunked_prefill_enabled = False
406- self .scheduler_config .long_prefill_token_threshold = 0
407-
408- if self .cache_config is not None :
409- self .cache_config .enable_prefix_caching = False
399+ # Finalize CP/APC based on flags, HF config (if present),
400+ # and scheduler signals.
401+ self ._finalize_cp_apc (disable_chunked_prefill_reasons )
410402
411403 if (self .kv_events_config is not None
412404 and self .kv_events_config .enable_kv_cache_events
@@ -651,7 +643,7 @@ def try_verify_and_update_config(self):
651643 f"Model: { self .model_config .model } " )
652644
653645 def compile_debug_dump_path (self ) -> Optional [Path ]:
654- """Returns a rank-aware path for dumping
646+ """Returns a rank-aware path for dumping
655647 torch.compile debug information.
656648 """
657649 if self .compilation_config .debug_dump_path is None :
@@ -664,6 +656,83 @@ def compile_debug_dump_path(self) -> Optional[Path]:
664656 path = self .compilation_config .debug_dump_path / append_path
665657 return path
666658
def _finalize_cp_apc(self,
                     disable_chunked_prefill_reasons: list[str]) -> None:
    """Settle the final chunked-prefill (CP) and prefix-caching (APC) state.

    The decision is made once, in this order:

    1. Incompatible setup -- the HF config reports ``is_encoder_decoder``,
       the scheduler config already looks like an encoder-decoder setup,
       or the caller collected at least one blocker reason: CP and APC are
       switched off and ``long_prefill_token_threshold`` is set to 0.
    2. Explicit ``enable_chunked_prefill=False`` with no APC request:
       same switch-off as (1).
    3. APC requested while the CP knob is not ``True``, or the CP knob is
       unset (``None``): CP is switched on (APC implies CP).
    4. Otherwise (CP knob already ``True``): nothing is changed.

    Args:
        disable_chunked_prefill_reasons: Messages explaining why chunked
            prefill cannot be used. Each one is logged at INFO level when
            the switch-off happens; a non-empty list forces case (1).

    Returns:
        None. ``self.scheduler_config`` and, when present,
        ``self.cache_config`` are mutated in place.
    """
    sched = self.scheduler_config
    # Read defensively everywhere: the attribute may be missing or None.
    cache = getattr(self, "cache_config", None)

    # --- Encoder-decoder detection ---
    # The HF config may not be attached yet, hence the chained getattr.
    hf_cfg = getattr(getattr(self, "model_config", None), "hf_config", None)
    hf_is_encdec = getattr(hf_cfg, "is_encoder_decoder", None) is True

    # Fallback heuristic on scheduler state.
    # NOTE(review): presumably this combination is what the scheduler
    # config produces for encoder-decoder models -- confirm against
    # SchedulerConfig.__post_init__.
    sched_looks_encdec = (
        getattr(sched, "disable_chunked_mm_input", False)
        and sched.enable_chunked_prefill is False
        and sched.chunked_prefill_enabled is False
        and getattr(sched, "long_prefill_token_threshold", None) == 0)

    incompatible = (hf_is_encdec or sched_looks_encdec
                    or bool(disable_chunked_prefill_reasons))
    apc_requested = cache is not None and cache.enable_prefix_caching
    cp_knob = sched.enable_chunked_prefill

    # --- Switch-off path: incompatible, or explicit CP=False without APC ---
    if incompatible or (cp_knob is False and not apc_requested):
        # The list is only non-empty on the incompatible path; iterating
        # an empty list is a no-op for the explicit-off path.
        for reason in disable_chunked_prefill_reasons:
            logger.info(reason)
        sched.enable_chunked_prefill = False
        sched.chunked_prefill_enabled = False
        sched.long_prefill_token_threshold = 0
        if cache is not None:
            cache.enable_prefix_caching = False
        return

    # --- Switch-on path: knob unset, or APC requested (overrides CP=False) ---
    if cp_knob is None or (apc_requested and cp_knob is not True):
        sched.enable_chunked_prefill = True
        sched.chunked_prefill_enabled = True
735+
667736 def __str__ (self ):
668737 return (
669738 f"model={ self .model_config .model !r} , "
0 commit comments