diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 5f076f05d046..a20ae76f3a23 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1196,6 +1196,9 @@ def create_engine_config(self, msg = "Chunked prefill is not supported for pooling models" raise ValueError(msg) + if model_config.use_mla: + logger.info("MLA is enabled; forcing chunked prefill disabled.") + self.enable_chunked_prefill = False speculative_config = SpeculativeConfig.maybe_create_spec_config( target_model_config=model_config,