Skip to content

Commit 1d6e568

Browse files
committed
[main][bugfix] Disable the chunked prefill feature for non-MLA models
Signed-off-by: rjg-lyh <1318825571@qq.com>
1 parent 40c2c05 commit 1d6e568

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

vllm_ascend/platform.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,19 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
127127
model_config = vllm_config.model_config
128128
parallel_config = vllm_config.parallel_config
129129
cache_config = vllm_config.cache_config
130+
scheduler_config = vllm_config.scheduler_config
131+
ascend_scheduler_config = ascend_config.ascend_scheduler_config
132+
133+
if not model_config.use_mla:
134+
logger.info(
135+
"Non-MLA models forcibly disable the chunked prefill feature,"
136+
"as the performance of operators supporting this feature "
137+
"functionality is currently suboptimal.")
138+
scheduler_config.enable_chunked_prefill = False
139+
scheduler_config.chunked_prefill_enabled = False
140+
ascend_scheduler_config.enabled = True
141+
if hasattr(ascend_scheduler_config, "enable_chunked_prefill"):
142+
ascend_scheduler_config.enable_chunked_prefill = False
130143

131144
if parallel_config:
132145
if parallel_config.enable_expert_parallel:

0 commit comments

Comments
 (0)