Skip to content

Commit e34ddcb

Browse files
committed
[main][bugfix] disable the chunked prefill feature in Non-MLA models
Signed-off-by: rjg-lyh <1318825571@qq.com>
1 parent 40c2c05 commit e34ddcb

File tree

2 files changed

+21
-0
lines changed

2 files changed

+21
-0
lines changed

tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
# we do not explicitly patch here; some of them might be ineffective
4040
# in pytest scenario
4141
from vllm_ascend.utils import adapt_patch # noqa E402
42+
from vllm_ascend.ascend_config import clear_ascend_config
4243

4344
adapt_patch(True)
4445

@@ -348,6 +349,7 @@ def __enter__(self):
348349

349350
def __exit__(self, exc_type, exc_value, traceback):
350351
del self.model
352+
clear_ascend_config()
351353
cleanup_dist_env_and_memory()
352354

353355

vllm_ascend/platform.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,25 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
127127
model_config = vllm_config.model_config
128128
parallel_config = vllm_config.parallel_config
129129
cache_config = vllm_config.cache_config
130+
scheduler_config = vllm_config.scheduler_config
131+
ascend_scheduler_config = ascend_config.ascend_scheduler_config
132+
133+
if not model_config.use_mla:
134+
logger.info(
135+
"Non-MLA LLMs forcibly disable the chunked prefill feature, "
136+
"as the performance of operators supporting this feature "
137+
"is currently suboptimal.")
138+
scheduler_config.enable_chunked_prefill = False
139+
scheduler_config.chunked_prefill_enabled = False
140+
if envs.VLLM_USE_V1 and \
141+
not model_config.is_multimodal_model and \
142+
not scheduler_config.delay_factor > 0 and \
143+
not scheduler_config.send_delta_data and \
144+
scheduler_config.policy == "fcfs" and \
145+
scheduler_config.num_scheduler_steps == 1:
146+
ascend_scheduler_config.enabled = True
147+
if hasattr(ascend_scheduler_config, "enable_chunked_prefill"):
148+
ascend_scheduler_config.enable_chunked_prefill = False
130149

131150
if parallel_config:
132151
if parallel_config.enable_expert_parallel:

0 commit comments

Comments
 (0)