1 file changed: +13 -0 lines changed
Columns: original file line number | diff line number | diff line change
@@ -127,6 +127,19 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
127127 model_config = vllm_config .model_config
128128 parallel_config = vllm_config .parallel_config
129129 cache_config = vllm_config .cache_config
130+ scheduler_config = vllm_config .scheduler_config
131+ ascend_scheduler_config = ascend_config .ascend_scheduler_config
132+
133+ if not model_config .use_mla :
134+ logger .info (
135+ "Non-MLA models forcibly disable the chunked prefill feature,"
136+ "as the performance of operators supporting this feature "
137+ "functionality is currently suboptimal." )
138+ scheduler_config .enable_chunked_prefill = False
139+ scheduler_config .chunked_prefill_enabled = False
140+ ascend_scheduler_config .enabled = True
141+ if hasattr (ascend_scheduler_config , "enable_chunked_prefill" ):
142+ ascend_scheduler_config .enable_chunked_prefill = False
130143
131144 if parallel_config :
132145 if parallel_config .enable_expert_parallel :
You can’t perform that action at this time.
0 commit comments