File tree Expand file tree Collapse file tree 2 files changed +21
-0
lines changed Expand file tree Collapse file tree 2 files changed +21
-0
lines changed Original file line number Diff line number Diff line change 3939# we do not explicitly patch here, some of them might be ineffective
4040# in pytest scenario
4141from vllm_ascend .utils import adapt_patch # noqa E402
42+ from vllm_ascend .ascend_config import clear_ascend_config
4243
4344adapt_patch (True )
4445
@@ -348,6 +349,7 @@ def __enter__(self):
348349
349350 def __exit__ (self , exc_type , exc_value , traceback ):
350351 del self .model
352+ clear_ascend_config ()
351353 cleanup_dist_env_and_memory ()
352354
353355
Original file line number Diff line number Diff line change @@ -127,6 +127,25 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
127127 model_config = vllm_config .model_config
128128 parallel_config = vllm_config .parallel_config
129129 cache_config = vllm_config .cache_config
130+ scheduler_config = vllm_config .scheduler_config
131+ ascend_scheduler_config = ascend_config .ascend_scheduler_config
132+
133+ if not model_config .use_mla :
134+ logger .info (
135+ "Non-MLA LLMs forcibly disable the chunked prefill feature,"
136+ "as the performance of operators supporting this feature "
137+ "functionality is currently suboptimal." )
138+ scheduler_config .enable_chunked_prefill = False
139+ scheduler_config .chunked_prefill_enabled = False
140+ if envs .VLLM_USE_V1 and \
141+ not model_config .is_multimodal_model and \
142+ not scheduler_config .delay_factor > 0 and \
143+ not scheduler_config .send_delta_data and \
144+ scheduler_config .policy == "fcfs" and \
145+ scheduler_config .num_scheduler_steps == 1 :
146+ ascend_scheduler_config .enabled = True
147+ if hasattr (ascend_scheduler_config , "enable_chunked_prefill" ):
148+ ascend_scheduler_config .enable_chunked_prefill = False
130149
131150 if parallel_config :
132151 if parallel_config .enable_expert_parallel :
You can’t perform that action at this time.
0 commit comments