
Commit 692435a

Author: hw_whx (committed)

feat: add ascend scheduler config to control ascend scheduler

Signed-off-by: hw_whx <wanghexiang7@huawei.com>

1 parent 03fbc3c · commit 692435a

File tree

2 files changed: +4 -2 lines changed


vllm_ascend/platform.py

Lines changed: 1 addition & 1 deletion
@@ -148,7 +148,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # Activate custom ops for v1.
         vllm_config.compilation_config.custom_ops = ["all"]
         additional_config = vllm_config.additional_config
-        if additional_config and additional_config.get("use_v0_style_scheduler", False):
+        if additional_config and additional_config.get("ascend_scheduler_config", None) is not None:
             from vllm.v1.engine.core import EngineCore
             from vllm_ascend.core.v1_engine_core_init import engine_core_init_with_v0style_scheduler
             EngineCore.__init__ = engine_core_init_with_v0style_scheduler
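
For context, a minimal sketch of how a user could opt in after this change, assuming vLLM forwards the additional_config engine argument into VllmConfig. The model name and the empty dict below are placeholders, since this diff only checks that the key is present and non-None:

# Sketch (not part of this commit): any non-None value under
# "ascend_scheduler_config" enables the v0-style scheduler patch,
# replacing the old boolean "use_v0_style_scheduler" flag.
from vllm import LLM

llm = LLM(
    model="facebook/opt-125m",          # placeholder model
    additional_config={
        "ascend_scheduler_config": {},  # presence (non-None) is what matters
    },
)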

vllm_ascend/worker/model_runner_v1.py

Lines changed: 3 additions & 1 deletion
@@ -256,8 +256,10 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
         # leading to performance degradation.
         # Therefore, an environment variable is added here to dynamically set
         # the size of the pre-constructed mask matrix based on requirements.
+        mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
+        self.attn_mask_len = min(self.max_model_len, int(mask_len))
         additional_config = vllm_config.additional_config
-        if additional_config and additional_config.get("use_v0_style_scheduler", False):
+        if additional_config and additional_config.get("ascend_scheduler_config", None) is not None:
             mask_value = None
         else:
             mask_value = -10000
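
The two added lines cap the size of the pre-built attention-mask matrix at the model's maximum length. A self-contained sketch of that clamping logic (the max_model_len value is illustrative; in the runner it comes from the model config):

import os

max_model_len = 4096  # illustrative; set from the model config in practice

# os.getenv returns the int default when the variable is unset, or a
# string when it is set; int() normalizes both cases.
mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
attn_mask_len = min(max_model_len, int(mask_len))

# PAGED_ATTENTION_MASK_LEN=2048 -> attn_mask_len == 2048
# variable unset                -> attn_mask_len == 4096 (model cap)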
