
Commit 692435a

Author: hw_whx (committed)

feat: add ascend scheduler config to control ascend scheduler

Signed-off-by: hw_whx <wanghexiang7@huawei.com>

1 parent 03fbc3c · commit 692435a

File tree

2 files changed: +4 -2 lines changed


vllm_ascend/platform.py

Lines changed: 1 addition & 1 deletion
@@ -148,7 +148,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # Activate custom ops for v1.
         vllm_config.compilation_config.custom_ops = ["all"]
         additional_config = vllm_config.additional_config
-        if additional_config and additional_config.get("use_v0_style_scheduler", False):
+        if additional_config and additional_config.get("ascend_scheduler_config", None) is not None:
             from vllm.v1.engine.core import EngineCore
             from vllm_ascend.core.v1_engine_core_init import engine_core_init_with_v0style_scheduler
             EngineCore.__init__ = engine_core_init_with_v0style_scheduler
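
For context, a minimal sketch of how a user could opt in after this change, assuming vLLM forwards the additional_config engine argument into VllmConfig. The model name and the empty dict below are placeholders, since this diff only checks that the key is present and non-None:

# Sketch (not part of this commit): any non-None value under
# "ascend_scheduler_config" enables the v0-style scheduler patch,
# replacing the old boolean "use_v0_style_scheduler" flag.
from vllm import LLM

llm = LLM(
    model="facebook/opt-125m",          # placeholder model
    additional_config={
        "ascend_scheduler_config": {},  # presence (non-None) is what matters
    },
)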

vllm_ascend/worker/model_runner_v1.py

Lines changed: 3 additions & 1 deletion
@@ -256,8 +256,10 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
         # leading to performance degradation.
         # Therefore, an environment variable is added here to dynamically set
         # the size of the pre-constructed mask matrix based on requirements.
+        mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
+        self.attn_mask_len = min(self.max_model_len, int(mask_len))
         additional_config = vllm_config.additional_config
-        if additional_config and additional_config.get("use_v0_style_scheduler", False):
+        if additional_config and additional_config.get("ascend_scheduler_config", None) is not None:
             mask_value = None
         else:
             mask_value = -10000
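
The two added lines cap the size of the pre-built attention-mask matrix at the model's maximum length. A self-contained sketch of that clamping logic (the max_model_len value is illustrative; in the runner it comes from the model config):

import os

max_model_len = 4096  # illustrative; set from the model config in practice

# os.getenv returns the int default when the variable is unset, or a
# string when it is set; int() normalizes both cases.
mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
attn_mask_len = min(max_model_len, int(mask_len))

# PAGED_ATTENTION_MASK_LEN=2048 -> attn_mask_len == 2048
# variable unset                -> attn_mask_len == 4096 (model cap)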
