
Commit b9a4e95

[BugFix] Fix ascend config check
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent: 908a851

File tree: 4 files changed, +80 −7 lines changed

docs/source/user_guide/additional_config.md

Lines changed: 4 additions & 2 deletions
```diff
@@ -29,6 +29,7 @@ The following table lists the additional configuration options available in vLLM
 | `torchair_graph_config` | dict | `{}` | The config options for torchair graph mode |
 | `ascend_scheduler_config` | dict | `{}` | The config options for ascend scheduler |
 | `expert_tensor_parallel_size` | str | `1` | Expert tensor parallel size the model to use. |
+| `refresh` | bool | `false` | Whether to refresh the global ascend config content. This value is usually used in RLHF cases. |
 
 The details of each config option are as follows:
 
@@ -59,12 +60,13 @@ A full example of additional configuration is as follows:
         "enabled": true,
         "use_cached_graph": true,
         "graph_batch_sizes": [1, 2, 4, 8],
-        "graph_batch_sizes_init": true
+        "graph_batch_sizes_init": false
     },
     "ascend_scheduler_config": {
         "enabled": true,
         "chunked_prefill_enabled": true,
     },
-    "expert_tensor_parallel_size": 1
+    "expert_tensor_parallel_size": 1,
+    "refresh": false,
 }
```
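For readers wiring this up, a minimal usage sketch of the new `refresh` option in offline inference follows. It assumes vLLM's `LLM` entry point accepts and forwards `additional_config`; the model name and option values are illustrative only, not part of this commit:

```python
from vllm import LLM

# Sketch only: pass `refresh: True` through additional_config so that
# init_ascend_config() rebuilds the cached global AscendConfig instead of
# returning the existing singleton. This matters in flows (e.g. RLHF) that
# re-create the engine inside one long-lived process.
llm = LLM(
    model="facebook/opt-125m",  # model used by the tests; any supported model works
    additional_config={
        "torchair_graph_config": {"enabled": False},
        "ascend_scheduler_config": {"enabled": True},
        "expert_tensor_parallel_size": 1,
        "refresh": True,  # force re-initialization of the global ascend config
    },
)
print(llm.generate("Hello, my name is"))
```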

tests/singlecard/test_ascend_config.py

Lines changed: 70 additions & 1 deletion
```diff
@@ -16,7 +16,7 @@
 import pytest
 
 from tests.conftest import VllmRunner
-from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config
+from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config, init_ascend_config
 
 
 def _clean_up_ascend_config(func):
@@ -59,7 +59,25 @@ def test_run_with_ascend_config():
         },
         "expert_tensor_parallel_size": 1
     }
+
+    # check passed with eager mode
+    with VllmRunner("facebook/opt-125m",
+                    additional_config=input_additional_config):
+        ascend_config = get_ascend_config()
+
+        assert not ascend_config.torchair_graph_config.enabled
+        assert ascend_config.torchair_graph_config.use_cached_graph
+        assert ascend_config.torchair_graph_config.graph_batch_sizes == [
+            1, 2, 4, 8
+        ]
+        assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
+        assert ascend_config.ascend_scheduler_config.enabled
+        assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
+        assert ascend_config.expert_tensor_parallel_size == 1
+
+    # check passed with aclgraph mode
     with VllmRunner("facebook/opt-125m",
+                    enforce_eager=False,
                     additional_config=input_additional_config):
         ascend_config = get_ascend_config()
 
@@ -114,5 +132,56 @@ def test_ascend_config_load_error():
         },
     }
     with VllmRunner("facebook/opt-125m",
+                    enforce_eager=False,
                     additional_config=input_additional_config_fake_2):
         pass
+
+    # torchair graph should not be enabled with eager mode
+    with pytest.raises(RuntimeError):
+        input_additional_config_fake_1 = {
+            "torchair_graph_config": {
+                "enabled": True,
+            },
+        }
+        with VllmRunner("facebook/opt-125m",
+                        enforce_eager=True,
+                        additional_config=input_additional_config_fake_1):
+            pass
+
+
+@_clean_up_ascend_config
+def test_ascend_config_refresh():
+    from vllm.config import get_current_vllm_config
+    vllm_config = get_current_vllm_config()
+    # set additional_config with none
+    init_ascend_config(vllm_config)
+
+    input_additional_config = {
+        "torchair_graph_config": {
+            "enabled": False,
+            "use_cached_graph": True,
+            "graph_batch_sizes": [1, 2, 4, 8],
+            "graph_batch_sizes_init": False,
+        },
+        "ascend_scheduler_config": {
+            "enabled": True,
+            "enable_chunked_prefill": True,
+        },
+        "expert_tensor_parallel_size": 1,
+        "refresh": True,
+    }
+
+    # refresh ascend config
+    with VllmRunner("facebook/opt-125m",
+                    additional_config=input_additional_config):
+        ascend_config = get_ascend_config()
+
+        assert not ascend_config.torchair_graph_config.enabled
+        assert ascend_config.torchair_graph_config.use_cached_graph
+        assert ascend_config.torchair_graph_config.graph_batch_sizes == [
+            1, 2, 4, 8
+        ]
+        assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
+        assert ascend_config.ascend_scheduler_config.enabled
+        assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
+        assert ascend_config.expert_tensor_parallel_size == 1
```
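These tests lean on the `_clean_up_ascend_config` decorator, whose body sits outside the hunks above. A plausible sketch, assuming it only resets the global singleton around each test via the `clear_ascend_config` helper the module already imports (this is a hypothetical reconstruction, not the committed code):

```python
import functools

from vllm_ascend.ascend_config import clear_ascend_config


def _clean_up_ascend_config(func):
    # Hypothetical reconstruction: drop the cached global AscendConfig before
    # and after the wrapped test so no state leaks between test cases.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        clear_ascend_config()
        try:
            return func(*args, **kwargs)
        finally:
            clear_ascend_config()

    return wrapper
```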

vllm_ascend/ascend_config.py

Lines changed: 5 additions & 3 deletions
```diff
@@ -80,8 +80,10 @@ def __init__(self, ascend_scheduler_config: dict):
 
 
 def init_ascend_config(vllm_config):
+    additional_config = vllm_config.additional_config if vllm_config.additional_config is not None else {}
+    refresh = additional_config.get("refresh", False) if additional_config else False
     global _ASCEND_CONFIG
-    if _ASCEND_CONFIG is not None:
+    if _ASCEND_CONFIG is not None and not refresh:
         return _ASCEND_CONFIG
     _ASCEND_CONFIG = AscendConfig(vllm_config)
     return _ASCEND_CONFIG
@@ -105,7 +107,7 @@ def check_ascend_config(vllm_config, enforce_eager):
     ascend_config = get_ascend_config()
 
     # Both for V0 and V1 Engine, torchair_graph cannot be enabled with eager mode.
-    if ascend_config.torchair_graph_config.enabled and not enforce_eager:
+    if ascend_config.torchair_graph_config.enabled and enforce_eager:
         raise RuntimeError(
             "Can't enable graph mode and eager mode at the same time. Please set `enforce_eager=False` if you attempt to enable NPU graph mode."
         )
@@ -124,7 +126,7 @@ def check_ascend_config(vllm_config, enforce_eager):
                 "Torchair graph mode only works with deepseek model.")
 
     # for V1 Engine, aclgraph doesn't work with deepseek model and only qwen model is well tested.
-    if envs.VLLM_USE_V1 and vllm_config.model_config is not None and not enforce_eager:
+    if envs.VLLM_USE_V1 and vllm_config.model_config is not None and not enforce_eager and not ascend_config.torchair_graph_config.enabled:
         model_type = vllm_config.model_config.hf_config.model_type
         if "deepseek" in model_type:
             raise NotImplementedError(
```
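The `refresh` handling above is a refresh-aware singleton: reuse the cached config unless the caller explicitly asks for a rebuild. A self-contained sketch of the pattern, with `AscendConfig` and the vllm config stubbed out for illustration (the stub classes are not the real ones; only the `init_ascend_config` logic mirrors the diff):

```python
_ASCEND_CONFIG = None


class AscendConfig:
    """Stub standing in for the real vllm_ascend AscendConfig."""

    def __init__(self, vllm_config):
        self.vllm_config = vllm_config


def init_ascend_config(vllm_config):
    # Reuse the cached config unless the caller explicitly asks to refresh,
    # e.g. an RLHF flow that re-creates the engine within one process.
    additional_config = vllm_config.additional_config or {}
    refresh = additional_config.get("refresh", False)
    global _ASCEND_CONFIG
    if _ASCEND_CONFIG is not None and not refresh:
        return _ASCEND_CONFIG
    _ASCEND_CONFIG = AscendConfig(vllm_config)
    return _ASCEND_CONFIG


class _FakeVllmConfig:
    """Illustrative stand-in for vllm.config.VllmConfig."""

    def __init__(self, additional_config=None):
        self.additional_config = additional_config


first = init_ascend_config(_FakeVllmConfig())
cached = init_ascend_config(_FakeVllmConfig())
rebuilt = init_ascend_config(_FakeVllmConfig({"refresh": True}))
assert first is cached       # singleton is reused while refresh is unset
assert rebuilt is not first  # refresh forces a rebuild
```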

vllm_ascend/worker/model_runner_v1.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -323,7 +323,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
 
         ascend_config = get_ascend_config()
         self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled and self.vllm_config.model_config.use_mla
-        self.torchair_graph_use_cached_npu_graph = ascend_config.torchair_graph_config.use_cached_graph
+        self.use_cached_npu_graph = ascend_config.torchair_graph_config.use_cached_graph
         self.torchair_graph_batch_sizes = ascend_config.torchair_graph_config.graph_batch_sizes
 
         if ascend_config.torchair_graph_config.graph_batch_sizes_init:
```
