|
16 | 16 | import pytest |
17 | 17 |
|
18 | 18 | from tests.conftest import VllmRunner |
19 | | -from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config |
| 19 | +from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config, init_ascend_config |
20 | 20 |
|
21 | 21 |
|
22 | 22 | def _clean_up_ascend_config(func): |
@@ -59,7 +59,25 @@ def test_run_with_ascend_config(): |
59 | 59 | }, |
60 | 60 | "expert_tensor_parallel_size": 1 |
61 | 61 | } |
| 62 | + |
| 63 | + # check passed with eager mode |
| 64 | + with VllmRunner("facebook/opt-125m", |
| 65 | + additional_config=input_additional_config): |
| 66 | + ascend_config = get_ascend_config() |
| 67 | + |
| 68 | + assert not ascend_config.torchair_graph_config.enabled |
| 69 | + assert ascend_config.torchair_graph_config.use_cached_graph |
| 70 | + assert ascend_config.torchair_graph_config.graph_batch_sizes == [ |
| 71 | + 1, 2, 4, 8 |
| 72 | + ] |
| 73 | + assert not ascend_config.torchair_graph_config.graph_batch_sizes_init |
| 74 | + assert ascend_config.ascend_scheduler_config.enabled |
| 75 | + assert ascend_config.ascend_scheduler_config.enable_chunked_prefill |
| 76 | + assert ascend_config.expert_tensor_parallel_size == 1 |
| 77 | + |
| 78 | + # check passed with aclgraph mode |
62 | 79 | with VllmRunner("facebook/opt-125m", |
| 80 | + enforce_eager=False, |
63 | 81 | additional_config=input_additional_config): |
64 | 82 | ascend_config = get_ascend_config() |
65 | 83 |
|
@@ -114,5 +132,56 @@ def test_ascend_config_load_error(): |
114 | 132 | }, |
115 | 133 | } |
116 | 134 | with VllmRunner("facebook/opt-125m", |
| 135 | + enforce_eager=False, |
117 | 136 | additional_config=input_additional_config_fake_2): |
118 | 137 | pass |
| 138 | + |
| 139 | + # torchair graph should not be enabled with eager mode |
| 140 | + with pytest.raises(RuntimeError): |
| 141 | + input_additional_config_fake_1 = { |
| 142 | + "torchair_graph_config": { |
| 143 | + "enabled": True, |
| 144 | + }, |
| 145 | + } |
| 146 | + with VllmRunner("facebook/opt-125m", |
| 147 | + enforce_eager=True, |
| 148 | + additional_config=input_additional_config_fake_1): |
| 149 | + pass |
| 150 | + |
| 151 | + |
@_clean_up_ascend_config
def test_ascend_config_refresh():
    """Check that a config passed with ``"refresh": True`` is what
    ``get_ascend_config()`` reports after an earlier initialisation.

    The ascend config is first initialised from the current vllm config.
    A ``VllmRunner`` is then started with an ``additional_config`` that
    carries ``"refresh": True``, and every value supplied there is asserted
    against the object returned by ``get_ascend_config()``.
    """
    from vllm.config import get_current_vllm_config

    # First initialisation: per the original note, no additional_config is
    # set at this point.
    init_ascend_config(get_current_vllm_config())

    torchair_graph_options = {
        "enabled": False,
        "use_cached_graph": True,
        "graph_batch_sizes": [1, 2, 4, 8],
        "graph_batch_sizes_init": False,
    }
    scheduler_options = {
        "enabled": True,
        "enable_chunked_prefill": True,
    }
    refreshed_additional_config = {
        "torchair_graph_config": torchair_graph_options,
        "ascend_scheduler_config": scheduler_options,
        "expert_tensor_parallel_size": 1,
        "refresh": True,
    }

    # Second initialisation, this time asking for the config to be refreshed.
    with VllmRunner("facebook/opt-125m",
                    additional_config=refreshed_additional_config):
        current = get_ascend_config()

        # torchair graph settings must mirror the values passed above
        graph_cfg = current.torchair_graph_config
        assert not graph_cfg.enabled
        assert graph_cfg.use_cached_graph
        assert graph_cfg.graph_batch_sizes == [1, 2, 4, 8]
        assert not graph_cfg.graph_batch_sizes_init

        # scheduler settings must mirror the values passed above
        scheduler_cfg = current.ascend_scheduler_config
        assert scheduler_cfg.enabled
        assert scheduler_cfg.enable_chunked_prefill

        assert current.expert_tensor_parallel_size == 1
0 commit comments