1313# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1414# See the License for the specific language governing permissions and
1515# limitations under the License.
16+ import os
17+
1618import pytest
1719
1820from tests .conftest import VllmRunner
19- from vllm_ascend .ascend_config import clear_ascend_config , get_ascend_config
21+ from vllm_ascend .ascend_config import (clear_ascend_config , get_ascend_config ,
22+ init_ascend_config )
2023
2124
2225def _clean_up_ascend_config (func ):
@@ -39,28 +42,35 @@ def test_run_without_ascend_config():
3942 assert ascend_config .torchair_graph_config .graph_batch_sizes == []
4043 assert not ascend_config .torchair_graph_config .graph_batch_sizes_init
4144 assert not ascend_config .ascend_scheduler_config .enabled
42- assert ascend_config .expert_tensor_parallel_size == 1
45+ assert ascend_config .expert_tensor_parallel_size == 0
4346
4447
4548@_clean_up_ascend_config
4649def test_run_with_ascend_config ():
47- input_additional_config = {
50+ if os .getenv ("VLLM_USE_V1" ) == "0" :
51+ pytest .skip ("graph only works on v1" )
52+
53+ input_additional_config_1 = {
4854 "torchair_graph_config" : {
4955 # torchair graph only works with deepseek. The e2e test should be added
5056 # in multicard test with deepseek models.
5157 "enabled" : False ,
5258 "use_cached_graph" : True ,
5359 "graph_batch_sizes" : [1 , 2 , 4 , 8 ],
5460 "graph_batch_sizes_init" : False ,
61+ "enable_multistream_shared_expert" : True ,
5562 },
5663 "ascend_scheduler_config" : {
5764 "enabled" : True ,
5865 "enable_chunked_prefill" : True ,
5966 },
6067 "expert_tensor_parallel_size" : 1
6168 }
69+
70+ # check passed with eager mode
6271 with VllmRunner ("facebook/opt-125m" ,
63- additional_config = input_additional_config ):
72+ enforce_eager = True ,
73+ additional_config = input_additional_config_1 ):
6474 ascend_config = get_ascend_config ()
6575
6676 assert not ascend_config .torchair_graph_config .enabled
@@ -69,6 +79,7 @@ def test_run_with_ascend_config():
6979 1 , 2 , 4 , 8
7080 ]
7181 assert not ascend_config .torchair_graph_config .graph_batch_sizes_init
82+ assert ascend_config .torchair_graph_config .enable_multistream_shared_expert
7283 assert ascend_config .ascend_scheduler_config .enabled
7384 assert ascend_config .ascend_scheduler_config .enable_chunked_prefill
7485 assert ascend_config .expert_tensor_parallel_size == 1
@@ -83,6 +94,8 @@ def test_ascend_config_init_error():
8394
8495@_clean_up_ascend_config
8596def test_ascend_config_load_error ():
97+ if os .getenv ("VLLM_USE_V1" ) == "0" :
98+ pytest .skip ("graph only works on v1" )
8699 # graph_batch_sizes should be list.
87100 with pytest .raises (TypeError ):
88101 input_additional_config_fake_1 = {
@@ -117,3 +130,60 @@ def test_ascend_config_load_error():
117130 enforce_eager = False ,
118131 additional_config = input_additional_config_fake_2 ):
119132 pass
133+
134+ # torchair graph should not be enabled with eager mode
135+ with pytest .raises (RuntimeError ):
136+ input_additional_config_fake_3 = {
137+ "torchair_graph_config" : {
138+ "enabled" : True ,
139+ },
140+ }
141+ with VllmRunner ("facebook/opt-125m" ,
142+ enforce_eager = True ,
143+ additional_config = input_additional_config_fake_3 ):
144+ pass
145+
146+
@_clean_up_ascend_config
def test_check_ascend_config_v0():
    """Enabling torchair graph on the v0 engine must raise NotImplementedError."""
    if os.getenv("VLLM_USE_V1") == "1":
        pytest.skip("graph only works on v1, this is the test for v0")

    # torchair graph is a v1-only feature, so this config is invalid on v0.
    graph_enabled_config = {
        "torchair_graph_config": {
            "enabled": True,
        },
    }
    with pytest.raises(NotImplementedError):
        with VllmRunner("facebook/opt-125m",
                        additional_config=graph_enabled_config):
            pass
160+
161+
@_clean_up_ascend_config
def test_ascend_config_refresh():
    """A config passed with "refresh": True replaces a previously initialized ascend config."""
    from vllm.config import get_current_vllm_config

    # Pre-initialize the ascend config with no additional_config set,
    # so the runner below has something to refresh.
    init_ascend_config(get_current_vllm_config())

    refreshed_config = {
        "torchair_graph_config": {
            "enabled": False,
            "use_cached_graph": True,
            "graph_batch_sizes": [1, 2, 4, 8],
            "graph_batch_sizes_init": False,
        },
        "refresh": True,
    }

    # The runner should re-init the ascend config from refreshed_config.
    with VllmRunner("facebook/opt-125m",
                    additional_config=refreshed_config):
        graph_config = get_ascend_config().torchair_graph_config

        assert not graph_config.enabled
        assert graph_config.use_cached_graph
        assert graph_config.graph_batch_sizes == [1, 2, 4, 8]
        assert not graph_config.graph_batch_sizes_init
0 commit comments