2525
2626from modelscope import snapshot_download # type: ignore
2727from vllm import SamplingParams
28+ import pytest
2829
2930from tests .conftest import VllmRunner
31+ from tests .model_utils import check_outputs_equal
3032
3133os .environ ["PYTORCH_NPU_ALLOC_CONF" ] = "max_split_size_mb:256"
3234
@@ -46,21 +48,6 @@ def test_models_distributed_QwQ():
4648 vllm_model .generate_greedy (example_prompts , max_tokens )
4749
4850
def test_models_distributed_DeepSeek():
    """Smoke-test greedy decoding for DeepSeek-V2-Lite with 4-way tensor
    parallelism on the multiprocessing executor backend."""
    prompts = [
        "Hello, my name is",
    ]
    max_tokens = 5
    runner_kwargs = dict(
        dtype="half",
        tensor_parallel_size=4,
        distributed_executor_backend="mp",
    )
    with VllmRunner("deepseek-ai/DeepSeek-V2-Lite", **runner_kwargs) as runner:
        runner.generate_greedy(prompts, max_tokens)

63-
6451@patch .dict (os .environ , {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE" : "1" })
6552def test_models_distributed_topk () -> None :
6653 example_prompts = [
@@ -83,18 +70,36 @@ def test_models_distributed_topk() -> None:
8370 vllm_model .generate (example_prompts , sampling_params )
8471
8572
86- @patch .dict (os .environ , {"VLLM_ASCEND_ENABLE_DBO" : "1" })
87- def test_models_distributed_DeepSeek_dbo ():
88- example_prompts = ["The president of the United States is" ] * 41
89- dtype = "half"
90- sampling_params = SamplingParams (max_tokens = 100 , temperature = 0.0 )
91- with VllmRunner (
92- "deepseek-ai/DeepSeek-V2-Lite" ,
93- dtype = dtype ,
94- tensor_parallel_size = 4 ,
95- distributed_executor_backend = "mp" ,
96- ) as vllm_model :
97- vllm_model .generate (example_prompts , sampling_params )
def test_models_distributed_DeepSeek_dbo(monkeypatch: pytest.MonkeyPatch):
    """Verify dual-batch overlap (DBO) does not change model outputs.

    Runs DeepSeek-V2-Lite twice with greedy sampling (temperature=0.0) —
    once with ``VLLM_ASCEND_ENABLE_DBO=1`` and once with it set to ``0`` —
    and asserts the generated outputs are identical via
    ``check_outputs_equal``.  41 prompts are used so the batch is large
    enough to exercise the dual-batch split path.
    """
    with monkeypatch.context() as m:
        m.setenv("VLLM_ASCEND_ENABLE_DBO", "1")

        example_prompts = ["The president of the United States is"] * 41
        dtype = "half"
        sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
        with VllmRunner(
                "deepseek-ai/DeepSeek-V2-Lite",
                dtype=dtype,
                tensor_parallel_size=4,
                distributed_executor_backend="mp",
        ) as vllm_model:
            # Fixed typo: this is the DBO-enabled run, not "dpo".
            dbo_output = vllm_model.generate(example_prompts, sampling_params)

        m.setenv("VLLM_ASCEND_ENABLE_DBO", "0")
        with VllmRunner(
                "deepseek-ai/DeepSeek-V2-Lite",
                dtype=dtype,
                tensor_parallel_size=4,
                distributed_executor_backend="mp",
        ) as vllm_model:
            # Baseline run with DBO disabled.
            output = vllm_model.generate(example_prompts, sampling_params)

        check_outputs_equal(
            outputs_0_lst=output,
            outputs_1_lst=dbo_output,
            name_0="vllm_outputs",
            name_1="vllm_dbo_outputs",
        )

98103
99104
100105def test_models_distributed_DeepSeek_W8A8 ():
0 commit comments