File tree Expand file tree Collapse file tree 2 files changed +8 -6
lines changed Original file line number Diff line number Diff line change 2121Run `pytest tests/test_offline_inference.py`.
2222"""
2323import os
24+ import pytest
2425
2526import vllm # noqa: F401
2627
@@ -46,7 +47,8 @@ def test_models_distributed_QwQ():
4647 vllm_model .generate_greedy (example_prompts , max_tokens )
4748
4849
49- def test_models_distributed_DeepSeek ():
50+ @pytest .mark .parametrize ("enable_expert_parallel" , [True , False ])
51+ def test_models_distributed_DeepSeek (enable_expert_parallel ):
5052 example_prompts = [
5153 "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs." ,
5254 "Briefly describe the major milestones in the development of artificial intelligence from 1950 to 2020." ,
@@ -58,6 +60,7 @@ def test_models_distributed_DeepSeek():
5860 "deepseek-ai/DeepSeek-V2-Lite" ,
5961 dtype = dtype ,
6062 tensor_parallel_size = 4 ,
63+ enable_expert_parallel = enable_expert_parallel ,
6164 distributed_executor_backend = "mp" ,
6265 ) as vllm_model :
6366 vllm_model .generate_greedy (example_prompts , max_tokens )
Original file line number Diff line number Diff line change @@ -88,15 +88,14 @@ def fused_experts_with_mc2(
8888 0 :5 ]
8989
9090 w1 = w1 .transpose (1 , 2 )
91- expert_token_nums = torch .cumsum (expert_token_nums ,
92- dim = 0 ,
93- dtype = torch .int64 )
91+
9492 group_list = expert_token_nums .to (torch .int64 )
9593 gate_up_out_list = torch_npu .npu_grouped_matmul (
9694 x = [expand_x ],
9795 weight = [w1 ],
9896 split_item = 2 ,
99- group_list_type = 0 ,
97+ # group_list_type=1 is count mode: group_list holds per-group token counts, so no cumsum of the group list is needed
98+ group_list_type = 1 ,
10099 group_type = 0 ,
101100 group_list = group_list ,
102101 )
@@ -110,7 +109,7 @@ def fused_experts_with_mc2(
110109 x = [gate_up_out ],
111110 weight = [w2 ],
112111 split_item = 2 ,
113- group_list_type = 0 ,
112+ group_list_type = 1 ,
114113 group_type = 0 ,
115114 group_list = group_list ,
116115 )
You can’t perform that action at this time.
0 commit comments