2 files changed: +22 -5 lines changed
@@ -61,3 +61,21 @@ def test_models_distributed_DeepSeek():
6161 distributed_executor_backend = "mp" ,
6262 ) as vllm_model :
6363 vllm_model .generate_greedy (example_prompts , max_tokens )
64+
65+
66+ def test_models_distributed_ep_DeepSeek ():
67+ example_prompts = [
68+ "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs." ,
69+ "Briefly describe the major milestones in the development of artificial intelligence from 1950 to 2020." ,
70+ "Compare and contrast artificial intelligence with human intelligence in terms of processing information." ,
71+ ]
72+ dtype = "half"
73+ max_tokens = 5
74+ with VllmRunner (
75+ "deepseek-ai/DeepSeek-V2-Lite" ,
76+ dtype = dtype ,
77+ tensor_parallel_size = 8 ,
78+ enable_expert_parallel = True ,
79+ distributed_executor_backend = "mp" ,
80+ ) as vllm_model :
81+ vllm_model .generate_greedy (example_prompts , max_tokens )
@@ -88,15 +88,14 @@ def fused_experts_with_mc2(
8888 0 :5 ]
8989
9090 w1 = w1 .transpose (1 , 2 )
91- expert_token_nums = torch .cumsum (expert_token_nums ,
92- dim = 0 ,
93- dtype = torch .int64 )
91+
9492 group_list = expert_token_nums .to (torch .int64 )
9593 gate_up_out_list = torch_npu .npu_grouped_matmul (
9694 x = [expand_x ],
9795 weight = [w1 ],
9896 split_item = 2 ,
99- group_list_type = 0 ,
97+ # group_list_type=1 is count mode: group_list is passed as plain counts, so the torch.cumsum over the group list is no longer needed
98+ group_list_type = 1 ,
10099 group_type = 0 ,
101100 group_list = group_list ,
102101 )
@@ -110,7 +109,7 @@ def fused_experts_with_mc2(
110109 x = [gate_up_out ],
111110 weight = [w2 ],
112111 split_item = 2 ,
113- group_list_type = 0 ,
112+ group_list_type = 1 ,
114113 group_type = 0 ,
115114 group_list = group_list ,
116115 )
You can’t perform that action at this time.
0 commit comments