Commit 0dfb10d

Simplifies MoE comm; removes unused MC2 params
Removes dead/commented paths in the MoE communication implementation and cleans up legacy chunking/gather remnants.

Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
1 parent 6607b2f commit 0dfb10d

File tree

vllm_ascend/distributed/moe_comm_method.py
vllm_ascend/ops/fused_moe.py

2 files changed (+0, -33)

vllm_ascend/distributed/moe_comm_method.py

Lines changed: 0 additions & 25 deletions
@@ -305,23 +305,6 @@ def _pre_process(
         self.topk_weights = topk_weights.to(torch.float32)
         self.mc2_mask = get_forward_context().mc2_mask
 
-        # tp_size = get_tensor_model_parallel_world_size()
-        # self.chunked_hidden_states = torch.tensor_split(hidden_states,
-        #                                                 tp_size,
-        #                                                 dim=0)
-        # chunked_topk_ids = torch.tensor_split(self.topk_ids,
-        #                                       tp_size,
-        #                                       dim=0)
-        # chunked_topk_weights = torch.tensor_split(self.topk_weights,
-        #                                           tp_size,
-        #                                           dim=0)
-        # chunked_mc2_mask = torch.tensor_split(self.mc2_mask, tp_size, dim=0)
-        # tp_rank = get_tensor_model_parallel_rank()
-        # hidden_states = self.chunked_hidden_states[tp_rank]
-        # self.topk_ids = chunked_topk_ids[tp_rank]
-        # self.topk_weights = chunked_topk_weights[tp_rank]
-        # self.mc2_mask = chunked_mc2_mask[tp_rank]
-
         dispatch_kwargs = {
             "x": hidden_states,
             "expert_ids": self.topk_ids,
@@ -400,14 +383,6 @@ def _post_process(self, mlp_output: torch.Tensor,
 
         hidden_states[:] = combine(**combine_kwargs)
 
-        # final_hidden_states = combine(**combine_kwargs)
-
-        # dist.all_gather(list(self.chunked_hidden_states), final_hidden_states, get_tp_group().device_group)
-
-        # final_hidden_states = torch.cat(self.chunked_hidden_states, dim=0)
-
-        # hidden_states[:] = final_hidden_states
-
 
 def moe_comm_pre_process(
     hidden_states: torch.Tensor,
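
For context, the comments removed above preserved an earlier tensor-parallel chunking scheme: each TP rank dispatched only its torch.tensor_split slice of the batch, and _post_process all-gathered the combined slices back into the full batch. Below is a minimal sketch of that pattern; the helper names (chunk_for_rank, gather_after_combine) are hypothetical, not vllm-ascend APIs, and only the tensor_split / all_gather / cat calls mirror the deleted lines.

# Minimal sketch of the legacy chunk/gather scheme kept in the deleted
# comments. Helper names and signatures are hypothetical.
import torch
import torch.distributed as dist


def chunk_for_rank(hidden_states, topk_ids, topk_weights, tp_rank, tp_size):
    # Split every per-token tensor into tp_size chunks along the token dim
    # and keep only this rank's chunk (mirrors the torch.tensor_split calls).
    chunked_hidden = torch.tensor_split(hidden_states, tp_size, dim=0)
    local_ids = torch.tensor_split(topk_ids, tp_size, dim=0)[tp_rank]
    local_weights = torch.tensor_split(topk_weights, tp_size, dim=0)[tp_rank]
    # The full tuple of hidden-state chunks is returned so the combine step
    # can all-gather back into those same views.
    return chunked_hidden, chunked_hidden[tp_rank], local_ids, local_weights


def gather_after_combine(local_output, chunked_hidden, group):
    # Mirrors the deleted dist.all_gather(...) + torch.cat(...) lines:
    # collect every rank's combined slice, then restore the full batch.
    dist.all_gather(list(chunked_hidden), local_output, group=group)
    return torch.cat(chunked_hidden, dim=0)

With the path gone, _pre_process hands the full batch straight to MC2 dispatch and no chunk state has to survive into _post_process.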

vllm_ascend/ops/fused_moe.py

Lines changed: 0 additions & 8 deletions
@@ -77,10 +77,6 @@ def unified_fused_experts(
     moe_comm_method: Optional[MoECommMethod] = None,
     # For TorchAir graph
     is_torchair: bool = False,
-    # For communication
-    use_mc2: bool = False,
-    moe_all_to_all_group_name: str = "",
-    mc2_mask: Optional[torch.Tensor] = None,
     # For Cube/Vector parallel
     shared_experts: Optional[Any] = None,
     quantized_x_for_share: Optional[Any] = None,
@@ -104,9 +100,6 @@ def unified_fused_experts(
 
     num_experts = w1.shape[0]
 
-    # permuted_hidden_states, expert_tokens, group_list_type = moe_comm_method._pre_process(
-    #     hidden_states, topk_ids, topk_weights, expert_map, num_experts
-    # )
     permuted_hidden_states, expert_tokens, group_list_type = torch.ops.vllm.moe_comm_pre_process(
         hidden_states, topk_ids, topk_weights, expert_map, num_experts)
     mlp_output = apply_mlp(
@@ -116,7 +109,6 @@ def unified_fused_experts(
         expert_tokens,
         group_list_type=group_list_type,
     )
-    # moe_comm_method._post_process(mlp_output, hidden_states)
     torch.ops.vllm.moe_comm_post_process(mlp_output, hidden_states)
 
     return hidden_states
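
Note that the live calls go through torch.ops.vllm.moe_comm_pre_process / moe_comm_post_process rather than the underscore methods the deleted comments used, which keeps the communication step registered with the PyTorch dispatcher as a single opaque op during graph capture. A minimal sketch of that registration pattern using torch.library.custom_op (PyTorch 2.4+); the "demo" namespace and the trivial body are assumptions for illustration, not vllm-ascend's actual registration code.

# Sketch: expose a Python function as a dispatcher-registered custom op so it
# can be called as torch.ops.<namespace>.<name>, the way the diff calls
# torch.ops.vllm.moe_comm_pre_process. Namespace and body are hypothetical.
import torch


@torch.library.custom_op("demo::moe_comm_post_process",
                         mutates_args=("hidden_states",))
def moe_comm_post_process(mlp_output: torch.Tensor,
                          hidden_states: torch.Tensor) -> None:
    # Write the result in place, matching the hidden_states[:] = ... pattern.
    hidden_states.copy_(mlp_output)


mlp_out = torch.randn(4, 8)
hidden = torch.empty(4, 8)
torch.ops.demo.moe_comm_post_process(mlp_out, hidden)  # dispatcher call
assert torch.equal(hidden, mlp_out)

Because the call goes through torch.ops with a declared mutation on hidden_states, compile and capture front-ends see one node instead of tracing into Python method internals.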
