
Commit 885d9ce

linting
1 parent 6f33557 commit 885d9ce

File tree

6 files changed, +25 -11 lines changed

vllm_ascend/ascend_forward_context.py

Lines changed: 1 addition & 2 deletions
@@ -95,8 +95,7 @@ def set_ascend_forward_context(
         forward_context.fused_moe_state = fused_moe_state
         forward_context.in_profile_run = in_profile_run

-        from vllm_ascend.ops.moe.token_dispatcher import \
-            get_token_dispatcher
+        from vllm_ascend.ops.moe.token_dispatcher import get_token_dispatcher
         dispatcher_name = get_dispatcher_name(ep_size, with_prefill)
         dispatcher = get_token_dispatcher(dispatcher_name)
         forward_context.token_dispatcher = dispatcher
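For context, the dispatcher stored on the forward context here is meant to be read back by downstream MoE code during the same forward pass. A minimal consumer sketch, assuming only what this hunk shows; the helper name is hypothetical, and vllm's get_forward_context must be called while a forward context is active:

from vllm.forward_context import get_forward_context


def get_current_token_dispatcher():
    """Hypothetical helper: return the dispatcher selected for this forward pass."""
    forward_context = get_forward_context()
    # `token_dispatcher` is attached by set_ascend_forward_context in the hunk above.
    return forward_context.token_dispatcher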

vllm_ascend/ops/common_fused_moe.py

Lines changed: 3 additions & 5 deletions
@@ -27,13 +27,11 @@
 from vllm.model_executor.layers.fused_moe.layer import (
     FusedMoE, UnquantizedFusedMoEMethod)
 from vllm_ascend.ascend_config import get_ascend_config
-from vllm_ascend.ops.moe.moe_comm_method import (AllGatherCommImpl,
-                                                 AlltoAllCommImpl,
-                                                 MC2CommImpl)
 from vllm_ascend.distributed.parallel_state import get_mc2_group
 from vllm_ascend.ops.moe.experts_selector import select_experts
-from vllm_ascend.ops.moe.token_dispatcher import \
-    setup_token_dispatchers
+from vllm_ascend.ops.moe.moe_comm_method import (AllGatherCommImpl,
+                                                 AlltoAllCommImpl, MC2CommImpl)
+from vllm_ascend.ops.moe.token_dispatcher import setup_token_dispatchers
 from vllm_ascend.utils import ACL_FORMAT_FRACTAL_NZ, is_310p, vllm_version_is

 original_unquantized_fused_moe_init_func = UnquantizedFusedMoEMethod.__init__
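This hunk is purely import style: the vllm_ascend imports are re-sorted alphabetically by module path and the backslash continuation is collapsed, consistent with the commit message "linting" (which formatter produced it is not stated in the commit). A generic illustration of the two styles, using stdlib modules rather than repository code:

# Backslash continuation: legal Python, but commonly rewritten by import formatters.
from os.path import \
    join

# Preferred single-line form when the import fits the line-length limit.
from os.path import basename

# Preferred parenthesized form when several names do not fit on one line.
from collections.abc import (Iterable, Mapping,
                             Sequence)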

vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py

Lines changed: 18 additions & 4 deletions
@@ -9,6 +9,7 @@
     get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
 from vllm.forward_context import get_forward_context
 from vllm.model_executor.layers.fused_moe import FusedMoEConfig
+
 from vllm_ascend.distributed.communication_op import \
     data_parallel_reduce_scatter

@@ -19,11 +20,17 @@ def __init__(self, moe_config: Optional[FusedMoEConfig]):
         self.moe_config = moe_config

     @abstractmethod
-    def prepare(self):
+    def prepare(self,
+                hidden_states: torch.Tensor,
+                router_logits: torch.Tensor,
+                enable_shared_expert_dp: bool = False,
+                rm_router_logits: bool = False,
+                replace_allreduce: bool = False,
+                gate=None) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         raise NotImplementedError("Prepare not implemented.")

-    @abstractmethod
-    def finalize(self):
+    def finalize(self, hidden_states: torch.Tensor,
+                 reduce_results: bool) -> torch.Tensor:
         raise NotImplementedError("Combine function not implemented.")


@@ -91,6 +98,8 @@ def finalize(self, hidden_states: torch.Tensor,

         Also, unpad the hidden states if needed.
         """
+        assert self.moe_config.tp_group is not None, "tp_group cannot be None."
+
         if not (self.enable_shared_expert_dp or self.replace_all_reduce):
             if self.tp_size > 1:
                 dist.all_gather(list(self.split_hidden_states), hidden_states,
@@ -155,6 +164,8 @@ def finalize(self, hidden_states: torch.Tensor,

         Also, unpad the hidden states if needed.
         """
+        assert self.moe_config.tp_group is not None, "tp_group cannot be None."
+
         if not (self.enable_shared_expert_dp or self.replace_all_reduce):
             if self.tp_size > 1:
                 dist.all_gather(list(self.split_hidden_states), hidden_states,
@@ -180,9 +191,12 @@ def prepare(self,
                 replace_all_reduce: bool = False,
                 gate=None) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         """When DP size > 1, pad the hidden states and router logits for communication."""
+        assert self.moe_config.dp_size is not None, "dp_size cannot be None."
+        assert self.moe_config.dp_group is not None, "dp_group cannot be None."
+
         self.rm_router_logits = rm_router_logits
         self.enable_shared_expert_dp = enable_shared_expert_dp
-
+
         if self.moe_config.dp_size > 1:
             forward_context = get_forward_context()
             max_tokens_across_dp = forward_context.max_tokens_across_dp
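The widened abstract signatures above spell out the contract that the concrete AllGather/All2All/MC2 implementations in this file fulfil. Below is a minimal sketch of a conforming subclass, assuming the base class exported by this module is named FusedMoEPrepareAndFinalize (the name is not visible in this diff), with deliberately trivial bodies standing in for the real padding and collective communication:

import torch

# Assumed base-class name; this diff only shows its abstract methods.
from vllm_ascend.ops.moe.fused_moe_prepare_and_finalize import (
    FusedMoEPrepareAndFinalize)


class NoOpPrepareAndFinalize(FusedMoEPrepareAndFinalize):
    """Illustrative subclass: satisfies the abstract contract with no padding
    and no collective communication."""

    def prepare(self,
                hidden_states: torch.Tensor,
                router_logits: torch.Tensor,
                enable_shared_expert_dp: bool = False,
                rm_router_logits: bool = False,
                replace_allreduce: bool = False,
                gate=None) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        # Real implementations pad and gather across TP/DP ranks here; what the
        # third tensor of the returned triple carries is not shown in this diff,
        # so the sketch simply reuses the router logits.
        return hidden_states, router_logits, router_logits

    def finalize(self, hidden_states: torch.Tensor,
                 reduce_results: bool) -> torch.Tensor:
        # Real implementations all-gather / reduce-scatter and unpad here.
        return hidden_states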

vllm_ascend/ops/moe/moe_comm_method.py

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
 import torch
 from vllm.forward_context import get_forward_context
 from vllm.model_executor.layers.fused_moe import FusedMoEConfig
+
 from vllm_ascend.ops.moe.fused_moe_prepare_and_finalize import (
     FusedMoEPrepareAndFinalizeWithAll2All,
     FusedMoEPrepareAndFinalizeWithAllGather, FusedMoEPrepareAndFinalizeWithMC2)

vllm_ascend/ops/moe/moe_mlp.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
 import torch_npu
 from torch.nn.functional import pad
 from vllm.forward_context import get_forward_context
+
 from vllm_ascend.ascend_forward_context import FusedMoEState
 from vllm_ascend.utils import dispose_tensor, is_310p


vllm_ascend/ops/moe/token_dispatcher.py

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@
 import torch
 import torch_npu
 from vllm.distributed.parallel_state import get_ep_group
+
 from vllm_ascend.distributed.parallel_state import get_mc2_group
 from vllm_ascend.distributed.tensor_parallel import \
     gather_from_sequence_parallel_region
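The single blank line added in each of the last three files (and in the first hunk of fused_moe_prepare_and_finalize.py) separates third-party imports (torch, torch_npu, vllm) from first-party vllm_ascend imports. A short illustration of the resulting grouping, assuming an isort-style configuration that treats vllm_ascend as first-party code (the project's actual formatter settings are not shown in this commit):

# Third-party section.
import torch
from vllm.distributed.parallel_state import get_ep_group

# First-party section: begins after the blank line these hunks add.
from vllm_ascend.distributed.parallel_state import get_mc2_group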
