@@ -16,7 +16,7 @@
 #

 import math
-from typing import Any, Callable, Dict, Optional, Tuple, Union, List
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 import torch
 import torch.distributed as dist
@@ -31,6 +31,7 @@
                                dispose_tensor, get_ascend_soc_version,
                                npu_stream_switch, npu_wait_tensor)

+
 def apply_mlp_decode(hidden_states_wrapper: List[torch.Tensor],
                      w1: torch.Tensor,
                      w1_scale: torch.Tensor,
@@ -80,7 +81,7 @@ def apply_mlp_decode(hidden_states_wrapper: List[torch.Tensor],

     # act_fn: swiglu
     hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
-        x=hidden_states,
+        x=hidden_states,
         weight_scale=w1_scale,
         activation_scale=pertoken_scale,
         bias=None,
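
For context, the fused call in this hunk is assumed to collapse three steps into one kernel: dequantize the int32 grouped-matmul output, apply SwiGLU, and re-quantize per token for the following down-projection matmul. The unfused PyTorch sketch below only illustrates that assumed behavior; the argument contract, gating convention, and scale shapes are assumptions for illustration, not the op's documented interface.

import torch


def dequant_swiglu_quant_ref(x_int32: torch.Tensor,
                             weight_scale: torch.Tensor,
                             activation_scale: torch.Tensor):
    # Dequantize the int32 accumulator (per-channel weight scale broadcast over
    # the last dim, per-token activation scale broadcast over channels).
    x = x_int32.float() * weight_scale * activation_scale.unsqueeze(-1)
    # SwiGLU: treat the first half as the gate (convention assumed here).
    gate, up = x.chunk(2, dim=-1)
    y = torch.nn.functional.silu(gate) * up
    # Per-token dynamic re-quantization to int8 for the next grouped matmul.
    out_scale = y.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8) / 127.0
    y_int8 = torch.clamp(torch.round(y / out_scale), -128, 127).to(torch.int8)
    return y_int8, out_scale.squeeze(-1)
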
@@ -269,17 +270,18 @@ def fused_experts_with_mc2(
     if shared_experts is not None:
         with npu_stream_switch("moe_secondary", 0):
             npu_wait_tensor(quantized_x_for_share, expand_x)
-            shared_act_out = shared_experts.act_fn((quantized_x_for_share, dynamic_scale_for_share))
+            shared_act_out = shared_experts.act_fn(
+                (quantized_x_for_share, dynamic_scale_for_share))
             shared_act, swiglu_out_scale = shared_act_out[0], shared_act_out[1]

     # `expand_x` will be disposed in the `apply_mlp` function
     down_out_list = apply_mlp_decode([expand_x],
-                                     w1,
-                                     w1_scale,
-                                     w2,
-                                     w2_scale,
-                                     expert_token_nums,
-                                     dynamic_scale=dynamic_scale)
+                                     w1,
+                                     w1_scale,
+                                     w2,
+                                     w2_scale,
+                                     expert_token_nums,
+                                     dynamic_scale=dynamic_scale)

     # moeCombine
     kwargs_mc2 = {
@@ -317,7 +319,8 @@ def fused_experts_with_mc2(
     else:
         with npu_stream_switch("moe_secondary", 0):
             npu_wait_tensor(shared_act, down_out_list)
-            shared_output, _ = shared_experts.down_proj((shared_act, swiglu_out_scale))
+            shared_output, _ = shared_experts.down_proj(
+                (shared_act, swiglu_out_scale))
     return hidden_states, shared_output


@@ -774,8 +777,10 @@ def apply(
         if shared_experts is not None and fused_moe_state == FusedMoEState.MC2:
             with npu_stream_switch("moe_secondary", 0):
                 npu_wait_tensor(quantized_x_for_share, router_logits)
-                share_up_out, _ = shared_experts.gate_up_proj((quantized_x_for_share, dynamic_scale_for_share))
-                shared_gate_up, shared_dequant_scale = share_up_out[0], share_up_out[1]
+                share_up_out, _ = shared_experts.gate_up_proj(
+                    (quantized_x_for_share, dynamic_scale_for_share))
+                shared_gate_up, shared_dequant_scale = share_up_out[
+                    0], share_up_out[1]

         # this is a naive implementation for experts load balance so as
         # to avoid accumulating too much tokens on a single rank.
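
Both shared-expert hunks above rely on the same overlap idiom: switch to the "moe_secondary" stream, wait on the producing tensors, then run the shared expert while the routed experts continue on the default stream. Below is a minimal sketch of that idiom, assuming only the helper behavior visible in this diff (npu_stream_switch as a context-manager stream selector, npu_wait_tensor as a tensor-ordering wait); shared_experts here is a stand-in module, not part of this change.

from vllm_ascend.utils import npu_stream_switch, npu_wait_tensor


def shared_expert_act_on_side_stream(shared_experts, quantized_x_for_share,
                                     dynamic_scale_for_share, expand_x):
    # Run the shared expert's activation on the secondary stream so it can
    # overlap with the routed experts' grouped matmul on the default stream.
    with npu_stream_switch("moe_secondary", 0):
        # Order the side-stream work after the dispatched tokens (expand_x)
        # are produced (assumed semantics of npu_wait_tensor).
        npu_wait_tensor(quantized_x_for_share, expand_x)
        shared_act_out = shared_experts.act_fn(
            (quantized_x_for_share, dynamic_scale_for_share))
        return shared_act_out[0], shared_act_out[1]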