Skip to content

Commit 6f33557

Browse files
committed
update
1 parent a31d222 commit 6f33557

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

vllm_ascend/ops/moe/moe_mlp.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,8 @@ def unified_apply_mlp(hidden_states: torch.Tensor,
227227
w2_scale_bias: torch.Tensor = None,
228228
topk_scales: Optional[torch.Tensor] = None,
229229
with_quant: bool = False,
230-
fusion: bool = False) -> torch.Tensor:
230+
fusion: bool = False,
231+
need_trans: bool = True) -> torch.Tensor:
231232
if with_quant:
232233
return quant_apply_mlp(hidden_states=hidden_states,
233234
w1=w1,

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from vllm_ascend.ascend_forward_context import FusedMoEState
2929
from vllm_ascend.distributed.parallel_state import get_mc2_group
3030
from vllm_ascend.ops.fused_moe import unified_fused_experts_eager
31-
from vllm_ascend.ops.layers.experts_selector import select_experts
31+
from vllm_ascend.ops.moe.experts_selector import select_experts
3232
from vllm_ascend.utils import ACL_FORMAT_FRACTAL_NZ
3333

3434

0 commit comments

Comments (0)