Skip to content

Commit f8f09df

Browse files
author
wangxiaoxin-sherie
committed
xx
1 parent 6a8b38e commit f8f09df

File tree

1 file changed

+1
-14
lines changed

1 file changed

+1
-14
lines changed

vllm_ascend/ops/token_dispatch.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,7 @@ def token_permutation(
667667
return hidden_states, expert_tokens
668668

669669
def token_unpermutation(self, hidden_states: torch.Tensor, topk_weights: torch.Tensor, topk_ids: torch.Tensor,
670-
w1_scale: torch.Tensor, w2: torch.Tensor, w2_scale: torch.Tensor, expert_tokens: torch.Tensor):
670+
pertoken_scale: torch.Tensor, w2: torch.Tensor, w2_scale: torch.Tensor, expert_tokens: torch.Tensor):
671671
sorted_topk_weight = torch.index_select(topk_weights.view(-1), 0,
672672
self.expanded_x_idx)
673673
row_index = self.expanded_x_idx // topk_ids.shape[-1]
@@ -676,19 +676,6 @@ def token_unpermutation(self, hidden_states: torch.Tensor, topk_weights: torch.T
676676
dtype=torch.bfloat16,
677677
device="npu")
678678

679-
# act_fn: swiglu
680-
hidden_states, pertoken_scale = torch_npu.npu_dequant_swiglu_quant(
681-
x=hidden_states,
682-
weight_scale=w1_scale.to(torch.float32),
683-
activation_scale=pertoken_scale,
684-
bias=None,
685-
quant_scale=None,
686-
quant_offset=None,
687-
group_index=expert_tokens,
688-
activate_left=True,
689-
quant_mode=1,
690-
)
691-
692679
final_hidden_states = torch_npu.npu_grouped_matmul_finalize_routing(
693680
hidden_states,
694681
w2,

0 commit comments

Comments (0)