1 parent afe1767 commit 0ea7d83
vllm_ascend/quantization/w8a8_dynamic.py
@@ -285,7 +285,8 @@ def fused_experts(hidden_states: torch.Tensor,
         valid_token_mask = torch.arange(
             0, sorted_token_indices.shape[0],
             device=device).unsqueeze(1) < num_valid_tokens
-        down_out_list.mul_(valid_token_mask)
+        down_out_list = down_out_list.masked_fill_(~valid_token_mask,
+                                                   0).to(dtype)
         final_hidden_states.index_add_(0, sorted_token_indices, down_out_list)
     else:
         # TODO: Reorder device memory 2 times here, replace the current
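The sketch below illustrates the masking step in the new hunk in isolation. The tensor shapes, the dtype, and the value of num_valid_tokens are illustrative assumptions, not the actual values used inside fused_experts; only the mask construction and the masked_fill_/.to(dtype) pattern mirror the diff.

import torch

# Assumed target dtype and token counts, for illustration only.
dtype = torch.bfloat16
num_valid_tokens = 3

# Pretend "down_out_list" holds per-sorted-token outputs (5 tokens, hidden=4).
down_out_list = torch.randn(5, 4, dtype=torch.float32)

# Same construction as the diff: a column of indices compared against the
# number of valid tokens, so padded rows map to False.
valid_token_mask = torch.arange(
    0, down_out_list.shape[0]).unsqueeze(1) < num_valid_tokens

# New behaviour: zero out the padded rows in place, then cast to the target
# dtype before the index_add_ into final_hidden_states.
down_out_list = down_out_list.masked_fill_(~valid_token_mask, 0).to(dtype)

Compared with the removed mul_(valid_token_mask), this variant also casts the result with .to(dtype); presumably that keeps the dtype of down_out_list aligned with final_hidden_states for the subsequent index_add_.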