Skip to content

Commit dd197d0

Browse files
yewentao256 authored and usberkeley committed
[Bug] Fix DeepSeek-V2.5-1210-FP8 issue (vllm-project#27267)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
1 parent 8d514e4 commit dd197d0

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

vllm/model_executor/layers/fused_moe/cutlass_moe.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -511,13 +511,19 @@ def cutlass_moe_fp8(
511511
assert quant_config is not None
512512

513513
if quant_config.a1_scale is not None:
514-
assert quant_config.per_act_token_quant == quant_config.a1_scale.numel() != 1
514+
assert quant_config.per_act_token_quant == (quant_config.a1_scale.numel() != 1)
515515
if quant_config.a2_scale is not None:
516-
assert quant_config.per_act_token_quant == quant_config.a2_scale.numel() != 1
516+
assert quant_config.per_act_token_quant == (quant_config.a2_scale.numel() != 1)
517517

518-
assert quant_config.w1_scale is None or (
519-
quant_config.per_out_ch_quant == (quant_config.w1_scale.size(1) == w1_q.size(1))
520-
)
518+
if quant_config.w1_scale is not None:
519+
if quant_config.per_out_ch_quant:
520+
assert quant_config.w1_scale.dim() > 1 and quant_config.w1_scale.size(
521+
1
522+
) == w1_q.size(1)
523+
else:
524+
assert (
525+
quant_config.w1_scale.dim() == 1 or quant_config.w1_scale.size(1) == 1
526+
)
521527

522528
num_experts = global_num_experts if global_num_experts != -1 else w1_q.size(0)
523529

0 commit comments

Comments
 (0)