File tree: 1 file changed (+11 -5 lines changed)
vllm/model_executor/layers/fused_moe: 1 file changed (+11 -5 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -511,13 +511,19 @@ def cutlass_moe_fp8(
511511 assert quant_config is not None
512512
513513 if quant_config .a1_scale is not None :
514- assert quant_config .per_act_token_quant == quant_config .a1_scale .numel () != 1
514+ assert quant_config .per_act_token_quant == ( quant_config .a1_scale .numel () != 1 )
515515 if quant_config .a2_scale is not None :
516- assert quant_config .per_act_token_quant == quant_config .a2_scale .numel () != 1
516+ assert quant_config .per_act_token_quant == ( quant_config .a2_scale .numel () != 1 )
517517
518- assert quant_config .w1_scale is None or (
519- quant_config .per_out_ch_quant == (quant_config .w1_scale .size (1 ) == w1_q .size (1 ))
520- )
518+ if quant_config .w1_scale is not None :
519+ if quant_config .per_out_ch_quant :
520+ assert quant_config .w1_scale .dim () > 1 and quant_config .w1_scale .size (
521+ 1
522+ ) == w1_q .size (1 )
523+ else :
524+ assert (
525+ quant_config .w1_scale .dim () == 1 or quant_config .w1_scale .size (1 ) == 1
526+ )
521527
522528 num_experts = global_num_experts if global_num_experts != - 1 else w1_q .size (0 )
523529
You can’t perform that action at this time.
0 commit comments