@@ -293,8 +293,8 @@ def apply(
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
         e_score_correction_bias: Optional[torch.Tensor] = None,
-        activation: str = "silu",
         apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
     ) -> torch.Tensor:
         from vllm.model_executor.layers.fused_moe import fused_experts
         assert activation == "silu", "Only SiLU activation is supported."
@@ -313,22 +313,23 @@ def apply(
         weight_bits = self.quant_config.weight_bits
         has_zp = self.quant_config.has_zp

-        return fused_experts(x,
-                             layer.w13_qweight,
-                             layer.w2_qweight,
-                             topk_weights=topk_weights,
-                             topk_ids=topk_ids,
-                             inplace=True,
-                             use_int4_w4a16=weight_bits == 4,
-                             use_int8_w8a16=weight_bits == 8,
-                             global_num_experts=global_num_experts,
-                             apply_router_weight_on_input=apply_router_weight_on_input,
-                             expert_map=expert_map,
-                             w1_scale=layer.w13_scales,
-                             w2_scale=layer.w2_scales,
-                             w1_zp=layer.w13_qzeros if has_zp else None,
-                             w2_zp=layer.w2_qzeros if has_zp else None,
-                             block_shape=[0, layer.group_size])
+        return fused_experts(
+            x,
+            layer.w13_qweight,
+            layer.w2_qweight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            inplace=True,
+            use_int4_w4a16=weight_bits == 4,
+            use_int8_w8a16=weight_bits == 8,
+            global_num_experts=global_num_experts,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+            expert_map=expert_map,
+            w1_scale=layer.w13_scales,
+            w2_scale=layer.w2_scales,
+            w1_zp=layer.w13_qzeros if has_zp else None,
+            w2_zp=layer.w2_qzeros if has_zp else None,
+            block_shape=[0, layer.group_size])

     @staticmethod
     def get_weight_loader(layer, weight_loader):