File tree Expand file tree Collapse file tree 2 files changed +5
-4
lines changed
vllm/model_executor/layers/quantization Expand file tree Collapse file tree 2 files changed +5
-4
lines changed Original file line number Diff line number Diff line change @@ -57,9 +57,10 @@ def get_moe_method(
5757 "input_activations")
5858
5959 if quant_config ._is_wNa16_group_channel (weight_quant , input_quant ):
60+ # group_size=None means channelwise
61+ group_size = weight_quant.group_size or -1
6062 # Prefer to use the MarlinMoE kernel when it is supported.
61- if not check_moe_marlin_supports_layer(layer,
62- weight_quant.group_size):
63+ if not check_moe_marlin_supports_layer(layer, group_size):
6364 if (weight_quant.strategy in QuantizationStrategy.GROUP and
6465 weight_quant.actorder in (ActivationOrdering.GROUP,
6566 ActivationOrdering.DYNAMIC)):
Original file line number Diff line number Diff line change @@ -610,9 +610,9 @@ def apply(
610610 activation: str = "silu",
611611 ) -> torch.Tensor:
612612 assert activation == "silu", "Only SiLU activation is supported."
613- if apply_router_weight_on_input is not None:
613+ if apply_router_weight_on_input:
614614 raise NotImplementedError(
615- "Apply router weight on input is not supported for"
615+ "Apply router weight on input is not supported for "
616616 "fused Marlin MoE method.")
617617
618618 topk_weights, topk_ids = FusedMoE.select_experts(
You can’t perform that action at this time.
0 commit comments