Commit 10df398

Make linter happy
1 parent 59f8e46 commit 10df398

File tree

7 files changed: +54 −48 lines changed


vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 2 additions & 0 deletions

@@ -66,6 +66,8 @@ def apply(
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
         e_score_correction_bias: Optional[torch.Tensor] = None,
+        apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
     ) -> torch.Tensor:
         raise NotImplementedError
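
For orientation, here is a minimal, self-contained sketch of the pattern this hunk extends. It is not vLLM's actual class hierarchy: the class names and the leading parameters are illustrative placeholders, since the diff only shows the tail of the signature. The point is that an override keeps the same keyword order as the base hook, which is the ordering the rest of this commit aligns the quantization backends to.

from typing import Callable, Optional

import torch


class FusedMoEMethodBaseSketch:
    """Toy stand-in for the base MoE method; subclasses implement apply()."""

    def apply(
        self,
        x: torch.Tensor,
        custom_routing_function: Optional[Callable] = None,
        scoring_func: str = "softmax",
        e_score_correction_bias: Optional[torch.Tensor] = None,
        apply_router_weight_on_input: bool = False,
        activation: str = "silu",
    ) -> torch.Tensor:
        raise NotImplementedError


class NoopMoEMethod(FusedMoEMethodBaseSketch):
    """Override that mirrors the base keyword tail exactly."""

    def apply(
        self,
        x: torch.Tensor,
        custom_routing_function: Optional[Callable] = None,
        scoring_func: str = "softmax",
        e_score_correction_bias: Optional[torch.Tensor] = None,
        apply_router_weight_on_input: bool = False,
        activation: str = "silu",
    ) -> torch.Tensor:
        assert activation == "silu", "Only SiLU activation is supported."
        return x  # placeholder; a real backend would dispatch to a fused kernel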

vllm/model_executor/layers/quantization/awq_marlin.py

Lines changed: 1 addition & 1 deletion

@@ -469,8 +469,8 @@ def apply(
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
         e_score_correction_bias: Optional[torch.Tensor] = None,
-        activation: str = "silu",
         apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
     ) -> torch.Tensor:
         assert activation == "silu", "Only SiLU activation is supported."
         if expert_map is not None:

vllm/model_executor/layers/quantization/experts_int8.py

Lines changed: 15 additions & 14 deletions

@@ -113,8 +113,8 @@ def apply(
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
         e_score_correction_bias: Optional[torch.Tensor] = None,
-        activation: str = "silu",
         apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
     ) -> torch.Tensor:
         from vllm.model_executor.layers.fused_moe import fused_experts

@@ -130,19 +130,20 @@ def apply(
             scoring_func=scoring_func,
             e_score_correction_bias=e_score_correction_bias)

-        return fused_experts(x,
-                             layer.w13_weight,
-                             layer.w2_weight,
-                             topk_weights=topk_weights,
-                             topk_ids=topk_ids,
-                             inplace=True,
-                             activation=activation,
-                             use_int8_w8a16=True,
-                             global_num_experts=global_num_experts,
-                             apply_router_weight_on_input=apply_router_weight_on_input,
-                             expert_map=expert_map,
-                             w1_scale=layer.w13_scale,
-                             w2_scale=layer.w2_scale)
+        return fused_experts(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            inplace=True,
+            activation=activation,
+            use_int8_w8a16=True,
+            global_num_experts=global_num_experts,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+            expert_map=expert_map,
+            w1_scale=layer.w13_scale,
+            w2_scale=layer.w2_scale)

     @staticmethod
     def quantizing_weight_loader(layer, weight_loader):
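
The remaining reformatted calls in this commit follow the same pattern. As a toy illustration (the stub below is hypothetical, not vLLM's fused_experts, and the argument values are made up), here is the difference between the old hanging-indent call style and the new break-after-the-open-paren style:

from types import SimpleNamespace


def fused_experts_stub(x, w13, w2, **kwargs):
    """Stand-in that just echoes its first argument."""
    return x


def old_style(x, layer, apply_router_weight_on_input):
    # Hanging indent aligned to the opening parenthesis: with long keyword
    # names like apply_router_weight_on_input, continuation lines run past
    # an 80-column limit once the call sits inside an indented method body.
    return fused_experts_stub(x,
                              layer.w13_weight,
                              layer.w2_weight,
                              apply_router_weight_on_input=apply_router_weight_on_input)


def new_style(x, layer, apply_router_weight_on_input):
    # Break immediately after the opening parenthesis and indent one level,
    # as the reformatted calls in this commit do; every line stays short.
    return fused_experts_stub(
        x,
        layer.w13_weight,
        layer.w2_weight,
        apply_router_weight_on_input=apply_router_weight_on_input)


layer = SimpleNamespace(w13_weight="w13", w2_weight="w2")
assert old_style("x", layer, False) == new_style("x", layer, False)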

vllm/model_executor/layers/quantization/gguf.py

Lines changed: 1 addition & 1 deletion

@@ -338,8 +338,8 @@ def apply(
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
         e_score_correction_bias: Optional[torch.Tensor] = None,
-        activation: str = "silu",
         apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
     ):
         assert activation == "silu", "Only SiLU activation is supported."
         if apply_router_weight_on_input:

vllm/model_executor/layers/quantization/gptq_marlin.py

Lines changed: 1 addition & 1 deletion

@@ -592,8 +592,8 @@ def apply(
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
         e_score_correction_bias: Optional[torch.Tensor] = None,
-        activation: str = "silu",
         apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
     ) -> torch.Tensor:
         assert activation == "silu", "Only SiLU activation is supported."
         if apply_router_weight_on_input is not None:

vllm/model_executor/layers/quantization/moe_wna16.py

Lines changed: 18 additions & 17 deletions

@@ -293,8 +293,8 @@ def apply(
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
         e_score_correction_bias: Optional[torch.Tensor] = None,
-        activation: str = "silu",
         apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
     ) -> torch.Tensor:
         from vllm.model_executor.layers.fused_moe import fused_experts
         assert activation == "silu", "Only SiLU activation is supported."

@@ -313,22 +313,23 @@ def apply(
         weight_bits = self.quant_config.weight_bits
         has_zp = self.quant_config.has_zp

-        return fused_experts(x,
-                             layer.w13_qweight,
-                             layer.w2_qweight,
-                             topk_weights=topk_weights,
-                             topk_ids=topk_ids,
-                             inplace=True,
-                             use_int4_w4a16=weight_bits == 4,
-                             use_int8_w8a16=weight_bits == 8,
-                             global_num_experts=global_num_experts,
-                             apply_router_weight_on_input=apply_router_weight_on_input,
-                             expert_map=expert_map,
-                             w1_scale=layer.w13_scales,
-                             w2_scale=layer.w2_scales,
-                             w1_zp=layer.w13_qzeros if has_zp else None,
-                             w2_zp=layer.w2_qzeros if has_zp else None,
-                             block_shape=[0, layer.group_size])
+        return fused_experts(
+            x,
+            layer.w13_qweight,
+            layer.w2_qweight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            inplace=True,
+            use_int4_w4a16=weight_bits == 4,
+            use_int8_w8a16=weight_bits == 8,
+            global_num_experts=global_num_experts,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+            expert_map=expert_map,
+            w1_scale=layer.w13_scales,
+            w2_scale=layer.w2_scales,
+            w1_zp=layer.w13_qzeros if has_zp else None,
+            w2_zp=layer.w2_qzeros if has_zp else None,
+            block_shape=[0, layer.group_size])

     @staticmethod
     def get_weight_loader(layer, weight_loader):

vllm/model_executor/layers/quantization/quark/quark_moe.py

Lines changed: 16 additions & 14 deletions

@@ -203,6 +203,7 @@ def apply(
         scoring_func: str = "softmax",
         e_score_correction_bias: Optional[torch.Tensor] = None,
         apply_router_weight_on_input: bool = False,
+        activation: str = "silu",
     ) -> torch.Tensor:
         from vllm.model_executor.layers.fused_moe import fused_experts

@@ -218,17 +219,18 @@ def apply(
             scoring_func=scoring_func,
             e_score_correction_bias=e_score_correction_bias)

-        return fused_experts(x,
-                             layer.w13_weight,
-                             layer.w2_weight,
-                             topk_weights=topk_weights,
-                             topk_ids=topk_ids,
-                             inplace=True,
-                             use_fp8_w8a8=True,
-                             global_num_experts=global_num_experts,
-                             apply_router_weight_on_input=apply_router_weight_on_input,
-                             expert_map=expert_map,
-                             w1_scale=layer.w13_weight_scale,
-                             w2_scale=layer.w2_weight_scale,
-                             a1_scale=layer.w13_input_scale,
-                             a2_scale=layer.w2_input_scale)
+        return fused_experts(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            inplace=True,
+            use_fp8_w8a8=True,
+            global_num_experts=global_num_experts,
+            apply_router_weight_on_input=apply_router_weight_on_input,
+            expert_map=expert_map,
+            w1_scale=layer.w13_weight_scale,
+            w2_scale=layer.w2_weight_scale,
+            a1_scale=layer.w13_input_scale,
+            a2_scale=layer.w2_input_scale)

0 commit comments