We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a1dd0d5 commit 6dfda81 — Copy full SHA for 6dfda81
vllm/lora/layers/fused_moe.py
@@ -136,7 +136,7 @@ def wrapper(*args, **kwargs):
136
M = min(num_tokens, CHUNK_SIZE)
137
138
shrink_config, expand_config = self._get_lora_moe_configs(
139
- op_prefix="gate_up",
+ op_prefix="w13",
140
lora_a_stacked=self.w1_lora_a_stacked,
141
lora_b_stacked=self.w1_lora_b_stacked,
142
num_slices=2,
@@ -214,7 +214,7 @@ def wrapper(*args, **kwargs):
214
215
216
217
- op_prefix="down",
+ op_prefix="w2",
218
lora_a_stacked=self.w2_lora_a_stacked,
219
lora_b_stacked=self.w2_lora_b_stacked,
220
num_slices=1,
0 commit comments