1 parent ca90f50 commit ca6f755
vllm/lora/layers/fused_moe.py
@@ -25,6 +25,7 @@
     modular_triton_fused_moe,
     try_get_optimal_moe_config,
 )
+from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod


 class FusedMoEWithLoRA(BaseLayerWithLoRA):
@@ -280,10 +281,9 @@ def wrapper(*args, **kwargs):
             self.base_layer, fused_experts.moe_sum
         )

-        self.base_layer.quant_method.old_fused_experts = (
-            self.base_layer.quant_method.fused_experts
-        )
-        self.base_layer.quant_method.fused_experts = m_fused_moe_fn
+        self.base_layer.quant_method = FusedMoEModularMethod(
+            self.base_layer.quant_method, m_fused_moe_fn
+        )

     def create_lora_weights(
         self,
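For context, here is a minimal sketch of the pattern this commit adopts: instead of monkey-patching `fused_experts` onto the existing quant method (and stashing the original in `old_fused_experts`), the quant method is replaced by a modular-method object that wraps it and delegates the expert computation to a supplied fused-MoE callable. `FusedMoEModularMethod` is the real vLLM class named in the diff, but its actual signature and internals may differ; every other name below is an illustrative assumption.

```python
# Hedged sketch of the wrap-instead-of-patch pattern used in this commit.
# All class and function names other than the pattern itself are illustrative.

class QuantMethodSketch:
    """Stand-in for a quant method exposing an `apply`-style entry point."""

    def apply(self, x):
        return x  # placeholder for the real quantized MoE computation


class ModularMethodSketch:
    """Wraps an existing quant method and routes the fused-experts step
    through a caller-supplied callable instead of mutating attributes on
    the wrapped object."""

    def __init__(self, base_method, fused_moe_fn):
        self.base_method = base_method
        self.fused_moe_fn = fused_moe_fn

    def apply(self, x):
        # Delegate to the injected fused-MoE function; no monkey-patching
        # and no `old_fused_experts` bookkeeping needed.
        return self.fused_moe_fn(self.base_method, x)


def lora_fused_moe(base_method, x):
    """Illustrative LoRA-aware fused-MoE entry point."""
    return base_method.apply(x)


# After the commit, the layer's quant method is replaced wholesale,
# mirroring `self.base_layer.quant_method = FusedMoEModularMethod(...)`:
layer_quant_method = ModularMethodSketch(QuantMethodSketch(), lora_fused_moe)
print(layer_quant_method.apply([1, 2, 3]))
```

One design consequence of this change: because the wrapper owns a reference to the original quant method, restoring or inspecting the pre-LoRA behavior no longer depends on an ad-hoc `old_fused_experts` attribute surviving on the patched object.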