
Commit ca6f755

Authored by Varun Sundar Rabindranath (varun-sundar-rabindranath)
[BugFix] Fix FusedMoELoRA + ModularKernel Integration (#28237)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>

1 parent ca90f50 · commit ca6f755

File tree

1 file changed: +3 −3 lines


vllm/lora/layers/fused_moe.py

Lines changed: 3 additions & 3 deletions
@@ -25,6 +25,7 @@
     modular_triton_fused_moe,
     try_get_optimal_moe_config,
 )
+from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod
 
 
 class FusedMoEWithLoRA(BaseLayerWithLoRA):
@@ -280,10 +281,9 @@ def wrapper(*args, **kwargs):
             self.base_layer, fused_experts.moe_sum
         )
 
-        self.base_layer.quant_method.old_fused_experts = (
-            self.base_layer.quant_method.fused_experts
+        self.base_layer.quant_method = FusedMoEModularMethod(
+            self.base_layer.quant_method, m_fused_moe_fn
         )
-        self.base_layer.quant_method.fused_experts = m_fused_moe_fn
 
     def create_lora_weights(
         self,
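For illustration, here is a minimal, hypothetical sketch of the pattern this change moves to: instead of monkey-patching fused_experts onto the existing quant method (and stashing the old attribute on the side), the quant method is replaced by a wrapper object that keeps a reference to the original method and routes expert computation through the modular kernel callable, as FusedMoEModularMethod does in vLLM. The names ModularMethodAdapter, BaseQuantMethod, and modular_fused_moe below are invented for this sketch and are not vLLM APIs.

```python
class BaseQuantMethod:
    # Stand-in for the base layer's original quant method (hypothetical).
    def fused_experts(self, hidden_states):
        return f"base fused_experts({hidden_states})"


class ModularMethodAdapter:
    # Stand-in for the FusedMoEModularMethod-style wrapper: it preserves the
    # original quant method and exposes the modular fused-MoE callable as
    # fused_experts, instead of mutating the original object in place.
    def __init__(self, wrapped_method, modular_fused_moe_fn):
        self.wrapped_method = wrapped_method        # original method kept intact
        self.fused_experts = modular_fused_moe_fn   # modular kernel path

    def __getattr__(self, name):
        # Delegate any other attribute lookups to the wrapped method.
        return getattr(self.wrapped_method, name)


def modular_fused_moe(hidden_states):
    # Stand-in for the modular fused-MoE function (m_fused_moe_fn in the diff).
    return f"modular fused_experts({hidden_states})"


# Before the fix, the code mutated the quant method in place:
#   quant_method.old_fused_experts = quant_method.fused_experts
#   quant_method.fused_experts = modular_fused_moe
# After the fix, the quant method is swapped for a wrapper:
quant_method = ModularMethodAdapter(BaseQuantMethod(), modular_fused_moe)
print(quant_method.fused_experts("x"))  # -> "modular fused_experts(x)"
```

The wrapper approach keeps the original quant method untouched and reachable, which avoids the ad-hoc old_fused_experts attribute and matches the constructor usage shown in the diff, FusedMoEModularMethod(self.base_layer.quant_method, m_fused_moe_fn).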

Comments (0)