Remove gated-MLP merging/slicing from Nemotron MoE expert fc1 mappings

jenchen13 · jenchen13 · commit 110b78ef0a87 · 2025-10-20T07:58:43.000-07:00
Signed-off-by: jenchen13 <jennifchen@nvidia.com>
diff --git a/modelopt/torch/export/plugins/mcore_nemotron.py b/modelopt/torch/export/plugins/mcore_nemotron.py
@@ -69,9 +69,9 @@
     "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP),
     # MoE
     "router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE),
-    "local_experts.linear_fc1": GatedMLPMerging("model.layers.{}.mlp.experts.{}.", COL_ETP),
+    "local_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.experts.{}.up_proj.", COL_ETP),
     "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP),
-    "shared_experts.linear_fc1": GatedMLPMerging("model.layers.{}.mlp.shared_experts.", COL_TP),
+    "shared_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.shared_experts.up_proj.", COL_TP),
     "shared_experts.linear_fc2": NameRemapping(
         "model.layers.{}.mlp.shared_experts.down_proj.", ROW_TP
     ),
@@ -102,9 +102,9 @@
     "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."),
     # MoE
     "router": NameRemapping("model.layers.{}.mlp.gate."),
-    "local_experts.linear_fc1": GatedMLPSlicing("model.layers.{}.mlp.experts.{}."),
+    "local_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.experts.{}.up_proj."),
     "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj."),
-    "shared_experts.linear_fc1": GatedMLPSlicing("model.layers.{}.mlp.shared_experts."),
+    "shared_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.shared_experts.up_proj."),
     "shared_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.shared_experts.down_proj."),
 
 }