Skip to content

Commit 110b78e

Browse files
committed
remove gated mlp
Signed-off-by: jenchen13 <jennifchen@nvidia.com>
1 parent 1a9add6 commit 110b78e

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

modelopt/torch/export/plugins/mcore_nemotron.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,9 @@
6969
"linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP),
7070
# MoE
7171
"router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE),
72-
"local_experts.linear_fc1": GatedMLPMerging("model.layers.{}.mlp.experts.{}.", COL_ETP),
72+
"local_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.experts.{}.up_proj", COL_ETP),
7373
"local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP),
74-
"shared_experts.linear_fc1": GatedMLPMerging("model.layers.{}.mlp.shared_experts.", COL_TP),
74+
"shared_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.shared_experts.up_proj.", COL_TP),
7575
"shared_experts.linear_fc2": NameRemapping(
7676
"model.layers.{}.mlp.shared_experts.down_proj.", ROW_TP
7777
),
@@ -102,9 +102,9 @@
102102
"linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."),
103103
# MoE
104104
"router": NameRemapping("model.layers.{}.mlp.gate."),
105-
"local_experts.linear_fc1": GatedMLPSlicing("model.layers.{}.mlp.experts.{}."),
105+
"local_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.experts.{}.up_proj."),
106106
"local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj."),
107-
"shared_experts.linear_fc1": GatedMLPSlicing("model.layers.{}.mlp.shared_experts."),
107+
"shared_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.shared_experts.up_proj."),
108108
"shared_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.shared_experts.down_proj."),
109109

110110
}

0 commit comments

Comments
 (0)