Skip to content

Commit 15a8351

Browse files
committed
nano3 import export HF
Signed-off-by: Jennifer Chen <jennifchen@nvidia.com>
1 parent ae78b9f commit 15a8351

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

modelopt/torch/export/plugins/mcore_nemotron.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
"""Custom mapping from Nemotron Hugging Face models to Megatron Core models."""
1818

1919
from .mcore_custom import (
20+
COL_ETP,
2021
COL_TP,
2122
REPLICATE,
23+
ROW_ETP,
2224
ROW_TP,
2325
CustomModuleMapping,
2426
NameRemapping,
@@ -63,6 +65,20 @@
6365
"pre_mlp_layernorm": NameRemapping("backbone.layers.{}.norm.", REPLICATE),
6466
"linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj.", COL_TP),
6567
"linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP),
68+
# MoE
69+
"router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE),
70+
"local_experts.linear_fc1": NameRemapping(
71+
"backbone.layers.{}.mixer.experts.{}.up_proj", COL_ETP
72+
),
73+
"local_experts.linear_fc2": NameRemapping(
74+
"backbone.layers.{}.mixer.experts.{}.down_proj.", ROW_ETP
75+
),
76+
"shared_experts.linear_fc1": NameRemapping(
77+
"backbone.layers.{}.mixer.shared_experts.up_proj.", COL_TP
78+
),
79+
"shared_experts.linear_fc2": NameRemapping(
80+
"backbone.layers.{}.mixer.shared_experts.down_proj.", ROW_TP
81+
),
6682
}
6783

6884

@@ -87,4 +103,12 @@
87103
"pre_mlp_layernorm": NameRemapping("backbone.layers.{}.norm."),
88104
"linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj."),
89105
"linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."),
106+
# MoE
107+
"router": NameRemapping("backbone.layers.{}.mlp.gate."),
108+
"local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj."),
109+
"local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj."),
110+
"shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj."),
111+
"shared_experts.linear_fc2": NameRemapping(
112+
"backbone.layers.{}.mixer.shared_experts.down_proj."
113+
),
90114
}

0 commit comments

Comments
 (0)