|
17 | 17 | """Custom mapping from Nemotron Hugging Face models to Megatron Core models.""" |
18 | 18 |
|
19 | 19 | from .mcore_custom import ( |
| 20 | + COL_ETP, |
20 | 21 | COL_TP, |
21 | 22 | REPLICATE, |
| 23 | + ROW_ETP, |
22 | 24 | ROW_TP, |
23 | 25 | CustomModuleMapping, |
24 | 26 | NameRemapping, |
|
63 | 65 | "pre_mlp_layernorm": NameRemapping("backbone.layers.{}.norm.", REPLICATE), |
64 | 66 | "linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj.", COL_TP), |
65 | 67 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP), |
| 68 | + # MoE |
| 69 | + "router": NameRemapping("backbone.layers.{}.mixer.gate.", REPLICATE), |
| 70 | + "local_experts.linear_fc1": NameRemapping( |
| 71 | + "backbone.layers.{}.mixer.experts.{}.up_proj.", COL_ETP |
| 72 | + ), |
| 73 | + "local_experts.linear_fc2": NameRemapping( |
| 74 | + "backbone.layers.{}.mixer.experts.{}.down_proj.", ROW_ETP |
| 75 | + ), |
| 76 | + "shared_experts.linear_fc1": NameRemapping( |
| 77 | + "backbone.layers.{}.mixer.shared_experts.up_proj.", COL_TP |
| 78 | + ), |
| 79 | + "shared_experts.linear_fc2": NameRemapping( |
| 80 | + "backbone.layers.{}.mixer.shared_experts.down_proj.", ROW_TP |
| 81 | + ), |
66 | 82 | } |
67 | 83 |
|
68 | 84 |
|
|
87 | 103 | "pre_mlp_layernorm": NameRemapping("backbone.layers.{}.norm."), |
88 | 104 | "linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj."), |
89 | 105 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."), |
| 106 | + # MoE |
| 107 | + "router": NameRemapping("backbone.layers.{}.mixer.gate."), |
| 108 | + "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj."), |
| 109 | + "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj."), |
| 110 | + "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj."), |
| 111 | + "shared_experts.linear_fc2": NameRemapping( |
| 112 | + "backbone.layers.{}.mixer.shared_experts.down_proj." |
| 113 | + ), |
90 | 114 | } |
0 commit comments