|
69 | 69 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP), |
70 | 70 | # MoE |
71 | 71 | "router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE), |
72 | | - "local_experts.linear_fc1": GatedMLPMerging("model.layers.{}.mlp.experts.{}.", COL_ETP), |
| 72 | + "local_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.experts.{}.up_proj.", COL_ETP), |
73 | 73 | "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP), |
74 | | - "shared_experts.linear_fc1": GatedMLPMerging("model.layers.{}.mlp.shared_experts.", COL_TP), |
| 74 | + "shared_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.shared_experts.up_proj.", COL_TP), |
75 | 75 | "shared_experts.linear_fc2": NameRemapping( |
76 | 76 | "model.layers.{}.mlp.shared_experts.down_proj.", ROW_TP |
77 | 77 | ), |
|
102 | 102 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."), |
103 | 103 | # MoE |
104 | 104 | "router": NameRemapping("model.layers.{}.mlp.gate."), |
105 | | - "local_experts.linear_fc1": GatedMLPSlicing("model.layers.{}.mlp.experts.{}."), |
| 105 | + "local_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.experts.{}.up_proj."), |
106 | 106 | "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj."), |
107 | | - "shared_experts.linear_fc1": GatedMLPSlicing("model.layers.{}.mlp.shared_experts."), |
| 107 | + "shared_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.shared_experts.up_proj."), |
108 | 108 | "shared_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.shared_experts.down_proj."), |
109 | 109 |
|
110 | 110 | } |
0 commit comments