|
66 | 66 | "linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj.", COL_TP), |
67 | 67 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP), |
68 | 68 | # MoE |
69 | | - "router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE), |
| 69 | + "router": NameRemapping( |
| 70 | + "backbone.layers.{}.mixer.gate.", {"mapping": {"expert_bias": "e_score_correction_bias"}} |
| 71 | + ), |
70 | 72 | "local_experts.linear_fc1": NameRemapping( |
71 | | - "backbone.layers.{}.mixer.experts.{}.up_proj", COL_ETP |
| 73 | + "backbone.layers.{}.mixer.experts.{}.up_proj.", COL_ETP |
72 | 74 | ), |
73 | 75 | "local_experts.linear_fc2": NameRemapping( |
74 | 76 | "backbone.layers.{}.mixer.experts.{}.down_proj.", ROW_ETP |
|
104 | 106 | "linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj."), |
105 | 107 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."), |
106 | 108 | # MoE |
107 | | - "router": NameRemapping("backbone.layers.{}.mlp.gate."), |
| 109 | + "router": NameRemapping( |
| 110 | + "backbone.layers.{}.mixer.gate.", {"mapping": {"expert_bias": "e_score_correction_bias"}} |
| 111 | + ), |
108 | 112 | "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj."), |
109 | 113 | "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj."), |
110 | 114 | "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj."), |
|
0 commit comments