@@ -1611,8 +1611,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 if (arch == LLM_ARCH_ERNIE4_5_MOE) {
-                    ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
-                    ml.get_key(LLM_KV_INTERLEAVE_MOE_LAYER_STEP, hparams.n_moe_layer_step);
+                    ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
+                    ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp, false);
+                    ml.get_key(LLM_KV_INTERLEAVE_MOE_LAYER_STEP, hparams.n_moe_layer_step);
                 }
                 switch (hparams.n_layer) {
                     case 18: type = LLM_TYPE_0_3B; break;
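The new `ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp, false)` call passes `false` as the `required` argument, so GGUF files converted before the shared-expert FFN length was exported still load: the missing key is skipped and `hparams.n_ff_shexp` keeps its default. A minimal sketch of that required/optional key pattern, with a hypothetical `kv_store` standing in for `llama_model_loader` and illustrative key names:

```cpp
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for llama_model_loader's key/value metadata access.
struct kv_store {
    std::map<std::string, int> data;

    // Mirrors the get_key(key, out, required) shape used in the diff:
    // a missing required key is a hard error, a missing optional key
    // leaves `out` untouched and just reports false.
    bool get_key(const std::string & key, int & out, bool required = true) const {
        auto it = data.find(key);
        if (it == data.end()) {
            if (required) {
                throw std::runtime_error("key not found: " + key);
            }
            return false; // optional key absent: keep the caller's default
        }
        out = it->second;
        return true;
    }
};

int main() {
    kv_store ml;
    ml.data["ernie4_5-moe.expert_feed_forward_length"] = 3584; // made-up key/value

    int n_ff_exp   = 0;
    int n_ff_shexp = 0; // keeps its default when the optional key is absent

    ml.get_key("ernie4_5-moe.expert_feed_forward_length", n_ff_exp);                 // required
    ml.get_key("ernie4_5-moe.expert_shared_feed_forward_length", n_ff_shexp, false); // optional

    std::printf("n_ff_exp = %d, n_ff_shexp = %d\n", n_ff_exp, n_ff_shexp);
    return 0;
}
```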
@@ -4787,7 +4788,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         int n_ff_exp = hparams.n_ff_exp;

                         layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, 0);
-                        layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff_exp, n_expert}, 0);
+                        layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), {n_expert}, TENSOR_NOT_REQUIRED);
+                        layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff_exp, n_expert}, TENSOR_NOT_REQUIRED);
                         layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), { n_ff_exp, n_embd, n_expert}, 0);
                         layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff_exp, n_expert}, 0);

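Both new lookups use the `TENSOR_NOT_REQUIRED` flag, so `create_tensor` yields `nullptr` instead of failing the load when a checkpoint ships without the expert-selection bias (`exp_probs_b`) or without separate gate-expert weights. A self-contained sketch of that pattern, with hypothetical `loader_stub`/`tensor_stub` types and illustrative tensor names:

```cpp
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>

struct tensor_stub { std::string name; };

constexpr int TENSOR_NOT_REQUIRED = 1 << 0; // stand-in for llama.cpp's flag

// Hypothetical stand-in for the model loader's tensor lookup.
struct loader_stub {
    std::map<std::string, tensor_stub> tensors;

    tensor_stub * create_tensor(const std::string & name, int flags = 0) {
        auto it = tensors.find(name);
        if (it == tensors.end()) {
            if (flags & TENSOR_NOT_REQUIRED) {
                return nullptr; // optional tensor: caller must tolerate nullptr
            }
            throw std::runtime_error("missing required tensor: " + name);
        }
        return &it->second;
    }
};

int main() {
    loader_stub ml;
    ml.tensors["blk.0.ffn_down_exps.weight"] = { "blk.0.ffn_down_exps.weight" };

    // Optional bias: absent in this fake checkpoint, so we get nullptr, not an error.
    tensor_stub * probs_b = ml.create_tensor("blk.0.exp_probs_b.bias", TENSOR_NOT_REQUIRED);
    // Required weight: present, so the lookup succeeds.
    tensor_stub * down    = ml.create_tensor("blk.0.ffn_down_exps.weight");

    std::printf("probs_b: %s, down_exps: %s\n",
                probs_b ? "present" : "absent", down ? "present" : "absent");
    return 0;
}
```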
@@ -8433,7 +8435,9 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
             cb(ffn_inp, "ffn_inp", il);

             // feed-forward network
-            if (model.layers[il].ffn_gate_inp == nullptr) {
+            bool is_moe_layer = arch == LLM_ARCH_ERNIE4_5_MOE && hparams.n_moe_layer_step > 0 && (il + 1) % hparams.n_moe_layer_step == 0;
+
+            if (!is_moe_layer) {
                 cur = build_norm(ffn_inp,
                         model.layers[il].ffn_norm, NULL,
                         LLM_NORM_RMS, il);
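The dense-vs-MoE decision no longer keys off tensor presence (`ffn_gate_inp == nullptr`), which would be fragile now that some expert tensors are optional; instead it is derived from `hparams.n_moe_layer_step`: layer `il` (0-based) is a MoE layer when `il + 1` is a multiple of the step. A small standalone example of the rule, with a made-up step value:

```cpp
#include <cstdio>

int main() {
    const int n_layer          = 8;
    const int n_moe_layer_step = 2; // hypothetical value, for illustration only

    for (int il = 0; il < n_layer; ++il) {
        // Same predicate as in the hunk above (minus the arch check).
        const bool is_moe_layer = n_moe_layer_step > 0 &&
                                  (il + 1) % n_moe_layer_step == 0;
        std::printf("layer %d -> %s\n", il, is_moe_layer ? "MoE" : "dense");
    }
    // With a step of 2, layers 1, 3, 5, 7 use experts; layers 0, 2, 4, 6 stay dense.
    return 0;
}
```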
@@ -8458,7 +8462,7 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
                         model.layers[il].ffn_up_exps,
                         model.layers[il].ffn_gate_exps,
                         model.layers[il].ffn_down_exps,
-                        nullptr,
+                        model.layers[il].ffn_exp_probs_b,
                         n_expert, n_expert_used,
                         LLM_FFN_SILU, true,
                         false, 0.0,
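Here the previously hard-coded `nullptr` is replaced by the per-layer `ffn_exp_probs_b` bias, which may itself still be `nullptr` for models that lack the tensor. The internals of `build_moe_ffn` are not part of this diff, but in the DeepSeek-style scheme such a bias is added to the router scores used for top-k expert selection only, not to the weights that mix expert outputs. A rough standalone illustration with invented numbers:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    // Router logits for 4 experts, plus a per-expert selection bias
    // playing the role of ffn_exp_probs_b (all values made up).
    const std::vector<float> logits = { 0.10f, 0.80f, 0.30f, 0.75f };
    const std::vector<float> bias   = { 0.90f, 0.00f, 0.00f, 0.00f };

    // Bias only the scores used to *select* experts.
    std::vector<float> scores(logits);
    for (size_t e = 0; e < scores.size(); ++e) {
        scores[e] += bias[e];
    }

    // Pick the top-2 experts by biased score.
    std::vector<int> idx = { 0, 1, 2, 3 };
    std::partial_sort(idx.begin(), idx.begin() + 2, idx.end(),
                      [&](int a, int b) { return scores[a] > scores[b]; });

    std::printf("selected experts: %d, %d\n", idx[0], idx[1]);
    // Expert 0 wins a slot thanks to the bias, despite the lowest raw logit.
    return 0;
}
```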