Skip to content

Commit 4115aa4

Browse files
ikawrakow (Iwan Kawrakow)
authored and committed
Use fused mul - unary op also for MoE models (#111)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
1 parent 2ca7c43 commit 4115aa4

File tree

1 file changed

+18
-16
lines changed

1 file changed

+18
-16
lines changed

src/llama.cpp

Lines changed: 18 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -8538,22 +8538,24 @@ static struct ggml_tensor * llm_build_moe_ffn(
85388538
ggml_tensor * gate = llm_build_lora_mm_id(lctx, ctx, gate_exps, cur, selected_experts); // [n_ff, n_expert_used, n_tokens]
85398539
cb(gate, "ffn_moe_gate", il);
85408540

8541-
switch (type_op) {
8542-
case LLM_FFN_SILU:
8543-
{
8544-
gate = ggml_silu(ctx, gate);
8545-
cb(gate, "ffn_moe_silu", il);
8546-
} break;
8547-
case LLM_FFN_GELU:
8548-
{
8549-
gate = ggml_gelu(ctx, gate);
8550-
cb(gate, "ffn_moe_gelu", il);
8551-
} break;
8552-
default:
8553-
GGML_ABORT("fatal error");
8554-
}
8555-
8556-
ggml_tensor * par = ggml_mul(ctx, up, gate); // [n_ff, n_expert_used, n_tokens]
8541+
// This is equivalent to the commented out code below
8542+
ggml_tensor * par = ggml_fused_mul_unary(ctx, gate, up, type_op == LLM_FFN_SILU ? GGML_UNARY_OP_SILU : GGML_UNARY_OP_GELU);
8543+
8544+
//switch (type_op) {
8545+
// case LLM_FFN_SILU:
8546+
// {
8547+
// gate = ggml_silu(ctx, gate);
8548+
// cb(gate, "ffn_moe_silu", il);
8549+
// } break;
8550+
// case LLM_FFN_GELU:
8551+
// {
8552+
// gate = ggml_gelu(ctx, gate);
8553+
// cb(gate, "ffn_moe_gelu", il);
8554+
// } break;
8555+
// default:
8556+
// GGML_ABORT("fatal error");
8557+
//}
8558+
//ggml_tensor * par = ggml_mul(ctx, up, gate); // [n_ff, n_expert_used, n_tokens]
85578559
cb(par, "ffn_moe_gate_par", il);
85588560

85598561
ggml_tensor * experts = llm_build_lora_mm_id(lctx, ctx, down_exps, par, selected_experts); // [n_embd, n_expert_used, n_tokens]

0 commit comments

Comments
 (0)