
Commit b164259

chore : fix models indent after refactor (#16992)
1 parent 1f5accb commit b164259

39 files changed, +4244 −4255 lines changed

src/models/gemma2-iswa.cpp — 113 additions & 113 deletions
The change is indentation-only; the file after the fix reads:

    #include "models.h"

    llm_build_gemma2_iswa::llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
        const int64_t n_embd_head = hparams.n_embd_head_k;

        ggml_tensor * cur;
        ggml_tensor * inpL;

        inpL = build_inp_embd(model.tok_embd);

        inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd));
        cb(inpL, "inp_scaled", -1);

        // inp_pos - contains the positions
        ggml_tensor * inp_pos = build_inp_pos();

        auto * inp_attn = build_attn_inp_kv_iswa();

        ggml_tensor * inp_out_ids = build_inp_out_ids();

        for (int il = 0; il < n_layer; ++il) {
            // norm
            cur = build_norm(inpL,
                    model.layers[il].attn_norm, NULL,
                    LLM_NORM_RMS, il);
            cb(cur, "attn_norm", il);

            // self-attention
            {
                // compute Q and K and RoPE them
                ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
                cb(Qcur, "Qcur", il);

                ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
                cb(Kcur, "Kcur", il);

                ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
                cb(Vcur, "Vcur", il);

                Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head,    n_tokens);
                Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
                Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);

                Qcur = ggml_rope_ext(
                        ctx0, Qcur, inp_pos, nullptr,
                        n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                        ext_factor, attn_factor, beta_fast, beta_slow);

                Kcur = ggml_rope_ext(
                        ctx0, Kcur, inp_pos, nullptr,
                        n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                        ext_factor, attn_factor, beta_fast, beta_slow);

                cb(Qcur, "Qcur", il);
                cb(Kcur, "Kcur", il);
                cb(Vcur, "Vcur", il);

                Qcur = ggml_scale(ctx0, Qcur, hparams.f_attention_scale);

                cur = build_attn(inp_attn,
                        model.layers[il].wo, NULL,
                        Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f, il);
            }
            if (il == n_layer - 1 && inp_out_ids) {
                cur  = ggml_get_rows(ctx0,  cur, inp_out_ids);
                inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
            }
            cur = build_norm(cur,
                    model.layers[il].attn_post_norm, NULL,
                    LLM_NORM_RMS, il);
            cb(cur, "attn_post_norm", il);

            ggml_tensor * sa_out = ggml_add(ctx0, cur, inpL);
            cb(sa_out, "sa_out", il);

            cur = build_norm(sa_out,
                    model.layers[il].ffn_norm, NULL,
                    LLM_NORM_RMS, il);
            cb(cur, "ffn_norm", il);

            // feed-forward network
            {
                cur = build_ffn(cur,
                        model.layers[il].ffn_up,   NULL, NULL,
                        model.layers[il].ffn_gate, NULL, NULL,
                        model.layers[il].ffn_down, NULL, NULL,
                        NULL,
                        LLM_FFN_GELU, LLM_FFN_PAR, il);
                cb(cur, "ffn_out", il);
            }
            cur = build_norm(cur,
                    model.layers[il].ffn_post_norm, NULL,
                    LLM_NORM_RMS, -1);
            cb(cur, "ffn_post_norm", -1);

            cur = ggml_add(ctx0, cur, sa_out);

            cur = build_cvec(cur, il);
            cb(cur, "l_out", il);

            // input for next layer
            inpL = cur;
        }
        cur = inpL;

        cur = build_norm(cur,
                model.output_norm, NULL,
                LLM_NORM_RMS, -1);

        cb(cur, "result_norm", -1);
        res->t_embd = cur;

        // lm_head
        cur = build_lora_mm(model.output, cur);

        // final logit soft-capping
        cur = ggml_scale(ctx0, cur, 1.0f / hparams.f_final_logit_softcapping);
        cur = ggml_tanh(ctx0, cur);
        cur = ggml_scale(ctx0, cur, hparams.f_final_logit_softcapping);

        cb(cur, "result_output", -1);
        res->t_logits = cur;

        ggml_build_forward_expand(gf, cur);
    }
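For reference, the three calls under "final logit soft-capping" implement softcap(x) = s * tanh(x / s), with s = hparams.f_final_logit_softcapping read from the model metadata. A minimal scalar sketch of that transform (standalone C++, not taken from the llama.cpp sources):

    #include <cmath>

    // Soft-cap a logit: scale down by s, squash with tanh, scale back up.
    // Keeps values roughly linear near zero while bounding them to (-s, s).
    static float softcap(float x, float s /* e.g. hparams.f_final_logit_softcapping */) {
        return s * std::tanh(x / s);
    }

In the graph above the same effect is expressed as ggml_scale by 1/s, ggml_tanh, then ggml_scale by s, applied element-wise to the lm_head output.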

0 commit comments