Skip to content

Commit e45aecb

Browse files
committed
qwen3 deepstack use layernorm
1 parent de0e3d3 commit e45aecb

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

tools/mtmd/clip.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -363,6 +363,7 @@ struct clip_model {
363363
// qwen3vl deepstack (multi-level feature fusion)
364364
struct deepstack_merger {
365365
ggml_tensor * norm_w = nullptr;
366+
ggml_tensor * norm_b = nullptr;
366367
ggml_tensor * fc1_w = nullptr;
367368
ggml_tensor * fc1_b = nullptr;
368369
ggml_tensor * fc2_w = nullptr;
@@ -660,7 +661,7 @@ struct clip_graph {
660661
return gf;
661662
}
662663

663-
// Qwen2VL and Qwen2.5VL use M-RoPE
664+
// Qwen2VL, Qwen2.5VL and Qwen3VL use M-RoPE
664665
ggml_cgraph * build_qwen2vl() {
665666
GGML_ASSERT(model.class_embedding == nullptr);
666667

@@ -913,8 +914,7 @@ struct clip_graph {
913914
LOG_INF("%s: DeepStack merger %zu weights: norm_w=[%lld], fc1_w=[%lld,%lld], fc2_w=[%lld,%lld]\n",
914915
__func__, i, merger.norm_w->ne[0], merger.fc1_w->ne[0], merger.fc1_w->ne[1], merger.fc2_w->ne[0], merger.fc2_w->ne[1]);
915916

916-
feat = ggml_rms_norm(ctx0, feat, eps);
917-
feat = ggml_mul(ctx0, feat, merger.norm_w);
917+
feat = build_norm(feat, merger.norm_w, merger.norm_b, norm_t, eps, -1);
918918

919919
feat = ggml_mul_mat(ctx0, merger.fc1_w, feat);
920920
feat = ggml_add(ctx0, feat, merger.fc1_b);
@@ -2851,6 +2851,7 @@ struct clip_model_loader {
28512851
for (size_t i = 0; i < hparams.deepstack_layers.size(); i++) {
28522852
auto & merger = model.deepstack_mergers[i];
28532853
merger.norm_w = get_tensor(string_format("v.deepstack.%d.norm.weight", (int)i), false);
2854+
merger.norm_b = get_tensor(string_format("v.deepstack.%d.norm.bias", (int)i), false);
28542855
merger.fc1_w = get_tensor(string_format("v.deepstack.%d.fc1.weight", (int)i), false);
28552856
merger.fc1_b = get_tensor(string_format("v.deepstack.%d.fc1.bias", (int)i), false);
28562857
merger.fc2_w = get_tensor(string_format("v.deepstack.%d.fc2.weight", (int)i), false);

0 commit comments

Comments
 (0)