@@ -363,6 +363,7 @@ struct clip_model {
363363 // qwen3vl deepstack (multi-level feature fusion)
364364 struct deepstack_merger {
365365 ggml_tensor * norm_w = nullptr ;
366+ ggml_tensor * norm_b = nullptr ;
366367 ggml_tensor * fc1_w = nullptr ;
367368 ggml_tensor * fc1_b = nullptr ;
368369 ggml_tensor * fc2_w = nullptr ;
@@ -660,7 +661,7 @@ struct clip_graph {
660661 return gf;
661662 }
662663
663- // Qwen2VL and Qwen2.5VL use M-RoPE
664+ // Qwen2VL, Qwen2.5VL and Qwen3VL use M-RoPE
664665 ggml_cgraph * build_qwen2vl () {
665666 GGML_ASSERT (model.class_embedding == nullptr );
666667
@@ -913,8 +914,7 @@ struct clip_graph {
913914 LOG_INF (" %s: DeepStack merger %zu weights: norm_w=[%lld], fc1_w=[%lld,%lld], fc2_w=[%lld,%lld]\n " ,
914915 __func__, i, merger.norm_w ->ne [0 ], merger.fc1_w ->ne [0 ], merger.fc1_w ->ne [1 ], merger.fc2_w ->ne [0 ], merger.fc2_w ->ne [1 ]);
915916
916- feat = ggml_rms_norm (ctx0, feat, eps);
917- feat = ggml_mul (ctx0, feat, merger.norm_w );
917+ feat = build_norm (feat, merger.norm_w , merger.norm_b , norm_t , eps, -1 );
918918
919919 feat = ggml_mul_mat (ctx0, merger.fc1_w , feat);
920920 feat = ggml_add (ctx0, feat, merger.fc1_b );
@@ -2851,6 +2851,7 @@ struct clip_model_loader {
28512851 for (size_t i = 0 ; i < hparams.deepstack_layers .size (); i++) {
28522852 auto & merger = model.deepstack_mergers [i];
28532853 merger.norm_w = get_tensor (string_format (" v.deepstack.%d.norm.weight" , (int )i), false );
2854+ merger.norm_b = get_tensor (string_format (" v.deepstack.%d.norm.bias" , (int )i), false );
28542855 merger.fc1_w = get_tensor (string_format (" v.deepstack.%d.fc1.weight" , (int )i), false );
28552856 merger.fc1_b = get_tensor (string_format (" v.deepstack.%d.fc1.bias" , (int )i), false );
28562857 merger.fc2_w = get_tensor (string_format (" v.deepstack.%d.fc2.weight" , (int )i), false );
0 commit comments