@@ -462,7 +462,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
462462 clip_skip,
463463 &chunk_hidden_states2, work_ctx);
464464 // concat
465- chunk_hidden_states = ggml_tensor_concat (work_ctx, chunk_hidden_states1, chunk_hidden_states2, 0 );
465+ chunk_hidden_states = ggml_ext_tensor_concat (work_ctx, chunk_hidden_states1, chunk_hidden_states2, 0 );
466466
467467 if (chunk_idx == 0 ) {
468468 text_model2->compute (n_threads,
@@ -484,18 +484,18 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
484484 LOG_DEBUG (" computing condition graph completed, taking %" PRId64 " ms" , t1 - t0);
485485 ggml_tensor* result = ggml_dup_tensor (work_ctx, chunk_hidden_states);
486486 {
487- float original_mean = ggml_tensor_mean (chunk_hidden_states);
487+ float original_mean = ggml_ext_tensor_mean (chunk_hidden_states);
488488 for (int i2 = 0 ; i2 < chunk_hidden_states->ne [2 ]; i2++) {
489489 for (int i1 = 0 ; i1 < chunk_hidden_states->ne [1 ]; i1++) {
490490 for (int i0 = 0 ; i0 < chunk_hidden_states->ne [0 ]; i0++) {
491- float value = ggml_tensor_get_f32 (chunk_hidden_states, i0, i1, i2);
491+ float value = ggml_ext_tensor_get_f32 (chunk_hidden_states, i0, i1, i2);
492492 value *= chunk_weights[i1];
493- ggml_tensor_set_f32 (result, value, i0, i1, i2);
493+ ggml_ext_tensor_set_f32 (result, value, i0, i1, i2);
494494 }
495495 }
496496 }
497- float new_mean = ggml_tensor_mean (result);
498- ggml_tensor_scale (result, (original_mean / new_mean));
497+ float new_mean = ggml_ext_tensor_mean (result);
498+ ggml_ext_tensor_scale_inplace (result, (original_mean / new_mean));
499499 }
500500 if (zero_out_masked) {
501501 float * vec = (float *)result->data ;
@@ -874,18 +874,18 @@ struct SD3CLIPEmbedder : public Conditioner {
874874 work_ctx);
875875 {
876876 auto tensor = chunk_hidden_states_l;
877- float original_mean = ggml_tensor_mean (tensor);
877+ float original_mean = ggml_ext_tensor_mean (tensor);
878878 for (int i2 = 0 ; i2 < tensor->ne [2 ]; i2++) {
879879 for (int i1 = 0 ; i1 < tensor->ne [1 ]; i1++) {
880880 for (int i0 = 0 ; i0 < tensor->ne [0 ]; i0++) {
881- float value = ggml_tensor_get_f32 (tensor, i0, i1, i2);
881+ float value = ggml_ext_tensor_get_f32 (tensor, i0, i1, i2);
882882 value *= chunk_weights[i1];
883- ggml_tensor_set_f32 (tensor, value, i0, i1, i2);
883+ ggml_ext_tensor_set_f32 (tensor, value, i0, i1, i2);
884884 }
885885 }
886886 }
887- float new_mean = ggml_tensor_mean (tensor);
888- ggml_tensor_scale (tensor, (original_mean / new_mean));
887+ float new_mean = ggml_ext_tensor_mean (tensor);
888+ ggml_ext_tensor_scale_inplace (tensor, (original_mean / new_mean));
889889 }
890890
891891 if (chunk_idx == 0 ) {
@@ -932,18 +932,18 @@ struct SD3CLIPEmbedder : public Conditioner {
932932
933933 {
934934 auto tensor = chunk_hidden_states_g;
935- float original_mean = ggml_tensor_mean (tensor);
935+ float original_mean = ggml_ext_tensor_mean (tensor);
936936 for (int i2 = 0 ; i2 < tensor->ne [2 ]; i2++) {
937937 for (int i1 = 0 ; i1 < tensor->ne [1 ]; i1++) {
938938 for (int i0 = 0 ; i0 < tensor->ne [0 ]; i0++) {
939- float value = ggml_tensor_get_f32 (tensor, i0, i1, i2);
939+ float value = ggml_ext_tensor_get_f32 (tensor, i0, i1, i2);
940940 value *= chunk_weights[i1];
941- ggml_tensor_set_f32 (tensor, value, i0, i1, i2);
941+ ggml_ext_tensor_set_f32 (tensor, value, i0, i1, i2);
942942 }
943943 }
944944 }
945- float new_mean = ggml_tensor_mean (tensor);
946- ggml_tensor_scale (tensor, (original_mean / new_mean));
945+ float new_mean = ggml_ext_tensor_mean (tensor);
946+ ggml_ext_tensor_scale_inplace (tensor, (original_mean / new_mean));
947947 }
948948
949949 if (chunk_idx == 0 ) {
@@ -984,18 +984,18 @@ struct SD3CLIPEmbedder : public Conditioner {
984984 work_ctx);
985985 {
986986 auto tensor = chunk_hidden_states_t5;
987- float original_mean = ggml_tensor_mean (tensor);
987+ float original_mean = ggml_ext_tensor_mean (tensor);
988988 for (int i2 = 0 ; i2 < tensor->ne [2 ]; i2++) {
989989 for (int i1 = 0 ; i1 < tensor->ne [1 ]; i1++) {
990990 for (int i0 = 0 ; i0 < tensor->ne [0 ]; i0++) {
991- float value = ggml_tensor_get_f32 (tensor, i0, i1, i2);
991+ float value = ggml_ext_tensor_get_f32 (tensor, i0, i1, i2);
992992 value *= chunk_weights[i1];
993- ggml_tensor_set_f32 (tensor, value, i0, i1, i2);
993+ ggml_ext_tensor_set_f32 (tensor, value, i0, i1, i2);
994994 }
995995 }
996996 }
997- float new_mean = ggml_tensor_mean (tensor);
998- ggml_tensor_scale (tensor, (original_mean / new_mean));
997+ float new_mean = ggml_ext_tensor_mean (tensor);
998+ ggml_ext_tensor_scale_inplace (tensor, (original_mean / new_mean));
999999 }
10001000 } else {
10011001 chunk_hidden_states_t5 = ggml_new_tensor_2d (work_ctx, GGML_TYPE_F32, 4096 , chunk_len);
@@ -1013,19 +1013,19 @@ struct SD3CLIPEmbedder : public Conditioner {
10131013 for (int i0 = 0 ; i0 < chunk_hidden_states_lg_pad->ne [0 ]; i0++) {
10141014 float value = 0 .f ;
10151015 if (i0 < chunk_hidden_states_l->ne [0 ]) {
1016- value = ggml_tensor_get_f32 (chunk_hidden_states_l, i0, i1, i2);
1016+ value = ggml_ext_tensor_get_f32 (chunk_hidden_states_l, i0, i1, i2);
10171017 } else if (i0 < chunk_hidden_states_l->ne [0 ] + chunk_hidden_states_g->ne [0 ]) {
1018- value = ggml_tensor_get_f32 (chunk_hidden_states_g, i0 - chunk_hidden_states_l->ne [0 ], i1, i2);
1018+ value = ggml_ext_tensor_get_f32 (chunk_hidden_states_g, i0 - chunk_hidden_states_l->ne [0 ], i1, i2);
10191019 }
1020- ggml_tensor_set_f32 (chunk_hidden_states_lg_pad, value, i0, i1, i2);
1020+ ggml_ext_tensor_set_f32 (chunk_hidden_states_lg_pad, value, i0, i1, i2);
10211021 }
10221022 }
10231023 }
10241024
1025- chunk_hidden_states = ggml_tensor_concat (work_ctx, chunk_hidden_states_lg_pad, chunk_hidden_states_t5, 1 ); // [n_token*2, 4096]
1025+ chunk_hidden_states = ggml_ext_tensor_concat (work_ctx, chunk_hidden_states_lg_pad, chunk_hidden_states_t5, 1 ); // [n_token*2, 4096]
10261026
10271027 if (chunk_idx == 0 ) {
1028- pooled = ggml_tensor_concat (work_ctx, pooled_l, pooled_g, 0 ); // [768 + 1280]
1028+ pooled = ggml_ext_tensor_concat (work_ctx, pooled_l, pooled_g, 0 ); // [768 + 1280]
10291029 }
10301030
10311031 int64_t t1 = ggml_time_ms ();
@@ -1269,18 +1269,18 @@ struct FluxCLIPEmbedder : public Conditioner {
12691269 work_ctx);
12701270 {
12711271 auto tensor = chunk_hidden_states;
1272- float original_mean = ggml_tensor_mean (tensor);
1272+ float original_mean = ggml_ext_tensor_mean (tensor);
12731273 for (int i2 = 0 ; i2 < tensor->ne [2 ]; i2++) {
12741274 for (int i1 = 0 ; i1 < tensor->ne [1 ]; i1++) {
12751275 for (int i0 = 0 ; i0 < tensor->ne [0 ]; i0++) {
1276- float value = ggml_tensor_get_f32 (tensor, i0, i1, i2);
1276+ float value = ggml_ext_tensor_get_f32 (tensor, i0, i1, i2);
12771277 value *= chunk_weights[i1];
1278- ggml_tensor_set_f32 (tensor, value, i0, i1, i2);
1278+ ggml_ext_tensor_set_f32 (tensor, value, i0, i1, i2);
12791279 }
12801280 }
12811281 }
1282- float new_mean = ggml_tensor_mean (tensor);
1283- ggml_tensor_scale (tensor, (original_mean / new_mean));
1282+ float new_mean = ggml_ext_tensor_mean (tensor);
1283+ ggml_ext_tensor_scale_inplace (tensor, (original_mean / new_mean));
12841284 }
12851285 } else {
12861286 chunk_hidden_states = ggml_new_tensor_2d (work_ctx, GGML_TYPE_F32, 4096 , chunk_len);
@@ -1483,18 +1483,18 @@ struct T5CLIPEmbedder : public Conditioner {
14831483 work_ctx);
14841484 {
14851485 auto tensor = chunk_hidden_states;
1486- float original_mean = ggml_tensor_mean (tensor);
1486+ float original_mean = ggml_ext_tensor_mean (tensor);
14871487 for (int i2 = 0 ; i2 < tensor->ne [2 ]; i2++) {
14881488 for (int i1 = 0 ; i1 < tensor->ne [1 ]; i1++) {
14891489 for (int i0 = 0 ; i0 < tensor->ne [0 ]; i0++) {
1490- float value = ggml_tensor_get_f32 (tensor, i0, i1, i2);
1490+ float value = ggml_ext_tensor_get_f32 (tensor, i0, i1, i2);
14911491 value *= chunk_weights[i1];
1492- ggml_tensor_set_f32 (tensor, value, i0, i1, i2);
1492+ ggml_ext_tensor_set_f32 (tensor, value, i0, i1, i2);
14931493 }
14941494 }
14951495 }
1496- float new_mean = ggml_tensor_mean (tensor);
1497- ggml_tensor_scale (tensor, (original_mean / new_mean));
1496+ float new_mean = ggml_ext_tensor_mean (tensor);
1497+ ggml_ext_tensor_scale_inplace (tensor, (original_mean / new_mean));
14981498 }
14991499
15001500 int64_t t1 = ggml_time_ms ();
@@ -1505,7 +1505,7 @@ struct T5CLIPEmbedder : public Conditioner {
15051505 for (int i1 = 0 ; i1 < tensor->ne [1 ]; i1++) {
15061506 for (int i0 = 0 ; i0 < tensor->ne [0 ]; i0++) {
15071507 if (chunk_mask[i1] < 0 .f ) {
1508- ggml_tensor_set_f32 (tensor, 0 .f , i0, i1, i2);
1508+ ggml_ext_tensor_set_f32 (tensor, 0 .f , i0, i1, i2);
15091509 }
15101510 }
15111511 }
@@ -1664,7 +1664,7 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
16641664 image.data = nullptr ;
16651665
16661666 ggml_tensor* image_tensor = ggml_new_tensor_4d (work_ctx, GGML_TYPE_F32, resized_image.width , resized_image.height , 3 , 1 );
1667- sd_image_f32_to_tensor (resized_image, image_tensor, false );
1667+ sd_image_f32_to_ggml_tensor (resized_image, image_tensor, false );
16681668 free (resized_image.data );
16691669 resized_image.data = nullptr ;
16701670
@@ -1709,18 +1709,18 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
17091709 work_ctx);
17101710 {
17111711 auto tensor = hidden_states;
1712- float original_mean = ggml_tensor_mean (tensor);
1712+ float original_mean = ggml_ext_tensor_mean (tensor);
17131713 for (int i2 = 0 ; i2 < tensor->ne [2 ]; i2++) {
17141714 for (int i1 = 0 ; i1 < tensor->ne [1 ]; i1++) {
17151715 for (int i0 = 0 ; i0 < tensor->ne [0 ]; i0++) {
1716- float value = ggml_tensor_get_f32 (tensor, i0, i1, i2);
1716+ float value = ggml_ext_tensor_get_f32 (tensor, i0, i1, i2);
17171717 value *= weights[i1];
1718- ggml_tensor_set_f32 (tensor, value, i0, i1, i2);
1718+ ggml_ext_tensor_set_f32 (tensor, value, i0, i1, i2);
17191719 }
17201720 }
17211721 }
1722- float new_mean = ggml_tensor_mean (tensor);
1723- ggml_tensor_scale (tensor, (original_mean / new_mean));
1722+ float new_mean = ggml_ext_tensor_mean (tensor);
1723+ ggml_ext_tensor_scale_inplace (tensor, (original_mean / new_mean));
17241724 }
17251725
17261726 GGML_ASSERT (hidden_states->ne [1 ] > prompt_template_encode_start_idx);
@@ -1731,9 +1731,9 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
17311731 hidden_states->ne [1 ] - prompt_template_encode_start_idx,
17321732 hidden_states->ne [2 ]);
17331733
1734- ggml_tensor_iter (new_hidden_states, [&](ggml_tensor* new_hidden_states, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
1735- float value = ggml_tensor_get_f32 (hidden_states, i0, i1 + prompt_template_encode_start_idx, i2, i3);
1736- ggml_tensor_set_f32 (new_hidden_states, value, i0, i1, i2, i3);
1734+ ggml_ext_tensor_iter (new_hidden_states, [&](ggml_tensor* new_hidden_states, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
1735+ float value = ggml_ext_tensor_get_f32 (hidden_states, i0, i1 + prompt_template_encode_start_idx, i2, i3);
1736+ ggml_ext_tensor_set_f32 (new_hidden_states, value, i0, i1, i2, i3);
17371737 });
17381738
17391739 int64_t t1 = ggml_time_ms ();
0 commit comments