Skip to content

Commit dd75fc0

Browse files
authored
refactor: unify the naming style of ggml extension functions (#921)
1 parent 77eb95f commit dd75fc0

20 files changed: +600 / -601 lines changed

clip.hpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -641,10 +641,10 @@ class CLIPVisionEmbeddings : public GGMLBlock {
641641
// concat(patch_embedding, class_embedding) + position_embedding
642642
struct ggml_tensor* patch_embedding;
643643
int64_t N = pixel_values->ne[3];
644-
patch_embedding = ggml_nn_conv_2d(ctx, pixel_values, patch_embed_weight, nullptr, patch_size, patch_size); // [N, embed_dim, image_size // pacht_size, image_size // pacht_size]
645-
patch_embedding = ggml_reshape_3d(ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
646-
patch_embedding = ggml_cont(ctx, ggml_permute(ctx, patch_embedding, 1, 0, 2, 3)); // [N, num_patches, embed_dim]
647-
patch_embedding = ggml_reshape_4d(ctx, patch_embedding, 1, embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]
644+
patch_embedding = ggml_ext_conv_2d(ctx, pixel_values, patch_embed_weight, nullptr, patch_size, patch_size); // [N, embed_dim, image_size // pacht_size, image_size // pacht_size]
645+
patch_embedding = ggml_reshape_3d(ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
646+
patch_embedding = ggml_cont(ctx, ggml_permute(ctx, patch_embedding, 1, 0, 2, 3)); // [N, num_patches, embed_dim]
647+
patch_embedding = ggml_reshape_4d(ctx, patch_embedding, 1, embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]
648648

649649
struct ggml_tensor* class_embedding = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, embed_dim, N);
650650
class_embedding = ggml_repeat(ctx, class_embed_weight, class_embedding); // [N, embed_dim]
@@ -736,7 +736,7 @@ class CLIPTextModel : public GGMLBlock {
736736
auto text_projection = params["text_projection"];
737737
ggml_tensor* pooled = ggml_view_1d(ctx, x, hidden_size, x->nb[1] * max_token_idx);
738738
if (text_projection != nullptr) {
739-
pooled = ggml_nn_linear(ctx, pooled, text_projection, nullptr);
739+
pooled = ggml_ext_linear(ctx, pooled, text_projection, nullptr);
740740
} else {
741741
LOG_DEBUG("identity projection");
742742
}
@@ -836,7 +836,7 @@ class CLIPProjection : public UnaryBlock {
836836
if (transpose_weight) {
837837
w = ggml_cont(ctx, ggml_transpose(ctx, w));
838838
}
839-
return ggml_nn_linear(ctx, x, w, nullptr);
839+
return ggml_ext_linear(ctx, x, w, nullptr);
840840
}
841841
};
842842

common.hpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -205,8 +205,8 @@ class GEGLU : public UnaryBlock {
205205
auto gate_b = ggml_view_1d(ctx, b, b->ne[0] / 2, b->nb[0] * b->ne[0] / 2); // [dim_out, ]
206206

207207
auto x_in = x;
208-
x = ggml_nn_linear(ctx, x_in, x_w, x_b); // [ne3, ne2, ne1, dim_out]
209-
auto gate = ggml_nn_linear(ctx, x_in, gate_w, gate_b); // [ne3, ne2, ne1, dim_out]
208+
x = ggml_ext_linear(ctx, x_in, x_w, x_b); // [ne3, ne2, ne1, dim_out]
209+
auto gate = ggml_ext_linear(ctx, x_in, gate_w, gate_b); // [ne3, ne2, ne1, dim_out]
210210

211211
gate = ggml_gelu_inplace(ctx, gate);
212212

@@ -325,7 +325,7 @@ class CrossAttention : public GGMLBlock {
325325
auto k = to_k->forward(ctx, context); // [N, n_context, inner_dim]
326326
auto v = to_v->forward(ctx, context); // [N, n_context, inner_dim]
327327

328-
x = ggml_nn_attention_ext(ctx, backend, q, k, v, n_head, nullptr, false, false, flash_attn); // [N, n_token, inner_dim]
328+
x = ggml_ext_attention_ext(ctx, backend, q, k, v, n_head, nullptr, false, false, flash_attn); // [N, n_token, inner_dim]
329329

330330
x = to_out_0->forward(ctx, x); // [N, n_token, query_dim]
331331
return x;
@@ -492,7 +492,7 @@ class AlphaBlender : public GGMLBlock {
492492
float get_alpha() {
493493
// image_only_indicator is always tensor([0.]) and since mix_factor.shape is [1,]
494494
// so learned_with_images is same as learned
495-
float alpha = ggml_backend_tensor_get_f32(params["mix_factor"]);
495+
float alpha = ggml_ext_backend_tensor_get_f32(params["mix_factor"]);
496496
return sigmoid(alpha);
497497
}
498498

conditioner.hpp

Lines changed: 46 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -462,7 +462,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
462462
clip_skip,
463463
&chunk_hidden_states2, work_ctx);
464464
// concat
465-
chunk_hidden_states = ggml_tensor_concat(work_ctx, chunk_hidden_states1, chunk_hidden_states2, 0);
465+
chunk_hidden_states = ggml_ext_tensor_concat(work_ctx, chunk_hidden_states1, chunk_hidden_states2, 0);
466466

467467
if (chunk_idx == 0) {
468468
text_model2->compute(n_threads,
@@ -484,18 +484,18 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
484484
LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0);
485485
ggml_tensor* result = ggml_dup_tensor(work_ctx, chunk_hidden_states);
486486
{
487-
float original_mean = ggml_tensor_mean(chunk_hidden_states);
487+
float original_mean = ggml_ext_tensor_mean(chunk_hidden_states);
488488
for (int i2 = 0; i2 < chunk_hidden_states->ne[2]; i2++) {
489489
for (int i1 = 0; i1 < chunk_hidden_states->ne[1]; i1++) {
490490
for (int i0 = 0; i0 < chunk_hidden_states->ne[0]; i0++) {
491-
float value = ggml_tensor_get_f32(chunk_hidden_states, i0, i1, i2);
491+
float value = ggml_ext_tensor_get_f32(chunk_hidden_states, i0, i1, i2);
492492
value *= chunk_weights[i1];
493-
ggml_tensor_set_f32(result, value, i0, i1, i2);
493+
ggml_ext_tensor_set_f32(result, value, i0, i1, i2);
494494
}
495495
}
496496
}
497-
float new_mean = ggml_tensor_mean(result);
498-
ggml_tensor_scale(result, (original_mean / new_mean));
497+
float new_mean = ggml_ext_tensor_mean(result);
498+
ggml_ext_tensor_scale_inplace(result, (original_mean / new_mean));
499499
}
500500
if (zero_out_masked) {
501501
float* vec = (float*)result->data;
@@ -874,18 +874,18 @@ struct SD3CLIPEmbedder : public Conditioner {
874874
work_ctx);
875875
{
876876
auto tensor = chunk_hidden_states_l;
877-
float original_mean = ggml_tensor_mean(tensor);
877+
float original_mean = ggml_ext_tensor_mean(tensor);
878878
for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
879879
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
880880
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
881-
float value = ggml_tensor_get_f32(tensor, i0, i1, i2);
881+
float value = ggml_ext_tensor_get_f32(tensor, i0, i1, i2);
882882
value *= chunk_weights[i1];
883-
ggml_tensor_set_f32(tensor, value, i0, i1, i2);
883+
ggml_ext_tensor_set_f32(tensor, value, i0, i1, i2);
884884
}
885885
}
886886
}
887-
float new_mean = ggml_tensor_mean(tensor);
888-
ggml_tensor_scale(tensor, (original_mean / new_mean));
887+
float new_mean = ggml_ext_tensor_mean(tensor);
888+
ggml_ext_tensor_scale_inplace(tensor, (original_mean / new_mean));
889889
}
890890

891891
if (chunk_idx == 0) {
@@ -932,18 +932,18 @@ struct SD3CLIPEmbedder : public Conditioner {
932932

933933
{
934934
auto tensor = chunk_hidden_states_g;
935-
float original_mean = ggml_tensor_mean(tensor);
935+
float original_mean = ggml_ext_tensor_mean(tensor);
936936
for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
937937
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
938938
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
939-
float value = ggml_tensor_get_f32(tensor, i0, i1, i2);
939+
float value = ggml_ext_tensor_get_f32(tensor, i0, i1, i2);
940940
value *= chunk_weights[i1];
941-
ggml_tensor_set_f32(tensor, value, i0, i1, i2);
941+
ggml_ext_tensor_set_f32(tensor, value, i0, i1, i2);
942942
}
943943
}
944944
}
945-
float new_mean = ggml_tensor_mean(tensor);
946-
ggml_tensor_scale(tensor, (original_mean / new_mean));
945+
float new_mean = ggml_ext_tensor_mean(tensor);
946+
ggml_ext_tensor_scale_inplace(tensor, (original_mean / new_mean));
947947
}
948948

949949
if (chunk_idx == 0) {
@@ -984,18 +984,18 @@ struct SD3CLIPEmbedder : public Conditioner {
984984
work_ctx);
985985
{
986986
auto tensor = chunk_hidden_states_t5;
987-
float original_mean = ggml_tensor_mean(tensor);
987+
float original_mean = ggml_ext_tensor_mean(tensor);
988988
for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
989989
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
990990
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
991-
float value = ggml_tensor_get_f32(tensor, i0, i1, i2);
991+
float value = ggml_ext_tensor_get_f32(tensor, i0, i1, i2);
992992
value *= chunk_weights[i1];
993-
ggml_tensor_set_f32(tensor, value, i0, i1, i2);
993+
ggml_ext_tensor_set_f32(tensor, value, i0, i1, i2);
994994
}
995995
}
996996
}
997-
float new_mean = ggml_tensor_mean(tensor);
998-
ggml_tensor_scale(tensor, (original_mean / new_mean));
997+
float new_mean = ggml_ext_tensor_mean(tensor);
998+
ggml_ext_tensor_scale_inplace(tensor, (original_mean / new_mean));
999999
}
10001000
} else {
10011001
chunk_hidden_states_t5 = ggml_new_tensor_2d(work_ctx, GGML_TYPE_F32, 4096, chunk_len);
@@ -1013,19 +1013,19 @@ struct SD3CLIPEmbedder : public Conditioner {
10131013
for (int i0 = 0; i0 < chunk_hidden_states_lg_pad->ne[0]; i0++) {
10141014
float value = 0.f;
10151015
if (i0 < chunk_hidden_states_l->ne[0]) {
1016-
value = ggml_tensor_get_f32(chunk_hidden_states_l, i0, i1, i2);
1016+
value = ggml_ext_tensor_get_f32(chunk_hidden_states_l, i0, i1, i2);
10171017
} else if (i0 < chunk_hidden_states_l->ne[0] + chunk_hidden_states_g->ne[0]) {
1018-
value = ggml_tensor_get_f32(chunk_hidden_states_g, i0 - chunk_hidden_states_l->ne[0], i1, i2);
1018+
value = ggml_ext_tensor_get_f32(chunk_hidden_states_g, i0 - chunk_hidden_states_l->ne[0], i1, i2);
10191019
}
1020-
ggml_tensor_set_f32(chunk_hidden_states_lg_pad, value, i0, i1, i2);
1020+
ggml_ext_tensor_set_f32(chunk_hidden_states_lg_pad, value, i0, i1, i2);
10211021
}
10221022
}
10231023
}
10241024

1025-
chunk_hidden_states = ggml_tensor_concat(work_ctx, chunk_hidden_states_lg_pad, chunk_hidden_states_t5, 1); // [n_token*2, 4096]
1025+
chunk_hidden_states = ggml_ext_tensor_concat(work_ctx, chunk_hidden_states_lg_pad, chunk_hidden_states_t5, 1); // [n_token*2, 4096]
10261026

10271027
if (chunk_idx == 0) {
1028-
pooled = ggml_tensor_concat(work_ctx, pooled_l, pooled_g, 0); // [768 + 1280]
1028+
pooled = ggml_ext_tensor_concat(work_ctx, pooled_l, pooled_g, 0); // [768 + 1280]
10291029
}
10301030

10311031
int64_t t1 = ggml_time_ms();
@@ -1269,18 +1269,18 @@ struct FluxCLIPEmbedder : public Conditioner {
12691269
work_ctx);
12701270
{
12711271
auto tensor = chunk_hidden_states;
1272-
float original_mean = ggml_tensor_mean(tensor);
1272+
float original_mean = ggml_ext_tensor_mean(tensor);
12731273
for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
12741274
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
12751275
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
1276-
float value = ggml_tensor_get_f32(tensor, i0, i1, i2);
1276+
float value = ggml_ext_tensor_get_f32(tensor, i0, i1, i2);
12771277
value *= chunk_weights[i1];
1278-
ggml_tensor_set_f32(tensor, value, i0, i1, i2);
1278+
ggml_ext_tensor_set_f32(tensor, value, i0, i1, i2);
12791279
}
12801280
}
12811281
}
1282-
float new_mean = ggml_tensor_mean(tensor);
1283-
ggml_tensor_scale(tensor, (original_mean / new_mean));
1282+
float new_mean = ggml_ext_tensor_mean(tensor);
1283+
ggml_ext_tensor_scale_inplace(tensor, (original_mean / new_mean));
12841284
}
12851285
} else {
12861286
chunk_hidden_states = ggml_new_tensor_2d(work_ctx, GGML_TYPE_F32, 4096, chunk_len);
@@ -1483,18 +1483,18 @@ struct T5CLIPEmbedder : public Conditioner {
14831483
work_ctx);
14841484
{
14851485
auto tensor = chunk_hidden_states;
1486-
float original_mean = ggml_tensor_mean(tensor);
1486+
float original_mean = ggml_ext_tensor_mean(tensor);
14871487
for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
14881488
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
14891489
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
1490-
float value = ggml_tensor_get_f32(tensor, i0, i1, i2);
1490+
float value = ggml_ext_tensor_get_f32(tensor, i0, i1, i2);
14911491
value *= chunk_weights[i1];
1492-
ggml_tensor_set_f32(tensor, value, i0, i1, i2);
1492+
ggml_ext_tensor_set_f32(tensor, value, i0, i1, i2);
14931493
}
14941494
}
14951495
}
1496-
float new_mean = ggml_tensor_mean(tensor);
1497-
ggml_tensor_scale(tensor, (original_mean / new_mean));
1496+
float new_mean = ggml_ext_tensor_mean(tensor);
1497+
ggml_ext_tensor_scale_inplace(tensor, (original_mean / new_mean));
14981498
}
14991499

15001500
int64_t t1 = ggml_time_ms();
@@ -1505,7 +1505,7 @@ struct T5CLIPEmbedder : public Conditioner {
15051505
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
15061506
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
15071507
if (chunk_mask[i1] < 0.f) {
1508-
ggml_tensor_set_f32(tensor, 0.f, i0, i1, i2);
1508+
ggml_ext_tensor_set_f32(tensor, 0.f, i0, i1, i2);
15091509
}
15101510
}
15111511
}
@@ -1664,7 +1664,7 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
16641664
image.data = nullptr;
16651665

16661666
ggml_tensor* image_tensor = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, resized_image.width, resized_image.height, 3, 1);
1667-
sd_image_f32_to_tensor(resized_image, image_tensor, false);
1667+
sd_image_f32_to_ggml_tensor(resized_image, image_tensor, false);
16681668
free(resized_image.data);
16691669
resized_image.data = nullptr;
16701670

@@ -1709,18 +1709,18 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
17091709
work_ctx);
17101710
{
17111711
auto tensor = hidden_states;
1712-
float original_mean = ggml_tensor_mean(tensor);
1712+
float original_mean = ggml_ext_tensor_mean(tensor);
17131713
for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
17141714
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
17151715
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
1716-
float value = ggml_tensor_get_f32(tensor, i0, i1, i2);
1716+
float value = ggml_ext_tensor_get_f32(tensor, i0, i1, i2);
17171717
value *= weights[i1];
1718-
ggml_tensor_set_f32(tensor, value, i0, i1, i2);
1718+
ggml_ext_tensor_set_f32(tensor, value, i0, i1, i2);
17191719
}
17201720
}
17211721
}
1722-
float new_mean = ggml_tensor_mean(tensor);
1723-
ggml_tensor_scale(tensor, (original_mean / new_mean));
1722+
float new_mean = ggml_ext_tensor_mean(tensor);
1723+
ggml_ext_tensor_scale_inplace(tensor, (original_mean / new_mean));
17241724
}
17251725

17261726
GGML_ASSERT(hidden_states->ne[1] > prompt_template_encode_start_idx);
@@ -1731,9 +1731,9 @@ struct Qwen2_5_VLCLIPEmbedder : public Conditioner {
17311731
hidden_states->ne[1] - prompt_template_encode_start_idx,
17321732
hidden_states->ne[2]);
17331733

1734-
ggml_tensor_iter(new_hidden_states, [&](ggml_tensor* new_hidden_states, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
1735-
float value = ggml_tensor_get_f32(hidden_states, i0, i1 + prompt_template_encode_start_idx, i2, i3);
1736-
ggml_tensor_set_f32(new_hidden_states, value, i0, i1, i2, i3);
1734+
ggml_ext_tensor_iter(new_hidden_states, [&](ggml_tensor* new_hidden_states, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
1735+
float value = ggml_ext_tensor_get_f32(hidden_states, i0, i1 + prompt_template_encode_start_idx, i2, i3);
1736+
ggml_ext_tensor_set_f32(new_hidden_states, value, i0, i1, i2, i3);
17371737
});
17381738

17391739
int64_t t1 = ggml_time_ms();

control.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -230,7 +230,7 @@ class ControlNetBlock : public GGMLBlock {
230230

231231
auto middle_block_out = std::dynamic_pointer_cast<Conv2d>(blocks["middle_block_out.0"]);
232232

233-
auto t_emb = ggml_nn_timestep_embedding(ctx, timesteps, model_channels); // [N, model_channels]
233+
auto t_emb = ggml_ext_timestep_embedding(ctx, timesteps, model_channels); // [N, model_channels]
234234

235235
auto emb = time_embed_0->forward(ctx, t_emb);
236236
emb = ggml_silu_inplace(ctx, emb);

0 commit comments

Comments
 (0)