ggml : fix tests on Arm + do not use BLAS for F16 data
ggerganov committed Oct 16, 2023
1 parent d8539f3 commit 53f805e
Showing 3 changed files with 10 additions and 8 deletions.
src/ggml.c: 2 additions, 0 deletions
@@ -11645,6 +11645,8 @@ static bool ggml_compute_forward_mul_mat_use_blas(
     // TODO: find the optimal values for these
     if (ggml_is_contiguous(src0) &&
         ggml_is_contiguous(src1) &&
+        src0->type == GGML_TYPE_F32 &&
+        src1->type == GGML_TYPE_F32 &&
         (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {

         /*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
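The added checks restrict the BLAS fast path to contiguous F32 operands, so F16 data (such as the converted conv weights in the tests below) now falls through to ggml's own mul_mat kernels. A minimal paraphrase of the gate as a standalone predicate, using simplified stand-in types rather than the real ggml_tensor internals:

#include <cstdint>
#include <cstdio>

// Illustrative stand-in for the fields the check consults; not the real ggml_tensor.
struct tensor_view {
    bool contiguous;
    bool is_f32;   // true only for GGML_TYPE_F32 data
};

// Mirrors the condition in the hunk above: BLAS is used only for large,
// contiguous, F32-by-F32 multiplications.
static bool mul_mat_would_use_blas(const tensor_view & src0, const tensor_view & src1,
                                   int64_t ne0, int64_t ne1, int64_t ne10) {
    return src0.contiguous && src1.contiguous &&
           src0.is_f32     && src1.is_f32     &&
           ne0 >= 32 && ne1 >= 32 && ne10 >= 32;
}

int main() {
    tensor_view f16_weights = { /*contiguous=*/true, /*is_f32=*/false };
    tensor_view f32_input   = { /*contiguous=*/true, /*is_f32=*/true  };
    // F16 weights skip BLAS even for large shapes after this change.
    printf("%d\n", mul_mat_would_use_blas(f16_weights, f32_input, 64, 64, 64)); // prints 0
    return 0;
}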
tests/test-conv1d.cpp: 4 additions, 4 deletions
@@ -41,8 +41,8 @@ void load_model(test_model & model, bool use_gpu = false) {
     }

     // Convert adata to fp16 format
-    uint16_t* hadata = new uint16_t[K * IC * OC];
-    ggml_fp32_to_fp16_row(adata, hadata, K * IC * OC);
+    std::vector<ggml_fp16_t> hadata(K * IC * OC);
+    ggml_fp32_to_fp16_row(adata, hadata.data(), K * IC * OC);

     // Initialize bdata
     float* bdata = new float[IL * IC * N];
@@ -111,9 +111,9 @@ void load_model(test_model & model, bool use_gpu = false) {

     // load data to buffer
     if(ggml_backend_is_cpu(model.backend)) {
-        memcpy(model.a->data, hadata, ggml_nbytes(model.a));
+        memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
     } else {
-        ggml_backend_tensor_set(model.a, hadata, 0, ggml_nbytes(model.a));
+        ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
     }

     // alloc memory
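Both conv tests (this file and tests/test-conv2d.cpp below) make the same change: the fp16 staging buffer becomes a std::vector<ggml_fp16_t>, the element type ggml_fp32_to_fp16_row actually writes, instead of a manually new'd uint16_t array, and the raw pointer uses become hadata.data(). A minimal sketch of the resulting pattern, assuming ggml.h is available; the tensor and size arguments are placeholders, not the test's actual model:

#include <cstring>
#include <vector>

#include "ggml.h"

// Convert n F32 values to F16 and copy them into a CPU-backend tensor,
// following the pattern used in the updated tests.
static void set_f16_tensor_from_f32(struct ggml_tensor * dst, const float * src, int n) {
    std::vector<ggml_fp16_t> tmp(n);                  // owned buffer, freed automatically
    ggml_fp32_to_fp16_row(src, tmp.data(), n);        // F32 -> F16 conversion
    memcpy(dst->data, tmp.data(), ggml_nbytes(dst));  // CPU case; otherwise use ggml_backend_tensor_set
}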
tests/test-conv2d.cpp: 4 additions, 4 deletions
@@ -41,8 +41,8 @@ void load_model(test_model & model, bool use_gpu = false) {
     }

     // Convert adata to fp16 format
-    uint16_t* hadata = new uint16_t[KW * KH * IC * OC];
-    ggml_fp32_to_fp16_row(adata, hadata, KW * KH * IC * OC);
+    std::vector<ggml_fp16_t> hadata(KW * KH * IC * OC);
+    ggml_fp32_to_fp16_row(adata, hadata.data(), KW * KH * IC * OC);

     // Initialize bdata
     float* bdata = new float[IW * IH * IC * N];
@@ -111,9 +111,9 @@ void load_model(test_model & model, bool use_gpu = false) {

     // load data to buffer
     if(ggml_backend_is_cpu(model.backend)) {
-        memcpy(model.a->data, hadata, ggml_nbytes(model.a));
+        memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
     } else {
-        ggml_backend_tensor_set(model.a, hadata, 0, ggml_nbytes(model.a));
+        ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
     }

     // alloc memory
