From 484f6e94381c0f88b9162abc9fe9ef7a49e387a9 Mon Sep 17 00:00:00 2001 From: Yaohui Liu Date: Sat, 20 May 2023 00:32:08 +0800 Subject: [PATCH 1/2] llama: initialize f16 tables in quantize c api. --- examples/quantize/quantize.cpp | 7 ------- llama.cpp | 6 ++++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 115d8fb1ba36b..7c991c8d56e0f 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -52,13 +52,6 @@ int main(int argc, char ** argv) { return 1; } - // needed to initialize f16 tables - { - struct ggml_init_params params = { 0, NULL, false }; - struct ggml_context * ctx = ggml_init(params); - ggml_free(ctx); - } - // parse command line arguments const std::string fname_inp = argv[1]; std::string fname_out; diff --git a/llama.cpp b/llama.cpp index 1f9d3784415ec..7b599c173c6f1 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2198,6 +2198,12 @@ int llama_model_quantize( enum llama_ftype ftype, int nthread) { try { + // needed to initialize f16 tables + { + struct ggml_init_params params = { 0, NULL, false }; + struct ggml_context * ctx = ggml_init(params); + ggml_free(ctx); + } llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread); return 0; } catch (const std::string & err) { From e78a971859ecc4435a25f1856ea5e69640847951 Mon Sep 17 00:00:00 2001 From: Yaohui Liu Date: Sat, 20 May 2023 11:42:28 +0800 Subject: [PATCH 2/2] Add llama_init_ggml c api. 
--- examples/quantize/quantize.cpp | 7 +++++++ llama.cpp | 11 +++++------ llama.h | 4 ++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 7c991c8d56e0f..115d8fb1ba36b 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -52,6 +52,13 @@ int main(int argc, char ** argv) { return 1; } + // needed to initialize f16 tables + { + struct ggml_init_params params = { 0, NULL, false }; + struct ggml_context * ctx = ggml_init(params); + ggml_free(ctx); + } + // parse command line arguments const std::string fname_inp = argv[1]; std::string fname_out; diff --git a/llama.cpp b/llama.cpp index 7b599c173c6f1..5b154d50c659c 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2188,6 +2188,11 @@ struct llama_context * llama_init_from_file( return ctx; } +void llama_init_ggml(struct ggml_init_params params) { + struct ggml_context * ctx = ggml_init(params); + ggml_free(ctx); +} + void llama_free(struct llama_context * ctx) { delete ctx; } @@ -2198,12 +2203,6 @@ int llama_model_quantize( enum llama_ftype ftype, int nthread) { try { - // needed to initialize f16 tables - { - struct ggml_init_params params = { 0, NULL, false }; - struct ggml_context * ctx = ggml_init(params); - ggml_free(ctx); - } llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread); return 0; } catch (const std::string & err) { diff --git a/llama.h b/llama.h index f955fa23db048..db98d05b7eb26 100644 --- a/llama.h +++ b/llama.h @@ -97,6 +97,10 @@ extern "C" { const char * path_model, struct llama_context_params params); + // Init the ggml context (it won't return a context ptr because it will free + // the ctx after initializing it). + LLAMA_API void llama_init_ggml(struct ggml_init_params params); + // Frees all allocated memory LLAMA_API void llama_free(struct llama_context * ctx);