From 484f6e94381c0f88b9162abc9fe9ef7a49e387a9 Mon Sep 17 00:00:00 2001 From: Yaohui Liu Date: Sat, 20 May 2023 00:32:08 +0800 Subject: [PATCH 1/2] llama: initialize f16 tables in quantize c api. --- examples/quantize/quantize.cpp | 7 ------- llama.cpp | 6 ++++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 115d8fb1ba36b..7c991c8d56e0f 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -52,13 +52,6 @@ int main(int argc, char ** argv) { return 1; } - // needed to initialize f16 tables - { - struct ggml_init_params params = { 0, NULL, false }; - struct ggml_context * ctx = ggml_init(params); - ggml_free(ctx); - } - // parse command line arguments const std::string fname_inp = argv[1]; std::string fname_out; diff --git a/llama.cpp b/llama.cpp index 1f9d3784415ec..7b599c173c6f1 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2198,6 +2198,12 @@ int llama_model_quantize( enum llama_ftype ftype, int nthread) { try { + // needed to initialize f16 tables + { + struct ggml_init_params params = { 0, NULL, false }; + struct ggml_context * ctx = ggml_init(params); + ggml_free(ctx); + } llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread); return 0; } catch (const std::string & err) { From e78a971859ecc4435a25f1856ea5e69640847951 Mon Sep 17 00:00:00 2001 From: Yaohui Liu Date: Sat, 20 May 2023 11:42:28 +0800 Subject: [PATCH 2/2] Add llama_init_ggml c api. 
--- examples/quantize/quantize.cpp | 7 +++++++ llama.cpp | 11 +++++------ llama.h | 4 ++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 7c991c8d56e0f..115d8fb1ba36b 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -52,6 +52,13 @@ int main(int argc, char ** argv) { return 1; } + // needed to initialize f16 tables + { + struct ggml_init_params params = { 0, NULL, false }; + struct ggml_context * ctx = ggml_init(params); + ggml_free(ctx); + } + // parse command line arguments const std::string fname_inp = argv[1]; std::string fname_out; diff --git a/llama.cpp b/llama.cpp index 7b599c173c6f1..5b154d50c659c 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2188,6 +2188,11 @@ struct llama_context * llama_init_from_file( return ctx; } +void llama_init_ggml(struct ggml_init_params params) { + struct ggml_context * ctx = ggml_init(params); + ggml_free(ctx); +} + void llama_free(struct llama_context * ctx) { delete ctx; } @@ -2198,12 +2203,6 @@ int llama_model_quantize( enum llama_ftype ftype, int nthread) { try { - // needed to initialize f16 tables - { - struct ggml_init_params params = { 0, NULL, false }; - struct ggml_context * ctx = ggml_init(params); - ggml_free(ctx); - } llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread); return 0; } catch (const std::string & err) { diff --git a/llama.h b/llama.h index f955fa23db048..db98d05b7eb26 100644 --- a/llama.h +++ b/llama.h @@ -97,6 +97,10 @@ extern "C" { const char * path_model, struct llama_context_params params); + // Init the ggml context (it won't return a context ptr because it will free + // the ctx after initializing it). + LLAMA_API void llama_init_ggml(struct ggml_init_params params); + // Frees all allocated memory LLAMA_API void llama_free(struct llama_context * ctx);