From 2d29d4b89e21b3d2c4b9da14d806a4a2b5f48692 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebasti=C3=A1n=20Aedo?= <sebastian.aedo29@gmail.com>
Date: Sat, 11 Mar 2023 19:26:20 -0300
Subject: [PATCH 1/5] Apply suggested fixes to build on Windows

Issue: https://github.com/ggerganov/llama.cpp/issues/22
---
 ggml.c       | 20 ++++++++++----------
 main.cpp     |  1 +
 quantize.cpp |  1 +
 utils.cpp    |  5 +++--
 4 files changed, 15 insertions(+), 12 deletions(-)
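
The changes address three separate MSVC/Windows issues. Pointer arithmetic
on a void *, as in (uint8_t *) (y + 0*bs), is a GNU C extension that MSVC
does not implement, so the casts to uint8_t * are now applied before the
offset. The variable-length array uint8_t pp[qk/2] in utils.cpp is likewise
unavailable on MSVC; the temporary #define QK 32 sidesteps it until the
scratch buffers are reworked in the next patch. Finally, ggml_time_us()
relies on a one-time ggml_time_init() (needed on Windows to initialize the
timer), so both entry points now call it at the top of main().

A minimal, self-contained sketch of the pointer issue; the function name and
the 20-byte block layout are made up for illustration, not taken from ggml:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Read the leading float of block i inside an opaque packed buffer.
    static float read_scale(const void * x, int bs, int i) {
        // (const uint8_t *) (x + i*bs) needs void * arithmetic; MSVC rejects it.
        const uint8_t * pd = (const uint8_t *) x + i*bs;  // cast first, then offset
        float d;
        std::memcpy(&d, pd, sizeof(float));
        return d;
    }

    int main() {
        unsigned char buf[2 * 20] = {};
        float d = 3.5f;
        std::memcpy(buf + 20, &d, sizeof(float));    // scale of block 1
        std::printf("%f\n", read_scale(buf, 20, 1)); // prints 3.500000
        return 0;
    }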

diff --git a/ggml.c b/ggml.c
index 71c30280b1066..fbd7b9339dea6 100644
--- a/ggml.c
+++ b/ggml.c
@@ -407,8 +407,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
     const int nb = k / QK;
     const size_t bs = sizeof(float) + QK/2;
 
-    uint8_t * restrict pd = (uint8_t *) (y + 0*bs);
-    uint8_t * restrict pb = (uint8_t *) (y + 0*bs + sizeof(float));
+    uint8_t * restrict pd = ((uint8_t *)y + 0*bs);
+    uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float));
 
     uint8_t pp[QK/2];
 
@@ -654,8 +654,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
     const int nb = k / QK;
     const size_t bs = sizeof(float) + QK/2;
 
-    const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
-    const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
+    const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs);
+    const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + sizeof(float));
 
     // scalar
     for (int i = 0; i < nb; i++) {
@@ -1301,11 +1301,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
 
     const size_t bs = sizeof(float) + QK/2;
 
-    const uint8_t * restrict pd0 = (const uint8_t *) (x + 0*bs);
-    const uint8_t * restrict pd1 = (const uint8_t *) (y + 0*bs);
+    const uint8_t * restrict pd0 = ((const uint8_t *)x + 0*bs);
+    const uint8_t * restrict pd1 = ((const uint8_t *)y + 0*bs);
 
-    const uint8_t * restrict pb0 = (const uint8_t *) (x + 0*bs + sizeof(float));
-    const uint8_t * restrict pb1 = (const uint8_t *) (y + 0*bs + sizeof(float));
+    const uint8_t * restrict pb0 = ((const uint8_t *)x + 0*bs + sizeof(float));
+    const uint8_t * restrict pb1 = ((const uint8_t *)y + 0*bs + sizeof(float));
 
     float sumf = 0.0;
 
@@ -1731,8 +1731,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res
     const int nb = n / QK;
     const size_t bs = sizeof(float) + QK/2;
 
-    const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
-    const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
+    const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs);
+    const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + sizeof(float));
 
 #if __ARM_NEON
 #if QK == 32
diff --git a/main.cpp b/main.cpp
index 2f47480698f1e..dd8323940d84f 100644
--- a/main.cpp
+++ b/main.cpp
@@ -733,6 +733,7 @@ bool llama_eval(
 }
 
 int main(int argc, char ** argv) {
+    ggml_time_init();
     const int64_t t_main_start_us = ggml_time_us();
 
     gpt_params params;
diff --git a/quantize.cpp b/quantize.cpp
index 0ae537339ecf3..14c7b277a4024 100644
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -289,6 +289,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
 //  ./llama-quantize models/llama/ggml-model.bin models/llama/ggml-model-quant.bin type
 //
 int main(int argc, char ** argv) {
+    ggml_time_init();
     if (argc != 4) {
         fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]);
         fprintf(stderr, "  type = 2 - q4_0\n");
diff --git a/utils.cpp b/utils.cpp
index abb34756ac026..d0294908c8ff8 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -1,4 +1,5 @@
 #include "utils.h"
+#define QK 32
 
 #include <cassert>
 #include <cstring>
@@ -453,7 +454,7 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
 
     assert(k % qk == 0);
 
-    uint8_t pp[qk/2];
+    uint8_t pp[QK/2];
 
     char * pdst = (char *) dst;
 
@@ -507,7 +508,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
 
     assert(k % qk == 0);
 
-    uint8_t pp[qk/2];
+    uint8_t pp[QK/2];
 
     char * pdst = (char *) dst;
 

From 718bb989a52cadc198a8522c2cbaa28b9149d063 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebasti=C3=A1n=20Aedo?= <sebastian.aedo29@gmail.com>
Date: Sun, 12 Mar 2023 00:02:03 -0300
Subject: [PATCH 2/5] Remove unsupported VLAs

---
 utils.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
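
uint8_t pp[qk/2] with a runtime qk is a variable-length array: a C99 feature
that was never part of C++ and that MSVC does not support in either language.
This drops the stop-gap QK define again and gives each quantization routine a
std::vector scratch buffer instead; the vector has to be sized before it is
indexed, since operator[] does not grow it and size() drives the memcpy. Patch
5 later moves the buffer back onto the stack.

A sketch of the pattern with illustrative names (pack_nibbles and the trivial
copy loop stand in for the real packing code):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    void pack_nibbles(const uint8_t * src, uint8_t * dst, int n) {
        // uint8_t pp[n];            // VLA: valid C99, not C++; MSVC rejects it
        std::vector<uint8_t> pp(n);  // sized up front, so pp[i] and pp.size() are valid
        for (int i = 0; i < n; ++i) {
            pp[i] = src[i];
        }
        std::memcpy(dst, pp.data(), pp.size());
    }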

diff --git a/utils.cpp b/utils.cpp
index d0294908c8ff8..5213ecb07c1b9 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -1,5 +1,4 @@
 #include "utils.h"
-#define QK 32
 
 #include <cassert>
 #include <cstring>
@@ -454,7 +453,8 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
 
     assert(k % qk == 0);
 
-    uint8_t pp[QK/2];
+    std::vector<uint8_t> pp;
+    pp.resize(qk/2);
 
     char * pdst = (char *) dst;
 
@@ -493,7 +493,7 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
                     pp[l/2] = vi0 | (vi1 << 4);
                 }
 
-                memcpy(pb, pp, sizeof(pp));
+                memcpy(pb, pp.data(), pp.size());
                 pb += bs;
             }
         }
@@ -508,7 +508,8 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
 
     assert(k % qk == 0);
 
-    uint8_t pp[QK/2];
+    std::vector<uint8_t> pp;
+    pp.resize(qk/2);
 
     char * pdst = (char *) dst;
 
@@ -552,7 +553,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
                     pp[l/2] = vi0 | (vi1 << 4);
                 }
 
-                memcpy(pb + i*qk/2, pp, sizeof(pp));
+                memcpy(pb + i*qk/2, pp.data(), pp.size());
             }
         }
     }

From ce0d5fb9de67cbc80601f2ccff687344e7a60f8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebasti=C3=A1n=20Aedo?= <sebastian.aedo29@gmail.com>
Date: Sun, 12 Mar 2023 00:11:08 -0300
Subject: [PATCH 3/5] MSVC: Remove designated initializers, which MSVC only
 supports in C++20.

---
 main.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
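
The .mem_size = / .mem_buffer = initializers are designated initializers:
standard in C99, but only added to C++ in C++20, and MSVC enforces that when
compiling the .cpp files. The structs are therefore initialized positionally,
with the field names kept as comments, and the { .n_threads = n_threads }
graph initializer becomes a declaration followed by an assignment (tightened
up further in the next patch).

A small stand-in example (params_like and the values are hypothetical, with
the same field order as ggml_init_params):

    #include <cstddef>

    struct params_like {
        std::size_t mem_size;
        void *      mem_buffer;
    };

    // params_like p = { .mem_size = 1024, .mem_buffer = nullptr };  // needs C++20 on MSVC
    params_like p = {
        /*.mem_size   =*/ 1024,     // positional aggregate init works in any C++ version;
        /*.mem_buffer =*/ nullptr,  // the comments keep the field names readable
    };

    int main() {
        return p.mem_size == 1024 ? 0 : 1;
    }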

diff --git a/main.cpp b/main.cpp
index dd8323940d84f..0c86a2b480cf6 100644
--- a/main.cpp
+++ b/main.cpp
@@ -209,8 +209,8 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
     // create the ggml context
     {
         struct ggml_init_params params = {
-            .mem_size   = ctx_size,
-            .mem_buffer = NULL,
+            /*.mem_size   =*/ ctx_size,
+            /*.mem_buffer =*/ NULL,
         };
 
         model.ctx = ggml_init(params);
@@ -546,12 +546,13 @@ bool llama_eval(
     }
 
     struct ggml_init_params params = {
-        .mem_size   = buf_size,
-        .mem_buffer = buf,
+        /*.mem_size   =*/ buf_size,
+        /*.mem_buffer =*/ buf,
     };
 
     struct ggml_context * ctx0 = ggml_init(params);
-    struct ggml_cgraph gf = { .n_threads = n_threads };
+    struct ggml_cgraph gf;
+    gf.n_threads = n_threads;
 
     struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
     memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));

From c505a2d2176d8874c189a60ceaf6d717d7227c1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebasti=C3=A1n=20Aedo?= <sebastian.aedo29@gmail.com>
Date: Sun, 12 Mar 2023 02:12:53 -0300
Subject: [PATCH 4/5] Zero-initialize the remaining ggml_cgraph fields.

---
 main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
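
struct ggml_cgraph gf; from the previous patch default-initializes the
aggregate, so every member other than the assigned n_threads holds an
indeterminate value. ggml_cgraph gf = {}; value-initializes it, zeroing all
fields before n_threads is set.

A stand-in illustration (graph_like is a made-up aggregate in the spirit of
ggml_cgraph):

    struct graph_like {
        int    n_threads;
        int    n_nodes;
        void * work;
    };

    int main() {
        graph_like a;      // default-initialized: n_nodes and work are indeterminate
        graph_like b = {}; // value-initialized: every member is zeroed
        b.n_threads = 8;   // then set the one field that matters here
        (void) a;
        return b.n_nodes;  // 0
    }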

diff --git a/main.cpp b/main.cpp
index 0c86a2b480cf6..72d8bfabf79a1 100644
--- a/main.cpp
+++ b/main.cpp
@@ -551,7 +551,7 @@ bool llama_eval(
     };
 
     struct ggml_context * ctx0 = ggml_init(params);
-    struct ggml_cgraph gf;
+    ggml_cgraph gf = {};
     gf.n_threads = n_threads;
 
     struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);

From 3b20e78c16d9887e1d44659157753a0627df12d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebasti=C3=A1n=20Aedo?= <sebastian.aedo29@gmail.com>
Date: Sun, 12 Mar 2023 12:39:57 -0300
Subject: [PATCH 5/5] Replace the vector scratch buffers with stack allocations.

---
 utils.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
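
The std::vector scratch buffers from patch 2 allocate on the heap inside the
quantization loops. This moves them back onto the stack with alloca, which
MSVC and MinGW declare in <malloc.h> and most other platforms in <alloca.h>
(FreeBSD provides it without the extra include). With QK = 32 these buffers
are only qk/2 = 16 bytes, so the unchecked stack allocation stays small.

A sketch of the pattern with illustrative names (pack_nibbles and the copy
stand in for the real packing code):

    #include <cstdint>
    #include <cstring>

    #if defined(_MSC_VER) || defined(__MINGW32__)
    #include <malloc.h>   // alloca is declared here on MSVC/MinGW
    #elif !defined(__FreeBSD__)
    #include <alloca.h>
    #endif

    void pack_nibbles(const uint8_t * src, uint8_t * dst, int n) {
        const std::size_t pp_size = n;
        uint8_t * pp = static_cast<uint8_t *>(alloca(pp_size));  // stack scratch, released on return
        std::memcpy(pp, src, pp_size);
        std::memcpy(dst, pp, pp_size);
    }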

diff --git a/utils.cpp b/utils.cpp
index 5213ecb07c1b9..1105a2f2bb213 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -5,6 +5,12 @@
 #include <fstream>
 #include <regex>
 
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#include <malloc.h> // using malloc.h with MSC/MINGW
+#elif !defined(__FreeBSD__)
+#include <alloca.h>
+#endif
+
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     for (int i = 1; i < argc; i++) {
         std::string arg = argv[i];
@@ -453,8 +459,8 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
 
     assert(k % qk == 0);
 
-    std::vector<uint8_t> pp;
-    pp.resize(qk/2);
+    const size_t pp_size = qk / 2;
+    uint8_t *pp = static_cast<uint8_t*>(alloca(pp_size));
 
     char * pdst = (char *) dst;
 
@@ -493,7 +499,7 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
                     pp[l/2] = vi0 | (vi1 << 4);
                 }
 
-                memcpy(pb, pp.data(), pp.size());
+                memcpy(pb, pp, pp_size);
                 pb += bs;
             }
         }
@@ -508,8 +514,8 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
 
     assert(k % qk == 0);
 
-    std::vector<uint8_t> pp;
-    pp.resize(qk/2);
+    const size_t pp_size = qk / 2;
+    uint8_t *pp = static_cast<uint8_t*>(alloca(pp_size));
 
     char * pdst = (char *) dst;
 
@@ -553,7 +559,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
                     pp[l/2] = vi0 | (vi1 << 4);
                 }
 
-                memcpy(pb + i*qk/2, pp.data(), pp.size());
+                memcpy(pb + i*qk/2, pp, pp_size);
             }
         }
     }