From 640fd77a258307cadbee6e183f93a591975fb3a9 Mon Sep 17 00:00:00 2001
From: maekawatoshiki
Date: Sun, 12 Mar 2023 19:38:25 +0900
Subject: [PATCH 1/3] Use buffering

---
 main.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/main.cpp b/main.cpp
index f02b5ddbde94d..b5eae67a9b58d 100644
--- a/main.cpp
+++ b/main.cpp
@@ -73,7 +73,11 @@ struct llama_model {
 bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
     printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());

+    const size_t f_buf_size = 1024*1024;
+    char *f_buf = (char *)malloc(f_buf_size);
+
     auto fin = std::ifstream(fname, std::ios::binary);
+    fin.rdbuf()->pubsetbuf(f_buf, f_buf_size);
     if (!fin) {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
         return false;
@@ -311,6 +315,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());

         fin = std::ifstream(fname_part, std::ios::binary);
+        fin.rdbuf()->pubsetbuf(f_buf, f_buf_size);
         fin.seekg(file_offset);

         // load weights
@@ -496,6 +501,8 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         fin.close();
     }

+    free(f_buf);
+
     return true;
 }


From efaa30ef1118831c9f27e3341336f83e271cae1a Mon Sep 17 00:00:00 2001
From: maekawatoshiki
Date: Mon, 13 Mar 2023 10:38:41 +0900
Subject: [PATCH 2/3] Use vector

---
 main.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/main.cpp b/main.cpp
index b5eae67a9b58d..5b7093728066a 100644
--- a/main.cpp
+++ b/main.cpp
@@ -74,10 +74,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
     printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());

     const size_t f_buf_size = 1024*1024;
-    char *f_buf = (char *)malloc(f_buf_size);
+    std::vector<char> f_buf(f_buf_size);

     auto fin = std::ifstream(fname, std::ios::binary);
-    fin.rdbuf()->pubsetbuf(f_buf, f_buf_size);
+    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf_size);
     if (!fin) {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
         return false;
@@ -315,7 +315,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());

         fin = std::ifstream(fname_part, std::ios::binary);
-        fin.rdbuf()->pubsetbuf(f_buf, f_buf_size);
+        fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf_size);
         fin.seekg(file_offset);

         // load weights
@@ -501,8 +501,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         fin.close();
     }

-    free(f_buf);
-
     return true;
 }


From 3419f88f11ec6ca3b9c18177bbd96f99d8500e52 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 13 Mar 2023 18:33:23 +0200
Subject: [PATCH 3/3] Minor

---
 main.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/main.cpp b/main.cpp
index 5b7093728066a..a5cb202e6b8b9 100644
--- a/main.cpp
+++ b/main.cpp
@@ -73,11 +73,10 @@ struct llama_model {
 bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
     printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());

-    const size_t f_buf_size = 1024*1024;
-    std::vector<char> f_buf(f_buf_size);
+    std::vector<char> f_buf(1024*1024);

     auto fin = std::ifstream(fname, std::ios::binary);
-    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf_size);
+    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
     if (!fin) {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
         return false;
@@ -315,7 +314,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());

         fin = std::ifstream(fname_part, std::ios::binary);
-        fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf_size);
+        fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
         fin.seekg(file_offset);

         // load weights
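
For readers unfamiliar with the technique: the series hands the std::ifstream's
underlying filebuf a large caller-owned buffer via pubsetbuf(), so the long
sequential reads of a model file refill from the OS in 1 MiB chunks instead of
the small implementation default. A minimal standalone sketch follows; it is
not part of the patches, "model.bin" is a placeholder path, and the effect of
pubsetbuf() on an already-open stream is implementation-defined (common
implementations honor it as long as no characters have been read yet), so this
illustrates the approach rather than guaranteeing portability.

    // Sketch of the buffering approach used in the patches above.
    // Assumption: "model.bin" is a placeholder filename, not from the PR.
    #include <cstdio>
    #include <fstream>
    #include <vector>

    int main() {
        std::vector<char> f_buf(1024*1024); // 1 MiB, the size the patches use

        auto fin = std::ifstream("model.bin", std::ios::binary);
        // Install the buffer before any read; once characters have been
        // extracted, pubsetbuf() is no longer guaranteed to have any effect.
        fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
        if (!fin) {
            std::fprintf(stderr, "failed to open model.bin\n");
            return 1;
        }

        // Reads now refill from the file through the large buffer.
        char header[4];
        fin.read(header, sizeof(header));

        // f_buf must outlive fin, which the patches ensure by declaring it
        // at the top of llama_model_load().
        return 0;
    }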