Skip to content

Commit dd88594

Browse files
Save prompt after initial prompt eval (fixes #1257)
1 parent f0d70f1 commit dd88594

File tree

2 files changed

+10
-9
lines changed

2 files changed

+10
-9
lines changed

examples/main/main.cpp

+6-6
Original file line number | Diff line number | Diff line change
@@ -385,6 +385,12 @@ int main(int argc, char ** argv) {
385385

386386
embd.clear();
387387

388+
// optionally save the session after prompt eval (for faster prompt loading next time)
389+
if (!path_session.empty() && need_to_save_session) {
390+
need_to_save_session = false;
391+
llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
392+
}
393+
388394
if ((int) embd_inp.size() <= n_consumed && !is_interacting) {
389395
// out of user input, sample next token
390396
const float temp = params.temp;
@@ -401,12 +407,6 @@ int main(int argc, char ** argv) {
401407
const float mirostat_eta = params.mirostat_eta;
402408
const bool penalize_nl = params.penalize_nl;
403409

404-
// optionally save the session on first sample (for faster prompt loading next time)
405-
if (!path_session.empty() && need_to_save_session) {
406-
need_to_save_session = false;
407-
llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
408-
}
409-
410410
llama_token id = 0;
411411

412412
{

llama.cpp

+4-3
Original file line number | Diff line number | Diff line change
@@ -2724,10 +2724,11 @@ size_t llama_load_session_file(struct llama_context * ctx, const char * path_ses
27242724
const size_t n_orig_state_size = llama_get_state_size(ctx);
27252725
if (n_state_size != n_orig_state_size) {
27262726
fprintf(stderr, "%s : failed to validate state size\n", __func__);
2727+
return 0;
27272728
}
2728-
std::unique_ptr<uint8_t[]> state_data(new uint8_t[n_state_size]);
2729-
file.read_raw(state_data.get(), n_state_size);
2730-
return llama_set_state_data(ctx, state_data.get());
2729+
std::vector<uint8_t> state_data(n_state_size);
2730+
file.read_raw(state_data.data(), n_state_size);
2731+
return llama_set_state_data(ctx, state_data.data());
27312732
}
27322733

27332734
size_t llama_save_session_file(struct llama_context * ctx, const char * path_session, const llama_token * tokens, size_t n_token_count) {

0 commit comments

Comments
 (0)