From 4fe9734e0964e9363b7f2367394d8a6f1be09525 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Mon, 20 Mar 2023 11:33:50 -0500 Subject: [PATCH 01/25] Improve interactive mode's coherence after EOS Aims to improve coherence and ability to resume the interactive session when the user is given input back after an end of text token is reached. Not sure what token 13 is or why it seems to help. See conversation for examples. --- main.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.cpp b/main.cpp index 15903337339fb..16751aa5f0457 100644 --- a/main.cpp +++ b/main.cpp @@ -1084,6 +1084,8 @@ int main(int argc, char ** argv) { if (embd.back() == EOS_TOKEN_ID) { if (params.interactive) { is_interacting = true; + embd.back() = 13; + last_n_tokens.back() = 13; } else { fprintf(stderr, " [end of text]\n"); break; From 330b86eed2d4e7e8588f62f5f1aba476e7ac406b Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Mon, 20 Mar 2023 13:02:54 -0500 Subject: [PATCH 02/25] Make newline token a constant --- main.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index 16751aa5f0457..8b9c98ba8eaf0 100644 --- a/main.cpp +++ b/main.cpp @@ -30,6 +30,7 @@ #define ANSI_BOLD "\x1b[1m" static const int EOS_TOKEN_ID = 2; +static const int NEWLINE_TOKEN_ID = 13; // determine number of model parts based on the dimension static const std::map LLAMA_N_PARTS = { @@ -1084,8 +1085,8 @@ int main(int argc, char ** argv) { if (embd.back() == EOS_TOKEN_ID) { if (params.interactive) { is_interacting = true; - embd.back() = 13; - last_n_tokens.back() = 13; + embd.back() = NEWLINE_TOKEN_ID; + last_n_tokens.back() = NEWLINE_TOKEN_ID; } else { fprintf(stderr, " [end of text]\n"); break; From 3eca29ec0dc3cd98713a68079939929291248138 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Mon, 20 Mar 2023 14:02:48 -0500 Subject: [PATCH 03/25] dynamically determine newline token --- main.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 8b9c98ba8eaf0..7e8853296b0fd 100644 --- a/main.cpp +++ b/main.cpp @@ -30,7 +30,6 @@ #define ANSI_BOLD "\x1b[1m" static const int EOS_TOKEN_ID = 2; -static const int NEWLINE_TOKEN_ID = 13; // determine number of model parts based on the dimension static const std::map LLAMA_N_PARTS = { @@ -177,6 +176,9 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab vocab.id_to_token[i] = word; vocab.score[i] = score; + // dynamically determine the newline token + const auto NEWLINE_TOKEN_ID = vocab.token_to_id["\n"]; + //if (i < 30000) { // fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str()); //} From 94edeaf7dfa1f987a5b2acaef9197b8ba6e3b57a Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Mon, 20 Mar 2023 14:35:36 -0500 Subject: [PATCH 04/25] relocate previous newline token const --- main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.cpp b/main.cpp index 7e8853296b0fd..097b17eefdae5 100644 --- a/main.cpp +++ b/main.cpp @@ -175,9 +175,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; vocab.score[i] = score; - - // dynamically determine the newline token - const auto NEWLINE_TOKEN_ID = vocab.token_to_id["\n"]; //if (i < 30000) { // fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str()); @@ -952,6 +949,9 @@ int main(int argc, char ** argv) { bool input_noecho = false; int remaining_tokens = params.n_predict; + + // dynamically determine the newline token + const auto NEWLINE_TOKEN_ID = vocab.token_to_id["\n"]; // set the color for the prompt which will be output initially if (params.use_color) { From ac6a9d9717e8c2b3fc8a4297fc331a82e1b74067 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Mon, 20 Mar 2023 14:37:27 -0500 Subject: [PATCH 05/25] cleanup whitespace --- main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 097b17eefdae5..5e2e14e7db1bb 100644 --- a/main.cpp +++ b/main.cpp @@ -175,7 +175,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; vocab.score[i] = score; - + //if (i < 30000) { // fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str()); //} From 466073896b10c9e22a16ac0b327008ef107d7db1 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Mon, 20 Mar 2023 15:13:10 -0500 Subject: [PATCH 06/25] print a new line on end of text in interactive this may need to be looked into further when not using a reverse prompt --- main.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/main.cpp b/main.cpp index 5e2e14e7db1bb..1127e2d06aa87 100644 --- a/main.cpp +++ b/main.cpp @@ -1089,6 +1089,7 @@ int main(int argc, char ** argv) { is_interacting = true; embd.back() = NEWLINE_TOKEN_ID; last_n_tokens.back() = NEWLINE_TOKEN_ID; + fprintf(stderr, "\n"); } else { fprintf(stderr, " [end of text]\n"); break; From d9284a580c353cef7ab6913f4adb54ed3be52007 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Mon, 20 Mar 2023 15:50:01 -0500 Subject: [PATCH 07/25] only print manual newline with reverse prompt fix formatting of reverse prompts so they don't end up at the end of the current line while not introducing unnecessary new lines otherwise --- main.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 1127e2d06aa87..7db8de9280d6a 100644 --- a/main.cpp +++ b/main.cpp @@ -1089,7 +1089,9 @@ int main(int argc, char ** argv) { is_interacting = true; embd.back() = NEWLINE_TOKEN_ID; last_n_tokens.back() = NEWLINE_TOKEN_ID; - fprintf(stderr, "\n"); + if (params.antiprompt.size() != 0) { + fprintf(stderr, "\n"); + } } else { fprintf(stderr, " [end of text]\n"); break; From 2479c78bc6fd8355580b551bff55fffb44e882d9 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Mon, 20 Mar 2023 17:45:52 -0500 Subject: [PATCH 08/25] alternate approach to replace end of text tokens --- main.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/main.cpp b/main.cpp index 7db8de9280d6a..5461c9e8446b9 100644 --- a/main.cpp +++ b/main.cpp @@ -1001,6 +1001,12 @@ int main(int argc, char ** argv) { t_sample_us += ggml_time_us() - t_start_sample_us; } + // replace end of text token with newline token when in interactive mode + if (id == EOS_TOKEN_ID && params.interactive) { + id = NEWLINE_TOKEN_ID; + is_interacting = true; + } + // add it to the context embd.push_back(id); @@ -1086,12 +1092,12 @@ int main(int argc, char ** argv) { // end of text token if (embd.back() == EOS_TOKEN_ID) { if (params.interactive) { - is_interacting = true; - embd.back() = NEWLINE_TOKEN_ID; - last_n_tokens.back() = NEWLINE_TOKEN_ID; - if (params.antiprompt.size() != 0) { - fprintf(stderr, "\n"); - } +// is_interacting = true; +// embd.back() = NEWLINE_TOKEN_ID; +// last_n_tokens.back() = NEWLINE_TOKEN_ID; +// if (params.antiprompt.size() != 0) { +// fprintf(stderr, "\n"); +// } } else { fprintf(stderr, " [end of text]\n"); break; From 8009a8ddb05597ac8b4b3db65c700fef2a425a7a Mon Sep 17 00:00:00 2001 From: Slaren <2141330+slaren@users.noreply.github.com> Date: Tue, 21 Mar 2023 04:56:47 +0100 Subject: [PATCH 09/25] Inject the reverse prompt again after eos in interactive mode --- main.cpp | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/main.cpp b/main.cpp index 5461c9e8446b9..dd2cc160e869f 100644 --- a/main.cpp +++ b/main.cpp @@ -1004,7 +1004,11 @@ int main(int argc, char ** argv) { // replace end of text token with newline token when in interactive mode if (id == EOS_TOKEN_ID && params.interactive) { id = NEWLINE_TOKEN_ID; - is_interacting = true; + if (!antipromptv_inp.empty()) { + // inject the reverse prompt to return control to the user + auto& ap_inp = antipromptv_inp.front(); + embd_inp.insert(embd_inp.end(), ap_inp.begin(), ap_inp.end()); + } } // add it to the context @@ -1091,17 +1095,8 @@ int main(int argc, char ** argv) { // end of text token if (embd.back() == EOS_TOKEN_ID) { - if (params.interactive) { -// is_interacting = true; -// embd.back() = NEWLINE_TOKEN_ID; -// last_n_tokens.back() = NEWLINE_TOKEN_ID; -// if (params.antiprompt.size() != 0) { -// fprintf(stderr, "\n"); -// } - } else { - fprintf(stderr, " [end of text]\n"); - break; - } + fprintf(stderr, " [end of text]\n"); + break; } // In interactive mode, respect the maximum number of tokens and drop back to user input when reached. From 3c211c64bd6520aa64a94a632cb24ee0f294552f Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Tue, 21 Mar 2023 12:53:32 -0500 Subject: [PATCH 10/25] tokenize reverse prompt when needed makes this PR compatible with https://github.com/ggerganov/llama.cpp/pull/330 --- main.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index dd2cc160e869f..2235f7b252403 100644 --- a/main.cpp +++ b/main.cpp @@ -1004,7 +1004,12 @@ int main(int argc, char ** argv) { // replace end of text token with newline token when in interactive mode if (id == EOS_TOKEN_ID && params.interactive) { id = NEWLINE_TOKEN_ID; - if (!antipromptv_inp.empty()) { + if (params.antiprompt.size() != 0) { + // tokenize the reverse prompt to inject + std::vector> antipromptv_inp; + for (auto antiprompt : params.antiprompt){ + antipromptv_inp.push_back(::llama_tokenize(vocab, antiprompt, false)); + } // inject the reverse prompt to return control to the user auto& ap_inp = antipromptv_inp.front(); embd_inp.insert(embd_inp.end(), ap_inp.begin(), ap_inp.end()); From e33df8e1a0375951a65e930c6d178fd02b106bac Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Tue, 21 Mar 2023 13:37:36 -0500 Subject: [PATCH 11/25] tokenize and inject only first reverse prompt thanks to tjohnman --- main.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/main.cpp b/main.cpp index 2235f7b252403..915558e2b8ebb 100644 --- a/main.cpp +++ b/main.cpp @@ -1005,14 +1005,9 @@ int main(int argc, char ** argv) { if (id == EOS_TOKEN_ID && params.interactive) { id = NEWLINE_TOKEN_ID; if (params.antiprompt.size() != 0) { - // tokenize the reverse prompt to inject - std::vector> antipromptv_inp; - for (auto antiprompt : params.antiprompt){ - antipromptv_inp.push_back(::llama_tokenize(vocab, antiprompt, false)); - } - // inject the reverse prompt to return control to the user - auto& ap_inp = antipromptv_inp.front(); - embd_inp.insert(embd_inp.end(), ap_inp.begin(), ap_inp.end()); + // tokenize the first reverse prompt and inject on the newline + std::vector first_antiprompt = ::llama_tokenize(vocab, params.antiprompt.front(), false); + embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end()); } } From 52f46ef78afd65e13179436fce7930e1dfdfc8de Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Tue, 21 Mar 2023 14:10:20 -0500 Subject: [PATCH 12/25] tokenize first reverse prompt once --- main.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index 915558e2b8ebb..c380324817874 100644 --- a/main.cpp +++ b/main.cpp @@ -885,6 +885,9 @@ int main(int argc, char ** argv) { antipromptv_inp.push_back(::llama_tokenize(vocab, antiprompt, false)); } + // tokenize the first reverse prompt + std::vector first_antiprompt = ::llama_tokenize(vocab, params.antiprompt.front(), false); + // enable interactive mode if reverse prompt is specified if (antipromptv_inp.size() != 0) { params.interactive = true; @@ -1005,8 +1008,7 @@ int main(int argc, char ** argv) { if (id == EOS_TOKEN_ID && params.interactive) { id = NEWLINE_TOKEN_ID; if (params.antiprompt.size() != 0) { - // tokenize the first reverse prompt and inject on the newline - std::vector first_antiprompt = ::llama_tokenize(vocab, params.antiprompt.front(), false); + // inject first reverse prompt embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end()); } } From 1752bc92eb13d1e2ad5674cf8c4cef92004d0d42 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 01:55:17 -0500 Subject: [PATCH 13/25] add newline token --- llama.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llama.h b/llama.h index 3df9ed1fdd82c..ce7f5a49ab674 100644 --- a/llama.h +++ b/llama.h @@ -114,6 +114,7 @@ extern "C" { // Special tokens LLAMA_API llama_token llama_token_bos(); LLAMA_API llama_token llama_token_eos(); + LLAMA_API llama_token llama_token_newline(); // TODO: improve the last_n_tokens interface ? LLAMA_API llama_token llama_sample_top_p_top_k( From 23bb78fbdc5e41c873d7baa61f1a67fe4256cd64 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 01:55:57 -0500 Subject: [PATCH 14/25] add newline token --- llama.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llama.cpp b/llama.cpp index fde4d251682ca..be0e58f7fa0e4 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1496,6 +1496,9 @@ llama_token llama_token_bos() { llama_token llama_token_eos() { return 2; } +llama_token llama_token_newline() { + return 13; +} llama_token llama_sample_top_p_top_k( llama_context * ctx, From da0837f55fa9050713ac6e699357c74f4075a75e Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 11:01:47 -0500 Subject: [PATCH 15/25] tokenize/inject reverse prompt for refactor this doesn't seem right though --- main.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/main.cpp b/main.cpp index 4c4aa370142bd..df9d0f152b380 100644 --- a/main.cpp +++ b/main.cpp @@ -250,10 +250,10 @@ int main(int argc, char ** argv) { } // tokenize the first reverse prompt -// std::vector first_antiprompt; -// if (!params.antiprompt.empty()) { -// first_antiprompt = ::llama_tokenize(vocab, params.antiprompt.front(), false); -// } + auto first_antiprompt = ::llama_tokenize(ctx, params.prompt,false); + if (!params.antiprompt.empty()) { + auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); + } // enable interactive mode if reverse prompt is specified if (params.antiprompt.size() != 0) { @@ -364,10 +364,10 @@ int main(int argc, char ** argv) { // replace end of text token with newline token when in interactive mode if (id == llama_token_eos() && params.interactive) { id = llama_token_newline(); -// if (params.antiprompt.size() != 0) { + if (params.antiprompt.size() != 0) { // inject first reverse prompt -// embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end()); -// } + embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end()); + } } // add it to the context @@ -484,4 +484,4 @@ int main(int argc, char ** argv) { set_console_state(CONSOLE_STATE_DEFAULT); return 0; -} \ No newline at end of file +} From c4efdb22af1175dbd8308d981d17fb17ed7e4f18 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 11:22:56 -0500 Subject: [PATCH 16/25] tokenize nothing for antiprompt if no reverse --- main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index df9d0f152b380..b64e9d13f7c89 100644 --- a/main.cpp +++ b/main.cpp @@ -250,7 +250,7 @@ int main(int argc, char ** argv) { } // tokenize the first reverse prompt - auto first_antiprompt = ::llama_tokenize(ctx, params.prompt,false); + auto first_antiprompt = ::llama_tokenize(ctx,"",false); if (!params.antiprompt.empty()) { auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); } From 879da33ab4ad4db52ce99e1eeb5394c0839bd878 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 11:41:19 -0500 Subject: [PATCH 17/25] Update main.cpp --- main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index b64e9d13f7c89..bd35270e071ec 100644 --- a/main.cpp +++ b/main.cpp @@ -250,9 +250,9 @@ int main(int argc, char ** argv) { } // tokenize the first reverse prompt - auto first_antiprompt = ::llama_tokenize(ctx,"",false); + auto first_antiprompt = ::llama_tokenize(ctx, params.prompt, true); if (!params.antiprompt.empty()) { - auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); + auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), true); } // enable interactive mode if reverse prompt is specified From e590787ab3919eab6a2057f4d490566eb18900a8 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 11:43:57 -0500 Subject: [PATCH 18/25] Update main.cpp --- main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index bd35270e071ec..df9d0f152b380 100644 --- a/main.cpp +++ b/main.cpp @@ -250,9 +250,9 @@ int main(int argc, char ** argv) { } // tokenize the first reverse prompt - auto first_antiprompt = ::llama_tokenize(ctx, params.prompt, true); + auto first_antiprompt = ::llama_tokenize(ctx, params.prompt,false); if (!params.antiprompt.empty()) { - auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), true); + auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); } // enable interactive mode if reverse prompt is specified From 4e4cfdfb67f5ff5f47a99ede1cdda79aaa3b25e3 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 17:46:23 -0500 Subject: [PATCH 19/25] tokenize and inject reverse prompt as needed this doesn't seem to work if the reverse prompt is tokenized outside earlier on --- main.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/main.cpp b/main.cpp index df9d0f152b380..6bb4977709c82 100644 --- a/main.cpp +++ b/main.cpp @@ -249,12 +249,6 @@ int main(int argc, char ** argv) { params.antiprompt.push_back("### Instruction:\n\n"); } - // tokenize the first reverse prompt - auto first_antiprompt = ::llama_tokenize(ctx, params.prompt,false); - if (!params.antiprompt.empty()) { - auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); - } - // enable interactive mode if reverse prompt is specified if (params.antiprompt.size() != 0) { params.interactive = true; @@ -365,7 +359,9 @@ int main(int argc, char ** argv) { if (id == llama_token_eos() && params.interactive) { id = llama_token_newline(); if (params.antiprompt.size() != 0) { - // inject first reverse prompt + // tokenize and inject first reverse prompt + auto first_antiprompt = ::llama_tokenize(ctx, "", false); + first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end()); } } From 6a4cfc4dfaee22ee2da42de425aed6d1fbcff69e Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 18:02:35 -0500 Subject: [PATCH 20/25] not needed --- main.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/main.cpp b/main.cpp index 6bb4977709c82..29f05f76222ea 100644 --- a/main.cpp +++ b/main.cpp @@ -360,7 +360,6 @@ int main(int argc, char ** argv) { id = llama_token_newline(); if (params.antiprompt.size() != 0) { // tokenize and inject first reverse prompt - auto first_antiprompt = ::llama_tokenize(ctx, "", false); first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end()); } From 10206d0360317ce44efa70ce02ace70de2b4b6ec Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 18:52:51 -0500 Subject: [PATCH 21/25] remove newline token --- llama.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llama.cpp b/llama.cpp index be0e58f7fa0e4..fde4d251682ca 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1496,9 +1496,6 @@ llama_token llama_token_bos() { llama_token llama_token_eos() { return 2; } -llama_token llama_token_newline() { - return 13; -} llama_token llama_sample_top_p_top_k( llama_context * ctx, From 8f83ce838065fa2b75061daaee41ce3536e0d0cd Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 18:53:10 -0500 Subject: [PATCH 22/25] remove newline token --- llama.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llama.h b/llama.h index ce7f5a49ab674..3df9ed1fdd82c 100644 --- a/llama.h +++ b/llama.h @@ -114,7 +114,6 @@ extern "C" { // Special tokens LLAMA_API llama_token llama_token_bos(); LLAMA_API llama_token llama_token_eos(); - LLAMA_API llama_token llama_token_newline(); // TODO: improve the last_n_tokens interface ? LLAMA_API llama_token llama_sample_top_p_top_k( From 7864eef92ce34f66ca9b7b3ef9fa2a39f0603b08 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 19:19:49 -0500 Subject: [PATCH 23/25] tokenize newline token --- main.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 29f05f76222ea..b63d01f122e9f 100644 --- a/main.cpp +++ b/main.cpp @@ -254,6 +254,9 @@ int main(int argc, char ** argv) { params.interactive = true; } + //determine newline token + auto llama_token_newline = ::llama_tokenize(ctx, "\n", false); + fprintf(stderr, "\n"); fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); @@ -357,7 +360,7 @@ int main(int argc, char ** argv) { // replace end of text token with newline token when in interactive mode if (id == llama_token_eos() && params.interactive) { - id = llama_token_newline(); + id = llama_token_newline.front(); if (params.antiprompt.size() != 0) { // tokenize and inject first reverse prompt first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); From 88df270f6b5fc5b2e8922157beb2133c7400b386 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Wed, 22 Mar 2023 19:44:00 -0500 Subject: [PATCH 24/25] add space to comment --- main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index cc77593cbee18..0403ef53a79a5 100644 --- a/main.cpp +++ b/main.cpp @@ -258,7 +258,7 @@ int main(int argc, char ** argv) { params.interactive = true; } - //determine newline token + // determine newline token auto llama_token_newline = ::llama_tokenize(ctx, "\n", false); fprintf(stderr, "\n"); From 55b899b8f2a0d638c7d4fc56894711b22dd5a841 Mon Sep 17 00:00:00 2001 From: rabidcopy Date: Thu, 23 Mar 2023 13:47:18 -0500 Subject: [PATCH 25/25] Update main.cpp Co-authored-by: Georgi Gerganov --- main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 0403ef53a79a5..5ba6d5a7561dc 100644 --- a/main.cpp +++ b/main.cpp @@ -367,7 +367,7 @@ int main(int argc, char ** argv) { id = llama_token_newline.front(); if (params.antiprompt.size() != 0) { // tokenize and inject first reverse prompt - first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); + const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end()); } }