From a5d742b72cd36034515f2be1a227b780080f40a6 Mon Sep 17 00:00:00 2001 From: sa_ddam213 Date: Mon, 28 Aug 2023 11:57:50 +1200 Subject: [PATCH] Fix Tokenize of new line, Remove space inserts --- LLama/LLamaEmbedder.cs | 4 ---- LLama/LLamaInstructExecutor.cs | 1 - LLama/LLamaInteractExecutor.cs | 3 +-- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index 5980d17c4..30d721635 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -65,10 +65,6 @@ public float[] GetEmbeddings(string text) /// public float[] GetEmbeddings(string text, bool addBos) { - if (addBos) - { - text = text.Insert(0, " "); - } var embed_inp_array = _ctx.Tokenize(text, addBos); diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs index a6c8603ca..a7d53cc81 100644 --- a/LLama/LLamaInstructExecutor.cs +++ b/LLama/LLamaInstructExecutor.cs @@ -114,7 +114,6 @@ protected override void PreprocessInputs(string text, InferStateArgs args) if (_is_prompt_run) { // When running the first input (prompt) in inteactive mode, we should specially process it. - text = " " + text; _embed_inps = Context.Tokenize(text, true).ToList(); } else diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs index 9b57e74f8..38d6b4436 100644 --- a/LLama/LLamaInteractExecutor.cs +++ b/LLama/LLamaInteractExecutor.cs @@ -26,7 +26,7 @@ public class InteractiveExecutor : StatefulExecutorBase /// public InteractiveExecutor(LLamaContext context) : base(context) { - _llama_token_newline = Context.NativeHandle.Tokenize("\n", false, Context.Encoding); + _llama_token_newline = new [] { NativeApi.llama_token_nl(Context.NativeHandle) }; } /// @@ -104,7 +104,6 @@ protected override void PreprocessInputs(string text, InferStateArgs args) if (_is_prompt_run) { // When running the first input (prompt) in inteactive mode, we should specially process it. - text = " " + text; _embed_inps = Context.Tokenize(text, true).ToList(); } else