Make CLI chatbot work better with base models
jart committed Nov 23, 2024
1 parent 241bf21 commit 12c3761
Showing 5 changed files with 52 additions and 12 deletions.
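
At a glance: the chatbot now detects "base" models (GGUF files that ship no chat template) and switches to raw text completion for them instead of chat-formatted turns. A condensed sketch of the two pieces that do this, using only names that appear in the diff below (not verbatim from any single file):

    // detection: no --chat-template flag and no template in the GGUF metadata
    bool is_base_model() {
        return g_params.chat_template.empty() &&
               llama_model_meta_val_str(g_model, "tokenizer.chat_template", 0, 0) == -1;
    }

    // in the REPL: feed user input verbatim to base models, wrap it otherwise
    std::string msg;
    if (is_base_model()) {
        msg = line;
    } else {
        std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
        msg = llama_chat_apply_template(g_model, g_params.chat_template, chat, add_assi);
    }
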
6 changes: 6 additions & 0 deletions llamafile/chatbot.h
@@ -21,6 +21,11 @@
 #include <__fwd/vector.h>
 #include <signal.h>
 
+#define DEFAULT_SYSTEM_PROMPT \
+    "A chat between a curious human and an artificial intelligence assistant. " \
+    "The assistant gives helpful, detailed, and polite answers to the " \
+    "human's questions."
+
 struct bestlineCompletions;
 struct clip_ctx;
 struct gpt_params;
@@ -58,6 +63,7 @@ bool eval_string(std::string_view, bool, bool);
 bool eval_token(int);
 bool eval_tokens(std::vector<int>);
 bool handle_command(const char *);
+bool is_base_model();
 bool out_of_context(int);
 char *on_hint(const char *, const char **, const char **);
 const char *get_role_color(enum Role);
9 changes: 7 additions & 2 deletions llamafile/chatbot_hint.cpp
@@ -27,8 +27,13 @@ namespace chatbot {
 static const char *on_hint_impl(const char *line) {
     if (!*line && g_manual_mode)
         return get_role_name(g_role);
-    if (!*line && !g_manual_mode && !g_said_something)
-        return "say something (or type /help for help)";
+    if (!*line && !g_manual_mode && !g_said_something) {
+        if (is_base_model()) {
+            return "type text to be completed (or /help for help)";
+        } else {
+            return "say something (or type /help for help)";
+        }
+    }
     static const char *const kHints[] = {
         "/clear", //
         "/context", //
4 changes: 4 additions & 0 deletions llamafile/chatbot_hist.cpp
@@ -219,6 +219,10 @@ void rewind(int pos) {
 }
 
 void on_manual(const std::vector<std::string> &args) {
+    if (is_base_model()) {
+        err("error: /manual mode not supported on base models");
+        return;
+    }
     if (args.size() == 1) {
         g_manual_mode = !g_manual_mode;
     } else if (args.size() == 2 && (args[1] == "on" || args[1] == "off")) {
17 changes: 14 additions & 3 deletions llamafile/chatbot_main.cpp
@@ -95,6 +95,17 @@ const char *tip() {
     return " (use the --verbose flag for further details)";
 }
 
+bool is_base_model() {
+
+    // check if user explicitly passed --chat-template flag
+    if (!g_params.chat_template.empty())
+        return false;
+
+    // check if gguf metadata has chat template. this should always be
+    // present for "instruct" models, and never specified on base ones
+    return llama_model_meta_val_str(g_model, "tokenizer.chat_template", 0, 0) == -1;
+}
+
 int main(int argc, char **argv) {
 
     // print logo
@@ -107,9 +118,7 @@ int main(int argc, char **argv) {
     // override defaults for some flags
     g_params.n_batch = 256; // for better progress indication
     g_params.sparams.temp = 0; // don't believe in randomness by default
-    g_params.prompt = "A chat between a curious human and an artificial intelligence assistant. "
-                      "The assistant gives helpful, detailed, and polite answers to the "
-                      "human's questions.";
+    g_params.prompt = DEFAULT_SYSTEM_PROMPT;
 
     // parse flags (sadly initializes gpu support as side-effect)
     print_ephemeral("loading backend...");
@@ -158,6 +167,8 @@ int main(int argc, char **argv) {
     printf(BOLD "software" UNBOLD ": llamafile " LLAMAFILE_VERSION_STRING "\n" //
            BOLD "model" UNBOLD ": %s\n",
            basename(g_params.model).c_str());
+    if (is_base_model())
+        printf(BOLD "mode" UNBOLD ": RAW TEXT COMPLETION (base model)\n");
     printf(BOLD "compute" UNBOLD ": %s\n", describe_compute().c_str());
     if (want_server)
         printf(BOLD "server" UNBOLD ": %s\n", g_listen_url.c_str());
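
The comparison with -1 above leans on llama.cpp's llama_model_meta_val_str() returning a negative value when the key is absent and the string length when it is present, so passing a null buffer works as a cheap existence test. Under that assumption, a small illustrative helper (not part of this commit; the name, buffer size, and include path are hypothetical) that would also read the template text itself:

    #include <string>
    #include "llama.cpp/llama.h"

    // read the chat template out of the GGUF metadata; empty string => base model
    static std::string read_chat_template(const llama_model *model) {
        char buf[8192]; // assumed big enough for typical Jinja templates
        int n = llama_model_meta_val_str(model, "tokenizer.chat_template", buf, sizeof(buf));
        if (n < 0)
            return ""; // key missing, i.e. what is_base_model() detects
        return std::string(buf, n < (int)sizeof(buf) ? n : (int)sizeof(buf) - 1);
    }
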
28 changes: 21 additions & 7 deletions llamafile/chatbot_repl.cpp
@@ -104,12 +104,21 @@ void repl() {
     }
     record_undo();
 
+    // make base models have no system prompt by default
+    if (is_base_model() && g_params.prompt == DEFAULT_SYSTEM_PROMPT)
+        g_params.prompt = "";
+
     // setup system prompt
     if (!g_params.prompt.empty()) {
         print_ephemeral("loading system prompt...");
-        std::vector<llama_chat_msg> chat = {{"system", g_params.prompt}};
-        std::string msg =
-            llama_chat_apply_template(g_model, g_params.chat_template, chat, DONT_ADD_ASSISTANT);
+        std::string msg;
+        if (is_base_model()) {
+            msg = g_params.prompt;
+        } else {
+            std::vector<llama_chat_msg> chat = {{"system", g_params.prompt}};
+            msg = llama_chat_apply_template(g_model, g_params.chat_template, chat,
+                                            DONT_ADD_ASSISTANT);
+        }
         if (!eval_string(msg, DONT_ADD_SPECIAL, PARSE_SPECIAL))
             exit(6);
         llama_synchronize(g_ctx);
@@ -135,12 +144,13 @@ void repl() {
         write(1, get_role_color(g_role), strlen(get_role_color(g_role)));
         char *line = bestlineWithHistory(">>> ", "llamafile");
         write(1, UNFOREGROUND, strlen(UNFOREGROUND));
+        g_last_printed_char = '\n';
         if (!line) {
             if (g_got_sigint)
                 ensure_newline();
             break;
         }
-        if (is_empty(line)) {
+        if (!is_base_model() && is_empty(line)) {
             if (g_manual_mode) {
                 g_role = cycle_role(g_role);
                 write(1, "\033[F", 3);
@@ -155,9 +165,13 @@ }
         }
         bool add_assi = !g_manual_mode;
         int tokens_used_before = tokens_used();
-        std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
-        std::string msg =
-            llama_chat_apply_template(g_model, g_params.chat_template, chat, add_assi);
+        std::string msg;
+        if (is_base_model()) {
+            msg = line;
+        } else {
+            std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
+            msg = llama_chat_apply_template(g_model, g_params.chat_template, chat, add_assi);
+        }
         if (!eval_string(msg, DONT_ADD_SPECIAL, PARSE_SPECIAL)) {
             rewind(tokens_used_before);
             continue;
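
For context on the two branches above: an instruct model sees each turn wrapped in its chat template before evaluation, while a base model now sees the raw text. A rough illustration, assuming a ChatML-style template (the actual markup varies by model):

    // illustration only; real markup depends on the model's chat template
    std::vector<llama_chat_msg> chat = {{"user", "Hello"}};
    std::string wrapped = llama_chat_apply_template(g_model, g_params.chat_template, chat,
                                                    /*add_ass=*/true);
    // wrapped ~= "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n"
    // a base model would instead be fed just "Hello"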
