Make CLI chatbot work better with base models
jart committed Nov 23, 2024
1 parent 241bf21 commit 12c3761
Showing 5 changed files with 52 additions and 12 deletions.
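
At a glance: the chatbot now detects "base" models (GGUF files that ship no chat template) and switches to raw text completion for them instead of chat-formatted turns. A condensed sketch of the two pieces that do this, using only names that appear in the diff below (not verbatim from any single file):

    // detection: no --chat-template flag and no template in the GGUF metadata
    bool is_base_model() {
        return g_params.chat_template.empty() &&
               llama_model_meta_val_str(g_model, "tokenizer.chat_template", 0, 0) == -1;
    }

    // in the REPL: feed user input verbatim to base models, wrap it otherwise
    std::string msg;
    if (is_base_model()) {
        msg = line;
    } else {
        std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
        msg = llama_chat_apply_template(g_model, g_params.chat_template, chat, add_assi);
    }
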
6 changes: 6 additions & 0 deletions llamafile/chatbot.h
@@ -21,6 +21,11 @@
 #include <__fwd/vector.h>
 #include <signal.h>
 
+#define DEFAULT_SYSTEM_PROMPT \
+    "A chat between a curious human and an artificial intelligence assistant. " \
+    "The assistant gives helpful, detailed, and polite answers to the " \
+    "human's questions."
+
 struct bestlineCompletions;
 struct clip_ctx;
 struct gpt_params;
@@ -58,6 +63,7 @@ bool eval_string(std::string_view, bool, bool);
 bool eval_token(int);
 bool eval_tokens(std::vector<int>);
 bool handle_command(const char *);
+bool is_base_model();
 bool out_of_context(int);
 char *on_hint(const char *, const char **, const char **);
 const char *get_role_color(enum Role);
9 changes: 7 additions & 2 deletions llamafile/chatbot_hint.cpp
@@ -27,8 +27,13 @@ namespace chatbot {
 static const char *on_hint_impl(const char *line) {
     if (!*line && g_manual_mode)
         return get_role_name(g_role);
-    if (!*line && !g_manual_mode && !g_said_something)
-        return "say something (or type /help for help)";
+    if (!*line && !g_manual_mode && !g_said_something) {
+        if (is_base_model()) {
+            return "type text to be completed (or /help for help)";
+        } else {
+            return "say something (or type /help for help)";
+        }
+    }
     static const char *const kHints[] = {
         "/clear", //
         "/context", //
4 changes: 4 additions & 0 deletions llamafile/chatbot_hist.cpp
@@ -219,6 +219,10 @@ void rewind(int pos) {
 }
 
 void on_manual(const std::vector<std::string> &args) {
+    if (is_base_model()) {
+        err("error: /manual mode not supported on base models");
+        return;
+    }
     if (args.size() == 1) {
         g_manual_mode = !g_manual_mode;
     } else if (args.size() == 2 && (args[1] == "on" || args[1] == "off")) {
17 changes: 14 additions & 3 deletions llamafile/chatbot_main.cpp
@@ -95,6 +95,17 @@ const char *tip() {
     return " (use the --verbose flag for further details)";
 }
 
+bool is_base_model() {
+
+    // check if user explicitly passed --chat-template flag
+    if (!g_params.chat_template.empty())
+        return false;
+
+    // check if gguf metadata has chat template. this should always be
+    // present for "instruct" models, and never specified on base ones
+    return llama_model_meta_val_str(g_model, "tokenizer.chat_template", 0, 0) == -1;
+}
+
 int main(int argc, char **argv) {
 
     // print logo
@@ -107,9 +118,7 @@ int main(int argc, char **argv) {
     // override defaults for some flags
     g_params.n_batch = 256; // for better progress indication
     g_params.sparams.temp = 0; // don't believe in randomness by default
-    g_params.prompt = "A chat between a curious human and an artificial intelligence assistant. "
-                      "The assistant gives helpful, detailed, and polite answers to the "
-                      "human's questions.";
+    g_params.prompt = DEFAULT_SYSTEM_PROMPT;
 
     // parse flags (sadly initializes gpu support as side-effect)
     print_ephemeral("loading backend...");
@@ -158,6 +167,8 @@ int main(int argc, char **argv) {
     printf(BOLD "software" UNBOLD ": llamafile " LLAMAFILE_VERSION_STRING "\n" //
            BOLD "model" UNBOLD ": %s\n",
            basename(g_params.model).c_str());
+    if (is_base_model())
+        printf(BOLD "mode" UNBOLD ": RAW TEXT COMPLETION (base model)\n");
     printf(BOLD "compute" UNBOLD ": %s\n", describe_compute().c_str());
     if (want_server)
         printf(BOLD "server" UNBOLD ": %s\n", g_listen_url.c_str());
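
The comparison with -1 above leans on llama.cpp's llama_model_meta_val_str() returning a negative value when the key is absent and the string length when it is present, so passing a null buffer works as a cheap existence test. Under that assumption, a small illustrative helper (not part of this commit; the name, buffer size, and include path are hypothetical) that would also read the template text itself:

    #include <string>
    #include "llama.cpp/llama.h"

    // read the chat template out of the GGUF metadata; empty string => base model
    static std::string read_chat_template(const llama_model *model) {
        char buf[8192]; // assumed big enough for typical Jinja templates
        int n = llama_model_meta_val_str(model, "tokenizer.chat_template", buf, sizeof(buf));
        if (n < 0)
            return ""; // key missing, i.e. what is_base_model() detects
        return std::string(buf, n < (int)sizeof(buf) ? n : (int)sizeof(buf) - 1);
    }
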
28 changes: 21 additions & 7 deletions llamafile/chatbot_repl.cpp
@@ -104,12 +104,21 @@ void repl() {
     }
     record_undo();
 
+    // make base models have no system prompt by default
+    if (is_base_model() && g_params.prompt == DEFAULT_SYSTEM_PROMPT)
+        g_params.prompt = "";
+
     // setup system prompt
     if (!g_params.prompt.empty()) {
         print_ephemeral("loading system prompt...");
-        std::vector<llama_chat_msg> chat = {{"system", g_params.prompt}};
-        std::string msg =
-            llama_chat_apply_template(g_model, g_params.chat_template, chat, DONT_ADD_ASSISTANT);
+        std::string msg;
+        if (is_base_model()) {
+            msg = g_params.prompt;
+        } else {
+            std::vector<llama_chat_msg> chat = {{"system", g_params.prompt}};
+            msg = llama_chat_apply_template(g_model, g_params.chat_template, chat,
+                                            DONT_ADD_ASSISTANT);
+        }
         if (!eval_string(msg, DONT_ADD_SPECIAL, PARSE_SPECIAL))
             exit(6);
         llama_synchronize(g_ctx);
@@ -135,12 +144,13 @@ void repl() {
         write(1, get_role_color(g_role), strlen(get_role_color(g_role)));
         char *line = bestlineWithHistory(">>> ", "llamafile");
         write(1, UNFOREGROUND, strlen(UNFOREGROUND));
+        g_last_printed_char = '\n';
         if (!line) {
             if (g_got_sigint)
                 ensure_newline();
             break;
         }
-        if (is_empty(line)) {
+        if (!is_base_model() && is_empty(line)) {
             if (g_manual_mode) {
                 g_role = cycle_role(g_role);
                 write(1, "\033[F", 3);
@@ -155,9 +165,13 @@ }
         }
         bool add_assi = !g_manual_mode;
         int tokens_used_before = tokens_used();
-        std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
-        std::string msg =
-            llama_chat_apply_template(g_model, g_params.chat_template, chat, add_assi);
+        std::string msg;
+        if (is_base_model()) {
+            msg = line;
+        } else {
+            std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
+            msg = llama_chat_apply_template(g_model, g_params.chat_template, chat, add_assi);
+        }
         if (!eval_string(msg, DONT_ADD_SPECIAL, PARSE_SPECIAL)) {
             rewind(tokens_used_before);
             continue;
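
For context on the two branches above: an instruct model sees each turn wrapped in its chat template before evaluation, while a base model now sees the raw text. A rough illustration, assuming a ChatML-style template (the actual markup varies by model):

    // illustration only; real markup depends on the model's chat template
    std::vector<llama_chat_msg> chat = {{"user", "Hello"}};
    std::string wrapped = llama_chat_apply_template(g_model, g_params.chat_template, chat,
                                                    /*add_ass=*/true);
    // wrapped ~= "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n"
    // a base model would instead be fed just "Hello"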
