main : don't print special tokens with --grammar #6923

Merged 15 commits on May 25, 2024
Changes from 14 commits
5 changes: 5 additions & 0 deletions common/common.cpp
@@ -905,6 +905,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
params.interactive_specials = true;
return true;
}
if (arg == "--no-special") {
params.no_special = true;
return true;
}
if (arg == "--embedding") {
params.embedding = true;
return true;
@@ -1434,6 +1438,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" -i, --interactive run in interactive mode\n");
printf(" --interactive-specials allow special tokens in user text, in interactive mode\n");
printf(" --interactive-first run in interactive mode and wait for input right away\n");
printf(" --no-special control tokens output disabled\n");
printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n");
printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n");
printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n");
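For context, a minimal sketch of how the new flag is consumed after parsing. `gpt_params_parse()` and the `no_special` field follow the diff above; the standalone demo `main` itself is purely illustrative, not part of this PR.

```cpp
// Hypothetical, minimal consumer of the new --no-special flag (illustration only).
// Build against common.h from this repository.
#include "common.h"
#include <cstdio>

int main(int argc, char ** argv) {
    gpt_params params;
    if (!gpt_params_parse(argc, argv, params)) {
        return 1;
    }
    // e.g. invoked as: ./demo --no-special
    if (params.no_special) {
        printf("control token output is disabled\n");
    }
    return 0;
}
```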
1 change: 1 addition & 0 deletions common/common.h
@@ -142,6 +142,7 @@ struct gpt_params {
bool use_color = false; // use color to distinguish generations and inputs
bool interactive = false; // interactive mode
bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode
bool no_special = false; // disable control token output
bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
bool chatml = false; // chatml mode (used for models trained on chatml syntax)
bool prompt_cache_all = false; // save user input and generations to prompt cache
43 changes: 39 additions & 4 deletions examples/main/main.cpp
@@ -18,6 +18,8 @@
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#define SPECIAL_FILENO 3
#elif defined (_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
@@ -118,6 +120,16 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v
}

int main(int argc, char ** argv) {
#ifndef _MSC_VER
// Check if we have an external attachment to a file descriptor for out of band control tokens (e.g. bash `3>/dev/null` )
// Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files
const bool control_token_file_descriptor_is_attached = fcntl(SPECIAL_FILENO, F_GETFL) != -1;
if (!control_token_file_descriptor_is_attached) {
// Duplicate stdout file descriptor to control token file descriptor to merge the two streams
dup2(STDOUT_FILENO, SPECIAL_FILENO);
}
#endif

gpt_params params;
g_params = &params;

@@ -126,6 +138,8 @@ int main(int argc, char ** argv) {
}
llama_sampling_params & sparams = params.sparams;

const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty();

#ifndef LOG_DISABLE_LOGS
log_set_target(log_filename_generator("main", "log"));
LOG_TEE("Log start\n");
@@ -740,18 +754,39 @@ int main(int argc, char ** argv) {
// display text
if (input_echo && display) {
for (auto id : embd) {
const std::string token_str = llama_token_to_piece(ctx, id, !params.conversation);
printf("%s", token_str.c_str());

const std::string token_str = llama_token_to_piece(ctx, id);

// Console/Stream Output
if (!llama_token_is_control(llama_get_model(ctx), id)) {
// Stream Output Token To Standard Output
fprintf(stdout, "%s", token_str.c_str());
} else if (!params.no_special) {
#ifndef _MSC_VER
if (control_token_file_descriptor_is_attached) {
// Stream Control Token To Special Token Output. Useful for debugging control token behaviour
(void)! write(SPECIAL_FILENO, token_str.c_str(), token_str.length());
} else
#endif
if (control_token_allowed_on_standard_stream)
{
// Stream Control Token To Standard Output Stream
fprintf(stdout, "%s", token_str.c_str());
}
}
// Record Displayed Tokens To Log
// Note: Generated tokens are created one by one hence this check
if (embd.size() > 1) {
// Incoming Requested Tokens
input_tokens.push_back(id);
} else {
// Outgoing Generated Tokens
output_tokens.push_back(id);
output_ss << token_str;
}
fflush(stdout);
}
fflush(stdout);
}

// reset color to default if there is no pending user input
if (input_echo && (int) embd_inp.size() == n_consumed) {
console::set_display(console::reset);
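A self-contained sketch of the stream-splitting technique used above, not the PR's exact code: probe whether file descriptor 3 was attached by the caller, merge it into stdout otherwise, and send "control" output to it. POSIX only; the literal `control` string stands in for the piece a control token would produce.

```cpp
// Minimal POSIX sketch of the out-of-band control-token stream (illustrative).
// Run as:  ./demo             (control output merged into stdout)
//     or:  ./demo 3>ctl.txt   (control output captured separately)
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <string>

static const int SPECIAL_FILENO = 3;

int main() {
    // Is fd 3 already open (e.g. via `3>file` in the shell)?
    const bool special_attached = fcntl(SPECIAL_FILENO, F_GETFL) != -1;
    if (!special_attached) {
        // No external attachment: merge the control stream into stdout.
        dup2(STDOUT_FILENO, SPECIAL_FILENO);
    }

    const std::string text    = "regular output\n";
    const std::string control = "<|control|>\n";  // stand-in for a control token piece

    fprintf(stdout, "%s", text.c_str());
    fflush(stdout);  // keep ordering sane across the two descriptors
    (void)! write(SPECIAL_FILENO, control.c_str(), control.length());
    return 0;
}
```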
4 changes: 4 additions & 0 deletions llama.cpp
@@ -17347,6 +17347,10 @@ bool llama_token_is_eog(const struct llama_model * model, llama_token token) {
);
}

bool llama_token_is_control(const struct llama_model * model, llama_token token) {
return llama_is_control_token(model->vocab, token);
}

llama_token llama_token_bos(const struct llama_model * model) {
return model->vocab.special_bos_id;
}
3 changes: 3 additions & 0 deletions llama.h
@@ -817,6 +817,9 @@ extern "C" {
// Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);

// Identify if Token Id is a control token or a render-able token
LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token);

// Special tokens
LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence
LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
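A hedged usage sketch of the new `llama_token_is_control()` API: scan the vocabulary and count control tokens. It assumes the existing `llama_backend_init()`, `llama_model_default_params()`, `llama_load_model_from_file()` and `llama_n_vocab()` entry points; the model path is a placeholder.

```cpp
// Illustrative only: count how many vocabulary entries are control tokens.
#include "llama.h"
#include <cstdio>

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    // "model.gguf" is a placeholder path.
    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    int n_control = 0;
    const int n_vocab = llama_n_vocab(model);
    for (llama_token id = 0; id < n_vocab; ++id) {
        if (llama_token_is_control(model, id)) {
            n_control++;
        }
    }
    printf("%d of %d tokens are control tokens\n", n_control, n_vocab);

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```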