common : revert showing control tokens by default for server (#6860)
* fix: revert showing control tokens by default

* feat: revert changes to the default behavior of llama_token_to_piece; provide an overloaded declaration that takes a "bool special" param to toggle rendering of control tokens

* feat: use the overloaded declaration of llama_token_to_piece from common/common.cpp to pass "false" so that control tokens are not shown in chat completion responses

* common : simplify

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
K-Mistele and ggerganov authored Apr 24, 2024
1 parent 28103f4 commit 37246b1
Showing 3 changed files with 7 additions and 6 deletions.
6 changes: 3 additions & 3 deletions common/common.cpp
@@ -2328,12 +2328,12 @@ std::vector<llama_token> llama_tokenize(
     return result;
 }
 
-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
     if (n_tokens < 0) {
         result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
         GGML_ASSERT(check == -n_tokens);
     } else {
         result.resize(n_tokens);
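The wrapper above relies on the C API's size-probe convention: when the destination buffer is too small, llama_token_to_piece returns the negated number of bytes required, so the buffer is resized once and the call retried. A minimal standalone sketch of the same pattern follows; `fill_buf` is a hypothetical stand-in, not part of llama.cpp:

#include <cstring>
#include <string>
#include <vector>

// Toy stand-in for an API that follows the same convention as
// llama_token_to_piece: it writes into `dst` when `cap` suffices,
// otherwise it returns the negated number of bytes it needs.
static int fill_buf(char * dst, int cap) {
    static const char msg[] = "a piece longer than eight bytes";
    const int need = (int) std::strlen(msg);
    if (cap < need) {
        return -need;
    }
    std::memcpy(dst, msg, need);
    return need;
}

static std::string read_with_retry() {
    std::vector<char> result(8, 0);                        // optimistic small buffer, as in the diff
    int n = fill_buf(result.data(), (int) result.size());
    if (n < 0) {                                           // too small: -n is the required size
        result.resize(-n);
        n = fill_buf(result.data(), (int) result.size());
    }
    return std::string(result.data(), n);
}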
5 changes: 3 additions & 2 deletions common/common.h
@@ -237,11 +237,12 @@ std::vector<llama_token> llama_tokenize(
     bool add_special,
     bool parse_special = false);
 
-// tokenizes a token into a piece
+// tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(
         const struct llama_context * ctx,
-        llama_token token);
+        llama_token token,
+        bool special = true);
 
 // TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
 // that takes into account the tokenizer type and decides how to handle the leading space
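A minimal usage sketch of the overloaded declaration (not part of the diff; the helper names are illustrative, and `ctx`/`tok` are assumed to be an initialized context and a sampled token id). Existing call sites keep their old behavior because `special` defaults to true, while callers that must not leak control tokens opt out explicitly:

#include "common.h"  // declares the overload shown above

// Hypothetical helpers, not from the repository:
static std::string piece_for_log(const llama_context * ctx, llama_token tok) {
    return llama_token_to_piece(ctx, tok);                     // special defaults to true
}

static std::string piece_for_response(const llama_context * ctx, llama_token tok) {
    return llama_token_to_piece(ctx, tok, /*special=*/false);  // suppress control tokens
}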
2 changes: 1 addition & 1 deletion examples/server/server.cpp
@@ -1117,7 +1117,7 @@ struct server_context {
 
     bool process_token(completion_token_output & result, server_slot & slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = llama_token_to_piece(ctx, result.tok);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
         slot.sampled = result.tok;
 
         // search stop word and delete it
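The observable effect, shown as an illustrative sketch (assumes an initialized `ctx`; not part of the diff): for a control token such as EOS, the server now produces an empty piece instead of its textual form, so markers like `</s>` or `<|im_end|>` no longer appear in chat completion responses.

// Illustrative comparison of the two behaviors:
const llama_token eos = llama_token_eos(llama_get_model(ctx));
const std::string shown  = llama_token_to_piece(ctx, eos, true);   // e.g. "</s>" when the model defines a textual EOS
const std::string hidden = llama_token_to_piece(ctx, eos, false);  // "" - the control token is not rendered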
