From d19df2c5b9647bc4f0a5cf1ece08bface5f35354 Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Sat, 30 Mar 2024 09:43:47 +0900
Subject: [PATCH 01/14] Add openchat chat template

---
 llama.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index 21e7a067af65f..ed7ef7e0a3e51 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15697,6 +15697,22 @@ static int32_t llama_chat_apply_template_internal(
                 ss << message->content << "</s>";
             }
         }
+    } else if (tmpl == "openchat" || tmpl.find("GPT4 Correct ") != std::string::npos) {
+        // Openchat, Starling
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "user") {
+                ss << "GPT4 Correct User: ";
+            } else if (role == "assistant") {
+                ss << "GPT4 Correct Assistant: ";
+            }
+            // Not documented, but apparently the system message is prepended without prefix:
+            // https://huggingface.co/openchat/openchat_3.5/discussions/5#65448109b4a3f3a2f486fd9d
+            ss << message->content << "<|end_of_turn|>";
+        }
+        if (add_ass) {
+            ss << "GPT4 Correct Assistant: ";
+        }
     } else {
         // template not supported
         return -1;

From 0d24c6af89e60d4e77ff4fe37fcca6a2d8f7433d Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Sat, 30 Mar 2024 10:52:55 +0900
Subject: [PATCH 02/14] Add chat template test for openchat

---
 tests/test-chat-template.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 6e9e4bd1ea2cc..f063ba15f144a 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -33,6 +33,8 @@ int main(void) {
         "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}",
         // OrionStarAI/Orion-14B-Chat
         "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
+        // openchat/openchat-3.5-0106
+        "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}"
     };
     std::vector<std::string> expected_output = {
         // teknium/OpenHermes-2.5-Mistral-7B
@@ -49,6 +51,8 @@ int main(void) {
         "<start_of_turn>user\nYou are a helpful assistant\n\nHello<end_of_turn>\n<start_of_turn>model\nHi there<end_of_turn>\n<start_of_turn>user\nWho are you<end_of_turn>\n<start_of_turn>model\nI am an assistant<end_of_turn>\n<start_of_turn>user\nAnother question<end_of_turn>\n<start_of_turn>model\n",
         // OrionStarAI/Orion-14B-Chat
         "Human: You are a helpful assistant\n\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s>I am an assistant </s>Human: Another question\n\nAssistant: </s>",
+        // openchat/openchat-3.5-0106
+        "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you?<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:"
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;
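The two patches above add the handler and pin its behavior in the test suite. As a quick orientation (not part of the patch series), here is a minimal sketch of how the new branch is reached through the public API; it assumes the `llama_chat_apply_template()` signature from `llama.h` of this era, where a null model pointer forces use of the explicit template string:

```cpp
#include "llama.h"

#include <cstdio>
#include <vector>

int main() {
    std::vector<llama_chat_message> chat = {
        {"system", "You are a helpful assistant"},
        {"user",   "Hello"},
    };
    std::vector<char> buf(1024);
    // "openchat" hits the tmpl == "openchat" case; a raw Jinja string
    // containing "GPT4 Correct " would hit the substring fallback instead.
    const int32_t n = llama_chat_apply_template(
        /*model=*/nullptr, "openchat", chat.data(), chat.size(),
        /*add_ass=*/true, buf.data(), buf.size());
    if (n > 0 && (size_t) n <= buf.size()) {
        printf("%.*s\n", n, buf.data());
    }
    return 0;
}
```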
From f6104b9b774d4575ed0308632f2c52129a9a97fb Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Sat, 30 Mar 2024 11:23:18 +0900
Subject: [PATCH 03/14] Add chat template for vicuna

---
 llama.cpp | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index ed7ef7e0a3e51..fa473e5be93d8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15711,7 +15711,24 @@ static int32_t llama_chat_apply_template_internal(
             ss << message->content << "<|end_of_turn|>";
         }
         if (add_ass) {
-            ss << "GPT4 Correct Assistant: ";
+            ss << "GPT4 Correct Assistant:";
         }
+    } else if (tmpl == "vicuna" || tmpl.find("USER: ") != std::string::npos) {
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "user") {
+                ss << "USER: ";
+            } else if (role == "assistant") {
+                ss << "ASSISTANT: ";
+            }
+            ss << message->content << "\n";
+            if (role == "system") {
+                // Extra newline after system message
+                ss << "\n";
+            }
+        }
+        if (add_ass) {
+            ss << "ASSISTANT:";
+        }
     } else {
         // template not supported

From e0f9d9d73243699e0e98ed566d8b7bb3a265d0e4 Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Sat, 30 Mar 2024 14:41:43 +0900
Subject: [PATCH 04/14] Add chat template for orca-vicuna

---
 llama.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index fa473e5be93d8..abff2304cb3d6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15714,6 +15714,7 @@ static int32_t llama_chat_apply_template_internal(
             ss << "GPT4 Correct Assistant:";
         }
     } else if (tmpl == "vicuna" || tmpl.find("USER: ") != std::string::npos) {
+        // Vicuna 1.1+, Nous Capybara, etc.
         for (auto message : chat) {
             std::string role(message->role);
             if (role == "user") {
@@ -15730,6 +15731,22 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "ASSISTANT:";
         }
+    } else if (tmpl == "orca-vicuna" || tmpl.find("SYSTEM: ") != std::string::npos) {
+        // Orca-Vicuna
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << "SYSTEM: ";
+            } else if (role == "user") {
+                ss << "USER: ";
+            } else if (role == "assistant") {
+                ss << "ASSISTANT: ";
+            }
+            ss << message->content << "\n";
+        }
+        if (add_ass) {
+            ss << "ASSISTANT:";
+        }
     } else {
         // template not supported
         return -1;
From e423aa1adf648ae15f549af2047563b31ce4dd84 Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Sat, 30 Mar 2024 14:54:12 +0900
Subject: [PATCH 05/14] Add EOS for vicuna templates

---
 llama.cpp | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index abff2304cb3d6..0679759b54c7d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15717,15 +15717,12 @@ static int32_t llama_chat_apply_template_internal(
         // Vicuna 1.1+, Nous Capybara, etc.
         for (auto message : chat) {
             std::string role(message->role);
-            if (role == "user") {
-                ss << "USER: ";
-            } else if (role == "assistant") {
-                ss << "ASSISTANT: ";
-            }
-            ss << message->content << "\n";
             if (role == "system") {
-                // Extra newline after system message
-                ss << "\n";
+                ss << message->content << "\n\n";
+            } else if (role == "user") {
+                ss << "USER: " << message->content << "\n";
+            } else if (role == "assistant") {
+                ss << "ASSISTANT: " << message->content << "\n";
             }
         }
         if (add_ass) {
@@ -15736,13 +15733,12 @@ static int32_t llama_chat_apply_template_internal(
         for (auto message : chat) {
             std::string role(message->role);
             if (role == "system") {
-                ss << "SYSTEM: ";
+                ss << "SYSTEM: " << message->content << "\n";
             } else if (role == "user") {
-                ss << "USER: ";
+                ss << "USER: " << message->content << "\n";
             } else if (role == "assistant") {
-                ss << "ASSISTANT: ";
+                ss << "ASSISTANT: " << message->content << "\n";
             }
-            ss << message->content << "\n";
         }
         if (add_ass) {
             ss << "ASSISTANT:";
From 5305d6822ab4f35d05f737091fd486755b710081 Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Sat, 30 Mar 2024 17:47:37 +0900
Subject: [PATCH 06/14] Combine vicuna chat templates

---
 llama.cpp | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 0679759b54c7d..580a4e53b6867 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15698,7 +15698,7 @@ static int32_t llama_chat_apply_template_internal(
             }
         }
     } else if (tmpl == "openchat" || tmpl.find("GPT4 Correct ") != std::string::npos) {
-        // Openchat, Starling
+        // openchat/openchat-3.5-0106,
         for (auto message : chat) {
             std::string role(message->role);
             if (role == "user") {
@@ -15713,27 +15713,17 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "GPT4 Correct Assistant:";
         }
-    } else if (tmpl == "vicuna" || tmpl.find("USER: ") != std::string::npos) {
-        // Vicuna 1.1+, Nous Capybara, etc.
+    } else if (tmpl == "vicuna" || (tmpl.find("ASSISTANT: ") != std::string::npos && tmpl.find("USER: ") != std::string::npos)) {
+        // eachadea/vicuna-13b-1.1 (and Orca variant)
         for (auto message : chat) {
             std::string role(message->role);
             if (role == "system") {
-                ss << message->content << "\n\n";
-            } else if (role == "user") {
-                ss << "USER: " << message->content << "\n";
-            } else if (role == "assistant") {
-                ss << "ASSISTANT: " << message->content << "\n";
-            }
-        }
-        if (add_ass) {
-            ss << "ASSISTANT:";
-        }
-    } else if (tmpl == "orca-vicuna" || tmpl.find("SYSTEM: ") != std::string::npos) {
-        // Orca-Vicuna
-        for (auto message : chat) {
-            std::string role(message->role);
-            if (role == "system") {
-                ss << "SYSTEM: " << message->content << "\n";
+                // Orca-Vicuna variant uses a system prefix
+                if (tmpl.find("SYSTEM: ") != std::string::npos) {
+                    ss << "SYSTEM: " << message->content << "\n";
+                } else {
+                    ss << message->content << "\n\n";
+                }
             } else if (role == "user") {
                 ss << "USER: " << message->content << "\n";
             } else if (role == "assistant") {
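Patch 06 is worth a pause: because model files often carry no recognizable template name, this whole function works by sniffing distinctive literals out of the Jinja source, and the two vicuna variants overlap on "USER: ". The sketch below (a hypothetical standalone helper, not code from the patch) restates the detection rules this patch settles on:

```cpp
#include <string>

enum class chat_tmpl { openchat, vicuna_orca, vicuna, unknown };

// Mirrors the heuristics above: both "USER: " and "ASSISTANT: " are required
// so a stray "USER: " in an unrelated template is not enough, and "SYSTEM: "
// then selects the Orca-Vicuna flavor inside the shared branch.
static chat_tmpl sniff(const std::string & tmpl) {
    if (tmpl.find("GPT4 Correct ") != std::string::npos) {
        return chat_tmpl::openchat;
    }
    if (tmpl.find("USER: ") != std::string::npos &&
        tmpl.find("ASSISTANT: ") != std::string::npos) {
        return tmpl.find("SYSTEM: ") != std::string::npos ? chat_tmpl::vicuna_orca
                                                          : chat_tmpl::vicuna;
    }
    return chat_tmpl::unknown;
}
```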
From c708544cd63f1a86a11031051e49d496a0dca0f0 Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Sat, 30 Mar 2024 17:48:15 +0900
Subject: [PATCH 07/14] Add tests for openchat and vicuna chat templates

---
 tests/test-chat-template.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index f063ba15f144a..96b136908bb9e 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -33,8 +33,14 @@ int main(void) {
         "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}",
         // OrionStarAI/Orion-14B-Chat
         "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
-        // openchat/openchat-3.5-0106
-        "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}"
+        // openchat/openchat-3.5-0106,
+        "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
+        // eachadea/vicuna-13b-1.1,
+        // No template included in config_tokenizer.json; extracted from https://github.com/oobabooga/text-generation-webui/blob/main/instruction-templates/Vicuna-v1.1.yaml
+        "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '' + message['content'] + '\n\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
+        // Orca-Vicuna
+        // No template included in config_tokenizer.json; extracted from: https://github.com/oobabooga/text-generation-webui/blob/main/instruction-templates/Orca-Vicuna.yaml
+        "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}"
     };
     std::vector<std::string> expected_output = {
         // teknium/OpenHermes-2.5-Mistral-7B
@@ -52,7 +58,11 @@ int main(void) {
         // OrionStarAI/Orion-14B-Chat
         "Human: You are a helpful assistant\n\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s>I am an assistant </s>Human: Another question\n\nAssistant: </s>",
         // openchat/openchat-3.5-0106
-        "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you?<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:"
+        "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:",
+        // eachadea/vicuna-13b-1.1
+        "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:",
+        // Orca-Vicuna
+        "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:"
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;
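For context on how these strings are consumed: the test drives every template through `llama_chat_apply_template()` with one shared conversation and compares the result against `expected_output` by index, roughly like the sketch below (a paraphrase of the loop in `test-chat-template.cpp`, reusing the test's own variables):

```cpp
// Paraphrased test loop: templates[i] is rendered with a null model so the
// custom Jinja string is used, then the output must match expected_output[i].
for (size_t i = 0; i < templates.size(); i++) {
    const std::string & custom_template = templates[i];
    const std::string & expected = expected_output[i];
    formatted_chat.resize(1024);
    res = llama_chat_apply_template(nullptr, custom_template.c_str(),
                                    conversation, message_count,
                                    /*add_ass=*/true,
                                    formatted_chat.data(), formatted_chat.size());
    assert(res >= 0);
    std::string output(formatted_chat.data(), (size_t) res);
    assert(output == expected);
}
```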
-%}", // Orca-Vicuna // No template included in config_tokenizer.json; extracted from: https://github.com/oobabooga/text-generation-webui/blob/main/instruction-templates/Orca-Vicuna.yaml - "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}" + "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}", + // wxjiao/alpaca-7b, deepseek-ai/deepseek-coder-33b-instruct + // No original Alpaca template Jinja, so using DeepSeek's instead + "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}" }; std::vector expected_output = { // teknium/OpenHermes-2.5-Mistral-7B @@ -62,7 +65,9 @@ int main(void) { // eachadea/vicuna-13b-1.1 "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:", // Orca-Vicuna - "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:" + "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:", + // wxjiao/alpaca-7b, deepseek-ai/deepseek-coder-33b-instruct + "You are a helpful assistant\n\n### Instruction:\nHello\n\n### Response:\nHi there\n\n### Instruction:\nWho are you\n\n### Response:\n I am an assistant \n\n### Instruction:\nAnother question\n\n### Response:\n", }; std::vector formatted_chat(1024); int32_t res; From a4986dd52eb156e03bef5c6d339150fadfe7ff7f Mon Sep 17 00:00:00 2001 From: Kai Zau Date: Sat, 30 Mar 2024 19:29:27 +0900 Subject: [PATCH 09/14] Add separate template name for vicuna-orca --- llama.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama.cpp b/llama.cpp index 7a694e2baae3e..5d653efd34dab 100644 --- a/llama.cpp +++ b/llama.cpp @@ -15713,13 +15713,13 @@ static int32_t llama_chat_apply_template_internal( if (add_ass) { ss << "GPT4 Correct Assistant:"; } - } else if 
From a4986dd52eb156e03bef5c6d339150fadfe7ff7f Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Sat, 30 Mar 2024 19:29:27 +0900
Subject: [PATCH 09/14] Add separate template name for vicuna-orca

---
 llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 7a694e2baae3e..5d653efd34dab 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15713,13 +15713,13 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "GPT4 Correct Assistant:";
         }
-    } else if (tmpl == "vicuna" || (tmpl.find("ASSISTANT: ") != std::string::npos && tmpl.find("USER: ") != std::string::npos)) {
+    } else if (tmpl == "vicuna" || tmpl == "vicuna-orca" || (tmpl.find("USER: ") != std::string::npos && tmpl.find("ASSISTANT: ") != std::string::npos)) {
         // eachadea/vicuna-13b-1.1 (and Orca variant)
         for (auto message : chat) {
             std::string role(message->role);
             if (role == "system") {
                 // Orca-Vicuna variant uses a system prefix
-                if (tmpl.find("SYSTEM: ") != std::string::npos) {
+                if (tmpl == "vicuna-orca" || tmpl.find("SYSTEM: ") != std::string::npos) {
                     ss << "SYSTEM: " << message->content << "\n";
                 } else {
                     ss << message->content << "\n\n";
From d297225e982a3ee83342d5270d9d66317f766b09 Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Mon, 1 Apr 2024 16:40:07 +0900
Subject: [PATCH 10/14] Remove alpaca, match deepseek with jinja output

---
 llama.cpp                    | 15 ++++++---------
 tests/test-chat-template.cpp |  7 +++----
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 5d653efd34dab..31e1af383cfc6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15733,22 +15733,19 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "ASSISTANT:";
         }
-    } else if (tmpl == "alpaca" || tmpl.find("### Instruction:\\n") != std::string::npos) {
-        // wxjiao/alpaca-7b, deepseek-ai/deepseek-coder-33b-instruct
+    } else if (tmpl == "deepseek" || (tmpl.find("### Instruction:") != std::string::npos && tmpl.find("<|EOT|>") != std::string::npos)) {
+        // deepseek-ai/deepseek-coder-33b-instruct
+        // Use of both U+ff5c and U+007c pipes is deliberate, based on the Jinja template
         for (auto message : chat) {
             std::string role(message->role);
             if (role == "system") {
-                ss << message->content << "\n\n";
+                ss << "<｜begin▁of▁sentence｜>" << message->content;
             } else if (role == "user") {
-                ss << "### Instruction:\n" << message->content << "\n\n";
+                ss << "### Instruction:\n" << message->content << "\n";
             } else if (role == "assistant") {
-                ss << "### Response:\n" << message->content << "\n\n";
+                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
             }
         }
-        if (add_ass) {
-            ss << "### Response:\n";
-        }
-
     } else {
         // template not supported
         return -1;
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index ba1bc0fd84414..b8f1c24d8ea2b 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -41,8 +41,7 @@ int main(void) {
         // Orca-Vicuna
         // No template included in config_tokenizer.json; extracted from: https://github.com/oobabooga/text-generation-webui/blob/main/instruction-templates/Orca-Vicuna.yaml
         "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
-        // wxjiao/alpaca-7b, deepseek-ai/deepseek-coder-33b-instruct
-        // No original Alpaca template Jinja, so using DeepSeek's instead
+        // deepseek-ai/deepseek-coder-33b-instruct
         "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}"
     };
     std::vector<std::string> expected_output = {
         // teknium/OpenHermes-2.5-Mistral-7B
@@ -62,8 +61,8 @@ int main(void) {
         // Orca-Vicuna
         "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:",
-        // wxjiao/alpaca-7b, deepseek-ai/deepseek-coder-33b-instruct
-        "You are a helpful assistant\n\n### Instruction:\nHello\n\n### Response:\nHi there\n\n### Instruction:\nWho are you\n\n### Response:\n I am an assistant \n\n### Instruction:\nAnother question\n\n### Response:\n",
+        // deepseek-ai/deepseek-coder-33b-instruct
+        "<｜begin▁of▁sentence｜>You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n I am an assistant \n<|EOT|>\n### Instruction:\nAnother question\n"
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;
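A note on the new comment about pipes: DeepSeek's BOS token spells its delimiters with fullwidth U+FF5C pipes while `<|EOT|>` uses plain ASCII U+007C, so the two look alike but are different byte sequences. A small self-contained illustration (not part of the patch):

```cpp
#include <cstdio>
#include <string>

// Illustration of the "both pipes are deliberate" comment: DeepSeek's BOS
// uses fullwidth U+FF5C pipes, while <|EOT|> uses ASCII U+007C.
int main() {
    const std::string bos = "<｜begin▁of▁sentence｜>";
    const std::string eot = "<|EOT|>";
    // UTF-8 encodes U+FF5C as EF BD 9C, so the two prefixes differ in bytes:
    for (unsigned char c : bos.substr(0, 4)) printf("%02X ", c); // 3C EF BD 9C
    printf("\n");
    for (unsigned char c : eot.substr(0, 4)) printf("%02X ", c); // 3C 7C 45 4F
    printf("\n");
    return 0;
}
```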
From 9165380c5239aade66319e570033a7f37e3d3faf Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Mon, 1 Apr 2024 19:21:43 +0800
Subject: [PATCH 11/14] Regenerate chat template test with add_generation_prompt

---
 llama.cpp                    | 3 +++
 tests/test-chat-template.cpp | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 31e1af383cfc6..28dae85c4b418 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15746,6 +15746,9 @@ static int32_t llama_chat_apply_template_internal(
                 ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
             }
         }
+        if (add_ass) {
+            ss << "### Response:\n";
+        }
     } else {
         // template not supported
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index b8f1c24d8ea2b..440ffe8aa2807 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -42,7 +42,7 @@ int main(void) {
         // No template included in config_tokenizer.json; extracted from: https://github.com/oobabooga/text-generation-webui/blob/main/instruction-templates/Orca-Vicuna.yaml
         "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
         // deepseek-ai/deepseek-coder-33b-instruct
-        "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}"
+        "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
     };
@@ -63,7 +63,7 @@ int main(void) {
         // Orca-Vicuna
         "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:",
         // deepseek-ai/deepseek-coder-33b-instruct
-        "<｜begin▁of▁sentence｜>You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n I am an assistant \n<|EOT|>\n### Instruction:\nAnother question\n"
+        "<｜begin▁of▁sentence｜>You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n I am an assistant \n<|EOT|>\n### Instruction:\nAnother question\n### Response:\n",
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;

From 1eebfc9f0fd79fe4a707198415a602e93d7545b0 Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Mon, 1 Apr 2024 19:34:31 +0800
Subject: [PATCH 12/14] Separate deepseek bos from system message

---
 llama.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 28dae85c4b418..13dafda4aee63 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15738,8 +15738,11 @@ static int32_t llama_chat_apply_template_internal(
         // Use of both U+ff5c and U+007c pipes is deliberate, based on the Jinja template
         for (auto message : chat) {
             std::string role(message->role);
+            if (message == chat.front()) {
+                ss << "<｜begin▁of▁sentence｜>";
+            }
             if (role == "system") {
-                ss << "<｜begin▁of▁sentence｜>" << message->content;
+                ss << message->content;
             } else if (role == "user") {
                 ss << "### Instruction:\n" << message->content << "\n";
             } else if (role == "assistant") {
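Patches 11 and 12 align the C++ path with the Jinja rendering: `add_ass` plays the role of `add_generation_prompt`, and the BOS prefix is tied to the first message rather than to the system role, so it still appears when no system message is present. A hedged standalone restatement of the resulting logic (simplified mirror, not the code in `llama_chat_apply_template_internal` itself):

```cpp
#include <sstream>
#include <string>
#include <vector>

struct msg { std::string role, content; };

// Simplified mirror of the deepseek branch after patches 11-12: BOS is
// emitted once before the first message regardless of its role, and
// add_ass (Jinja's add_generation_prompt) appends the reply cue.
static std::string format_deepseek(const std::vector<msg> & chat, bool add_ass) {
    std::ostringstream ss;
    for (size_t i = 0; i < chat.size(); i++) {
        if (i == 0) ss << "<｜begin▁of▁sentence｜>";
        if (chat[i].role == "system") {
            ss << chat[i].content;
        } else if (chat[i].role == "user") {
            ss << "### Instruction:\n" << chat[i].content << "\n";
        } else if (chat[i].role == "assistant") {
            ss << "### Response:\n" << chat[i].content << "\n<|EOT|>\n";
        }
    }
    if (add_ass) ss << "### Response:\n";
    return ss.str();
}
```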
From cfcbc7adf3fb7b06a9ed9a33f13176c1710913b7 Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Mon, 1 Apr 2024 19:35:20 +0800
Subject: [PATCH 13/14] Match openchat template with jinja output

---
 llama.cpp                    | 11 ++++-------
 tests/test-chat-template.cpp |  2 +-
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 13dafda4aee63..07af4644419d5 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15701,14 +15701,11 @@ static int32_t llama_chat_apply_template_internal(
         // openchat/openchat-3.5-0106,
         for (auto message : chat) {
             std::string role(message->role);
-            if (role == "user") {
-                ss << "GPT4 Correct User: ";
-            } else if (role == "assistant") {
-                ss << "GPT4 Correct Assistant: ";
+            if (message == chat.front()) {
+                ss << "<s>";
             }
-            // Not documented, but apparently the system message is prepended without prefix:
-            // https://huggingface.co/openchat/openchat_3.5/discussions/5#65448109b4a3f3a2f486fd9d
-            ss << message->content << "<|end_of_turn|>";
+            role[0] = toupper(role[0]);
+            ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
         }
         if (add_ass) {
             ss << "GPT4 Correct Assistant:";
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 440ffe8aa2807..f25d3b89171ea 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -60,7 +60,7 @@ int main(void) {
         // OrionStarAI/Orion-14B-Chat
         "Human: You are a helpful assistant\n\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s>I am an assistant </s>Human: Another question\n\nAssistant: </s>",
         // openchat/openchat-3.5-0106
-        "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:",
+        "<s>GPT4 Correct System: You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:",
         // eachadea/vicuna-13b-1.1
         "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:",
         // Orca-Vicuna
         "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:",
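One subtlety in the `toupper` line above: it reproduces Jinja's `.title()` only because the roles here are single lowercase words ("system", "user", "assistant" become "System", "User", "Assistant"); `.title()` would also capitalize every word of a multi-word role, which the one-character tweak does not. A small illustration (hypothetical roles, not from the patch):

```cpp
#include <cctype>
#include <cstdio>
#include <string>

int main() {
    // Equivalent for the roles this template actually sees...
    for (std::string role : {"system", "user", "assistant"}) {
        role[0] = (char) toupper((unsigned char) role[0]);
        printf("GPT4 Correct %s: ...\n", role.c_str());
    }
    // ...but a hypothetical role like "tool results" would render as
    // "Tool results" here, whereas Jinja's .title() gives "Tool Results".
    return 0;
}
```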
From 48850cff191f05592d52125addaa8b2c6c27aae6 Mon Sep 17 00:00:00 2001
From: Kai Zau
Date: Wed, 3 Apr 2024 21:45:48 +0800
Subject: [PATCH 14/14] Remove BOS token from templates, unprefix openchat

---
 llama.cpp                    | 13 +++++--------
 tests/test-chat-template.cpp | 20 +++++++++++---------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 07af4644419d5..19235d2810407 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15701,11 +15701,12 @@ static int32_t llama_chat_apply_template_internal(
         // openchat/openchat-3.5-0106,
         for (auto message : chat) {
             std::string role(message->role);
-            if (message == chat.front()) {
-                ss << "<s>";
+            if (role == "system") {
+                ss << message->content << "<|end_of_turn|>";
+            } else {
+                role[0] = toupper(role[0]);
+                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
             }
-            role[0] = toupper(role[0]);
-            ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
         }
         if (add_ass) {
             ss << "GPT4 Correct Assistant:";
@@ -15732,12 +15733,8 @@ static int32_t llama_chat_apply_template_internal(
     } else if (tmpl == "deepseek" || (tmpl.find("### Instruction:") != std::string::npos && tmpl.find("<|EOT|>") != std::string::npos)) {
         // deepseek-ai/deepseek-coder-33b-instruct
-        // Use of both U+ff5c and U+007c pipes is deliberate, based on the Jinja template
         for (auto message : chat) {
             std::string role(message->role);
-            if (message == chat.front()) {
-                ss << "<｜begin▁of▁sentence｜>";
-            }
             if (role == "system") {
                 ss << message->content;
             } else if (role == "user") {
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index f25d3b89171ea..73c3536fdb878 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -33,16 +33,18 @@ int main(void) {
         "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}",
         // OrionStarAI/Orion-14B-Chat
         "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
-        // openchat/openchat-3.5-0106,
+        // openchat/openchat-3.5-0106
+        // The included chat_template differs from the author's suggestions here: https://huggingface.co/openchat/openchat_3.5/discussions/5#65448109b4a3f3a2f486fd9d
+        // So we match against the included template but implement the suggested version.
         "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
+        // deepseek-ai/deepseek-coder-33b-instruct
+        "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
-        // eachadea/vicuna-13b-1.1,
-        // No template included in config_tokenizer.json; extracted from https://github.com/oobabooga/text-generation-webui/blob/main/instruction-templates/Vicuna-v1.1.yaml
+        // eachadea/vicuna-13b-1.1
+        // No template included in tokenizer_config.json, so this template likely needs to be manually set.
"{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '' + message['content'] + '\n\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}", // Orca-Vicuna - // No template included in config_tokenizer.json; extracted from: https://github.com/oobabooga/text-generation-webui/blob/main/instruction-templates/Orca-Vicuna.yaml + // No template included in tokenizer_config.json, so this template likely needs to be manually set. "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}", - // deepseek-ai/deepseek-coder-33b-instruct - "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}", }; std::vector expected_output = { // teknium/OpenHermes-2.5-Mistral-7B @@ -60,13 +62,13 @@ int main(void) { // OrionStarAI/Orion-14B-Chat "Human: You are a helpful assistant\n\nHello\n\nAssistant: Hi thereHuman: Who are you\n\nAssistant: I am an assistant Human: Another question\n\nAssistant: ", // openchat/openchat-3.5-0106 - "GPT4 Correct System: You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:", + "You are a helpful assistant<|end_of_turn|>GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi there<|end_of_turn|>GPT4 Correct User: Who are you<|end_of_turn|>GPT4 Correct Assistant: I am an assistant <|end_of_turn|>GPT4 Correct User: Another question<|end_of_turn|>GPT4 Correct Assistant:", + // deepseek-ai/deepseek-coder-33b-instruct + "You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n I am an assistant \n<|EOT|>\n### Instruction:\nAnother question\n### Response:\n", // eachadea/vicuna-13b-1.1 "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:", // Orca-Vicuna "SYSTEM: You are a helpful 
         "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there\nUSER: Who are you\nASSISTANT: I am an assistant \nUSER: Another question\nASSISTANT:",
-        // deepseek-ai/deepseek-coder-33b-instruct
-        "<｜begin▁of▁sentence｜>You are a helpful assistant### Instruction:\nHello\n### Response:\nHi there\n<|EOT|>\n### Instruction:\nWho are you\n### Response:\n I am an assistant \n<|EOT|>\n### Instruction:\nAnother question\n### Response:\n",
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;