From dabf01973acdc9b0f28a5366c5693f3ffff9942d Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 18 Jun 2024 13:54:42 +0100 Subject: [PATCH] Make "tool_use" the default chat template key when tools are passed (#31429) * Make "tool_use" the default when tools are passed * Add some opinionated text to the docs * Add some opinionated text to the docs --- docs/source/en/chat_templating.md | 18 ++++++++++++++++++ src/transformers/tokenization_utils_base.py | 20 ++++++++++++-------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/docs/source/en/chat_templating.md b/docs/source/en/chat_templating.md index 8d49fa5c80ee28..0de70266de68ec 100644 --- a/docs/source/en/chat_templating.md +++ b/docs/source/en/chat_templating.md @@ -677,6 +677,24 @@ template. This will ensure that text generation tools can correctly figure out w +### Why do some models have multiple templates? + +Some models use different templates for different use cases. For example, they might use one template for normal chat +and another for tool-use, or retrieval-augmented generation. In these cases, `tokenizer.chat_template` is a dictionary. +This can cause some confusion, and where possible, we recommend using a single template for all use-cases. You can use +Jinja statements like `if tools is defined` and `{% macro %}` definitions to easily wrap multiple code paths in a +single template. + +When a tokenizer has multiple templates, `tokenizer.chat_template` will be a `dict`, where each key is the name +of a template. The `apply_chat_template` method has special handling for certain template names: Specifically, it will +look for a template named `default` in most cases, and will raise an error if it can't find one. However, if a template +named `tool_use` exists when the user has passed a `tools` argument, it will use that instead. To access templates +with other names, pass the name of the template you want to the `chat_template` argument of +`apply_chat_template()`. + +We find that this can be a bit confusing for users, though - so if you're writing a template yourself, we recommend +trying to put it all in a single template where possible! + ### What are "default" templates? Before the introduction of chat templates, chat handling was hardcoded at the model class level. For backwards diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index ab965c5279b44f..3563db5e489cdb 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1781,16 +1781,20 @@ def apply_chat_template( chat_template = template_dict[chat_template] if using_default_dict: using_default_template = True - elif chat_template is None and "default" in template_dict: - chat_template = template_dict["default"] + elif chat_template is None: + if tools is not None and "tool_use" in template_dict: + chat_template = template_dict["tool_use"] + elif "default" in template_dict: + chat_template = template_dict["default"] + else: + raise ValueError( + "This model has multiple chat templates with no default specified! Please either pass a chat " + "template or the name of the template you wish to use to the `chat_template` argument. Available " + f"template names are {sorted(template_dict.keys())}." + ) if using_default_dict: using_default_template = True - elif chat_template is None: - raise ValueError( - "This model has multiple chat templates with no default specified! Please either pass a chat " - "template or the name of the template you wish to use to the `chat_template` argument. Available " - f"template names are {sorted(template_dict.keys())}." - ) + elif chat_template is None: # These are the cases when the model has a single template # priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template