diff --git a/docs/source/features/tool_calling.md b/docs/source/features/tool_calling.md index 57888e122969..f98ec6108cea 100644 --- a/docs/source/features/tool_calling.md +++ b/docs/source/features/tool_calling.md @@ -152,10 +152,11 @@ Recommended flags: `--tool-call-parser mistral --chat-template examples/tool_cha Supported models: -All Llama 3.1 and 3.2 models should be supported. +All Llama 3.1, 3.2 and 4 models should be supported. * `meta-llama/Llama-3.1-*` * `meta-llama/Llama-3.2-*` +* `meta-llama/Llama-4-*` The tool calling that is supported is the [JSON based tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#json-based-tool-calling). For [pythonic tool calling](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#zero-shot-function-calling) introduced by the Llama-3.2 models, see the `pythonic` tool parser below. @@ -176,6 +177,12 @@ images. Recommended flags: `--tool-call-parser llama3_json --chat-template {see_above}` +vLLM also provides a JSON based chat template for Llama 4: +* `examples/tool_chat_template_llama4_json.jinja` - this is based on the "official" chat template for the Llama 4 +models, but tweaked so that it works better with vLLM. + +For Llama 4 use `--tool-call-parser llama4_json --chat-template examples/tool_chat_template_llama4_json.jinja`. 
+ #### IBM Granite Supported models: diff --git a/examples/tool_chat_template_llama4_json.jinja b/examples/tool_chat_template_llama4_json.jinja new file mode 100644 index 000000000000..759f16554436 --- /dev/null +++ b/examples/tool_chat_template_llama4_json.jinja @@ -0,0 +1,116 @@ +{%- macro is_array_of_type_objects(var) -%} + {%- if var is iterable and var is not string -%} + {%- set ns = namespace(valid=true) -%} + {%- for item in var -%} + {%- if 'type' not in item -%} + {%- set ns.valid = false -%} + {%- break -%} + {%- endif -%} + {%- endfor -%} + {{ ns.valid }} + {%- else -%} + {{ false }} + {%- endif -%} +{%- endmacro %} + +{%- macro render_message(message) %} + {%- if message['content'] is string %} + {{- message['content']|trim }} + {%- elif is_array_of_type_objects(message['content']) == 'True' %} + {%- for content in message['content'] %} + {%- if content['type'] == 'image' %} + {{- '<|image|>' }} + {%- elif content['type'] == 'text' %} + {{- content['text']|trim }} + {%- endif %} + {%- endfor %} + {%- else %} + {{- message['content']|tojson }} + {%- endif %} +{%- endmacro %} + +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0] %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = ({ "content": "You are a helpful assistant with tool calling " + "capabilities. Only reply with a tool call if the function exists in the " + "library provided by the user. If it doesn't exist, just reply directly in " + "natural language. 
When you receive a tool call response, use the output to " + "format an answer to the original user question."}) %} +{%- endif %} + +{%- set tool_lib_preamble = 'Tools: You have access to the following tools. You might need to use one ' + 'or more function/tool calls to fulfill the task. \n' + 'If none are needed, then proceed to the response.\n\n' + 'Tool Call Syntax: You can call tools using the following syntax:\n' + '{"name": function name, "parameters": dictionary of argument name and its value}.\n' + 'Separate multiple function calls by "; ". Do not use variables.\n' + 'Do not include anything else when calling the tools with the syntax above.\n\n' + 'Here is a list of functions in JSON format that you can invoke.\n' %} + +{{- "<|header_start|>system<|header_end|>\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- tool_lib_preamble }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- render_message(system_message) }} +{{ "<|eot|>\n" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0] %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} + {%- endif %} + {{- '<|header_start|>user<|header_end|>\n\n' }} + {{- tool_lib_preamble }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- render_message(first_user_message) + "\n<|eot|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }} + {{- render_message(message) }} + {{- "\n<|eot|>" }} + {%- elif 'tool_calls' in 
message and message.tool_calls|length > 0 %} + {{- '\n<|header_start|>assistant<|header_end|>\n\n' -}} + {{- render_message(message) }} + {%- for tool_call in message.tool_calls %} + {{- '{"name": "' + tool_call.function.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.function.arguments | tojson }} + {{- "}" }} + {%- endfor %} + {{- "\n<|eot|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "\n<|header_start|>ipython<|header_end|>\n\n" }} + {{- render_message(message) }} + {{- "\n<|eom|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '\n<|header_start|>assistant<|header_end|>\n\n' }} +{%- endif %} diff --git a/tests/tool_use/utils.py b/tests/tool_use/utils.py index 7c87c73f04da..c14eaf71e978 100644 --- a/tests/tool_use/utils.py +++ b/tests/tool_use/utils.py @@ -98,6 +98,20 @@ def ensure_system_prompt(messages: list[dict[str, Any]], "extended": True }, + "llama4_json": { + "model": + "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "arguments": [ + "--enforce-eager", "--no-enable-prefix-caching", "-tp", "4", + "--distributed-executor-backend", "mp", "--tool-call-parser", + "llama4_json", "--chat-template", + str(VLLM_PATH / "examples/tool_chat_template_llama4_json.jinja") + ], + "supports_parallel": + True, + "extended": + True + }, "mistral": { "model": "mistralai/Mistral-7B-Instruct-v0.3", diff --git a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py index 20c3238fb3df..5c181616aa01 100644 --- a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py @@ -27,6 +27,7 @@ @ToolParserManager.register_module("llama3_json") +@ToolParserManager.register_module("llama4_json") class Llama3JsonToolParser(ToolParser): """ Tool call parser for Llama 3.1 models intended for use with the