From a4618f731ce003c5fd431903386c18d6ab3d698c Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 10 Apr 2025 16:37:52 -0300
Subject: [PATCH 1/4] Add chat template for Llama 4 models

The main differences are that some tokens have changed, e.g.
start_header_id was changed to header_start. The models now also
support multiple tool calls, although one of our parallel tool call
tests is still failing. The Meta chat template also removed the
sentence about the knowledge cutoff date and the current date.

Signed-off-by: Max de Bayser
---
 docs/source/features/tool_calling.md          |   3 +
 examples/tool_chat_template_llama4_json.jinja | 118 ++++++++++++++++++
 tests/tool_use/utils.py                       |  14 +++
 3 files changed, 135 insertions(+)
 create mode 100644 examples/tool_chat_template_llama4_json.jinja

diff --git a/docs/source/features/tool_calling.md b/docs/source/features/tool_calling.md
index 17cee6da471c..e5f23ad8ce08 100644
--- a/docs/source/features/tool_calling.md
+++ b/docs/source/features/tool_calling.md
@@ -156,6 +156,8 @@ Supported models:
 * `meta-llama/Meta-Llama-3.1-70B-Instruct`
 * `meta-llama/Meta-Llama-3.1-405B-Instruct`
 * `meta-llama/Meta-Llama-3.1-405B-Instruct-FP8`
+* `meta-llama/Llama-3.2-3B-Instruct`
+* `meta-llama/Llama-4-Scout-17B-16E-Instruct`
 
 The tool calling that is supported is the [JSON based tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#json-based-tool-calling). For [pythonic tool calling](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#zero-shot-function-calling) in Llama-3.2 models, see the `pythonic` tool parser below. Other tool calling formats like the built in python tool calling or custom tool calling are not supported.
 
@@ -170,6 +172,7 @@ The `tool_chat_template_llama3_json.jinja` file contains the "official" Llama ch
 it works better with vLLM.
 
 Recommended flags: `--tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3_json.jinja`
+For Llama 4 use `examples/tool_chat_template_llama4_json.jinja`.
 
 #### IBM Granite
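As a quick smoke test of the new template (an illustrative sketch, not part of the patch), a tool-call request against a server started with `--enable-auto-tool-choice --tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama4_json.jinja` should come back as a structured tool call rather than plain text. The `get_weather` tool, the port, and the prompt below are invented for the example:

```python
# Illustrative client-side check; assumes a vLLM OpenAI-compatible server
# is already running on localhost:8000 with the flags mentioned above.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="empty")

# Hypothetical tool definition, used only to exercise the template.
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather in a given city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "City name"}
            },
            "required": ["city"],
        },
    },
}]

response = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "What's the weather in Dublin?"}],
    tools=tools,
)
# With the tool parser active, this is a list of parsed tool calls, not text.
print(response.choices[0].message.tool_calls)
```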
diff --git a/examples/tool_chat_template_llama4_json.jinja b/examples/tool_chat_template_llama4_json.jinja
new file mode 100644
index 000000000000..264745434ffb
--- /dev/null
+++ b/examples/tool_chat_template_llama4_json.jinja
@@ -0,0 +1,118 @@
+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not date_string is defined %}
+    {%- if strftime_now is defined %}
+        {%- set date_string = strftime_now("%d %b %Y") %}
+    {%- else %}
+        {%- set date_string = "26 Jul 2024" %}
+    {%- endif %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- if messages[0]['content'] is string %}
+        {%- set system_message = messages[0]['content']|trim %}
+    {%- else %}
+        {#- FIXME: The processor requires an array, always. #}
+        {%- set system_message = messages[0]['content'][0]['text']|trim %}
+    {%- endif %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "" %}
+{%- endif %}
+
+{{- "<|header_start|>system<|header_end|>\n\n" }}
+{%- if tools is not none %}
+    {{- "Environment: ipython\n" }}
+{%- endif %}
+{%- if tools is not none and not tools_in_user_message %}
+    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot|>" }}
+
+{#- Custom tools are passed in a user message with some extra guidance #}
+{%- if tools_in_user_message and not tools is none %}
+    {#- Extract the first user message so we can plug it in here #}
+    {%- if messages | length != 0 %}
+        {%- set first_user_message = messages[0]['content']|trim %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+{%- endif %}
+    {{- '<|header_start|>user<|header_end|>\n\n' -}}
+    {{- "Given the following functions, please respond with a JSON for a function call " }}
+    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- first_user_message + "<|eot|>"}}
+{%- endif %}
+
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
+        {%- if message['content'] is string %}
+            {{- message['content'] }}
+        {%- else %}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'image' %}
+                    {{- '<|image|>' }}
+                {%- elif content['type'] == 'text' %}
+                    {{- content['text'] }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- "<|eot|>" }}
+    {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
+        {{- '<|header_start|>assistant<|header_end|>\n\n' -}}
+        {{- '<|python_start|>' }}
+        {%- if message['content'] is string %}
+            {{- message['content'] }}
+        {%- else %}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'image' %}
+                    {{- '<|image|>' }}
+                {%- elif content['type'] == 'text' %}
+                    {{- content['text'] }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|python_end|>' }}
+        {%- for tool_call in message.tool_calls %}
+            {{- '{"name": "' + tool_call.function.name + '", ' }}
+            {{- '"parameters": ' }}
+            {{- tool_call.function.arguments | tojson }}
+            {{- "}" }}
+        {%- endfor %}
+        {{- "<|eot|>" }}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|header_start|>ipython<|header_end|>\n\n" }}
+        {%- if message.content is mapping or message.content is iterable %}
+            {{- message.content | tojson }}
+        {%- else %}
+            {{- message.content }}
+        {%- endif %}
+        {{- "<|eot|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|header_start|>assistant<|header_end|>\n\n' }}
+{%- endif %}
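To see exactly what prompt this template produces (and to confirm the header_start/header_end tokens mentioned in the commit message), it can be rendered offline with transformers' chat-template support. A minimal sketch, assuming access to the gated checkpoint; the tool definition and message are illustrative:

```python
# Render the new template offline to inspect the resulting prompt string.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-4-Scout-17B-16E-Instruct")
with open("examples/tool_chat_template_llama4_json.jinja") as f:
    template = f.read()

# Hypothetical tool, for illustration only.
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather in a given city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "What's the weather in Dublin?"}],
    tools=tools,
    chat_template=template,
    tokenize=False,
    add_generation_prompt=True,
)
# The output should use <|header_start|>/<|header_end|>, not the
# <|start_header_id|>/<|end_header_id|> markers of the Llama 3 template.
print(prompt)
```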
"mp", + # "--tool-call-parser", "llama3_json", "--chat-template", + # str(VLLM_PATH / "examples/tool_chat_template_llama4_json.jinja") + # ], + # "supports_parallel": + # False, # <--- one of the parallel tests actually passes + # }, "mistral": { "model": "mistralai/Mistral-7B-Instruct-v0.3", From 1c5dd8c695f326810a0d29e560ff616dec3d375e Mon Sep 17 00:00:00 2001 From: Max de Bayser Date: Thu, 10 Apr 2025 17:42:29 -0300 Subject: [PATCH 2/4] Add llama4_json as alias to the llama tool call parser Signed-off-by: Max de Bayser --- docs/source/features/tool_calling.md | 2 +- vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/features/tool_calling.md b/docs/source/features/tool_calling.md index e5f23ad8ce08..aa28bafa7d04 100644 --- a/docs/source/features/tool_calling.md +++ b/docs/source/features/tool_calling.md @@ -172,7 +172,7 @@ The `tool_chat_template_llama3_json.jinja` file contains the "official" Llama ch it works better with vLLM. Recommended flags: `--tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3_json.jinja` -For Llama 4 use `examples/tool_chat_template_llama4_json.jinja`. +For Llama 4 use `--tool-call-parser llama4_json examples/tool_chat_template_llama4_json.jinja`. #### IBM Granite diff --git a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py index 20c3238fb3df..5c181616aa01 100644 --- a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py @@ -27,6 +27,7 @@ @ToolParserManager.register_module("llama3_json") +@ToolParserManager.register_module("llama4_json") class Llama3JsonToolParser(ToolParser): """ Tool call parser for Llama 3.1 models intended for use with the From fcf0e4738da76bd4e388591bc49e525cc9ea7b60 Mon Sep 17 00:00:00 2001 From: Max de Bayser Date: Tue, 15 Apr 2025 11:37:12 -0300 Subject: [PATCH 3/4] Refactor llama 4 json template to support parallel tools Signed-off-by: Max de Bayser --- examples/tool_chat_template_llama4_json.jinja | 128 +++++++++--------- tests/tool_use/utils.py | 28 ++-- 2 files changed, 77 insertions(+), 79 deletions(-) diff --git a/examples/tool_chat_template_llama4_json.jinja b/examples/tool_chat_template_llama4_json.jinja index 264745434ffb..759f16554436 100644 --- a/examples/tool_chat_template_llama4_json.jinja +++ b/examples/tool_chat_template_llama4_json.jinja @@ -1,3 +1,34 @@ +{%- macro is_array_of_type_objects(var) -%} + {%- if var is iterable and var is not string -%} + {%- set valid = true -%} + {%- for item in var -%} + {%- if 'type' not in item -%} + {%- set valid = false -%} + {%- break -%} + {%- endif -%} + {%- endfor -%} + {{ valid }} + {%- else -%} + {{ false }} + {%- endif -%} +{%- endmacro %} + +{%- macro render_message(message) %} + {%- if message['content'] is string %} + {{- message['content']|trim }} + {%- elif is_array_of_type_objects(data) == 'True' %} + {%- for content in message['content'] %} + {%- if content['type'] == 'image' %} + {{- '<|image|>' }} + {%- elif content['type'] == 'text' %} + {{- content['text']|trim }} + {%- endif %} + {%- endfor %} + {%- else %} + {{- message['content']|tojson }} + {%- endif %} +{%- endmacro %} + {{- bos_token }} {%- if custom_tools is defined %} {%- set tools = custom_tools %} @@ -5,114 +36,81 @@ {%- if not tools_in_user_message is defined %} {%- set tools_in_user_message = true %} {%- endif %} -{%- if not date_string is defined %} - {%- if 
From fcf0e4738da76bd4e388591bc49e525cc9ea7b60 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Tue, 15 Apr 2025 11:37:12 -0300
Subject: [PATCH 3/4] Refactor llama 4 json template to support parallel tool
 calls

Signed-off-by: Max de Bayser
---
 examples/tool_chat_template_llama4_json.jinja | 128 +++++++++---------
 tests/tool_use/utils.py                       |  28 ++--
 2 files changed, 77 insertions(+), 79 deletions(-)

diff --git a/examples/tool_chat_template_llama4_json.jinja b/examples/tool_chat_template_llama4_json.jinja
index 264745434ffb..759f16554436 100644
--- a/examples/tool_chat_template_llama4_json.jinja
+++ b/examples/tool_chat_template_llama4_json.jinja
@@ -1,3 +1,34 @@
+{%- macro is_array_of_type_objects(var) -%}
+    {%- if var is iterable and var is not string -%}
+        {%- set ns = namespace(valid=true) -%}
+        {%- for item in var -%}
+            {%- if 'type' not in item -%}
+                {%- set ns.valid = false -%}
+                {%- break -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {{ ns.valid }}
+    {%- else -%}
+        {{ false }}
+    {%- endif -%}
+{%- endmacro %}
+
+{%- macro render_message(message) %}
+    {%- if message['content'] is string %}
+        {{- message['content']|trim }}
+    {%- elif is_array_of_type_objects(message['content']) == 'True' %}
+        {%- for content in message['content'] %}
+            {%- if content['type'] == 'image' %}
+                {{- '<|image|>' }}
+            {%- elif content['type'] == 'text' %}
+                {{- content['text']|trim }}
+            {%- endif %}
+        {%- endfor %}
+    {%- else %}
+        {{- message['content']|tojson }}
+    {%- endif %}
+{%- endmacro %}
+
 {{- bos_token }}
 {%- if custom_tools is defined %}
     {%- set tools = custom_tools %}
@@ -5,114 +36,81 @@
 {%- if not tools_in_user_message is defined %}
     {%- set tools_in_user_message = true %}
 {%- endif %}
-{%- if not date_string is defined %}
-    {%- if strftime_now is defined %}
-        {%- set date_string = strftime_now("%d %b %Y") %}
-    {%- else %}
-        {%- set date_string = "26 Jul 2024" %}
-    {%- endif %}
-{%- endif %}
 {%- if not tools is defined %}
     {%- set tools = none %}
 {%- endif %}
 
 {#- This block extracts the system message, so we can slot it into the right place. #}
 {%- if messages[0]['role'] == 'system' %}
-    {%- if messages[0]['content'] is string %}
-        {%- set system_message = messages[0]['content']|trim %}
-    {%- else %}
-        {#- FIXME: The processor requires an array, always. #}
-        {%- set system_message = messages[0]['content'][0]['text']|trim %}
-    {%- endif %}
+    {%- set system_message = messages[0] %}
     {%- set messages = messages[1:] %}
 {%- else %}
-    {%- set system_message = "" %}
+    {%- set system_message = ({ "content": "You are a helpful assistant with tool calling "
+        "capabilities. Only reply with a tool call if the function exists in the "
+        "library provided by the user. If it doesn't exist, just reply directly in "
+        "natural language. When you receive a tool call response, use the output to "
+        "format an answer to the original user question."}) %}
 {%- endif %}
 
+{%- set tool_lib_preamble = 'Tools: You have access to the following tools. You might need to use one '
+    'or more function/tool calls to fulfill the task. \n'
+    'If none are needed, then proceed to the response.\n\n'
+    'Tool Call Syntax: You can call tools using the following syntax:\n'
+    '{"name": function name, "parameters": dictionary of argument name and its value}.\n'
+    'Separate multiple function calls by "; ". Do not use variables.\n'
+    'Do not include anything else when calling the tools with the syntax above.\n\n'
+    'Here is a list of functions in JSON format that you can invoke.\n' %}
+
 {{- "<|header_start|>system<|header_end|>\n\n" }}
-{%- if tools is not none %}
-    {{- "Environment: ipython\n" }}
-{%- endif %}
 {%- if tools is not none and not tools_in_user_message %}
-    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
-    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
-    {{- "Do not use variables.\n\n" }}
+    {{- tool_lib_preamble }}
     {%- for t in tools %}
         {{- t | tojson(indent=4) }}
         {{- "\n\n" }}
     {%- endfor %}
 {%- endif %}
-{{- system_message }}
-{{- "<|eot|>" }}
+{{- render_message(system_message) }}
+{{ "<|eot|>\n" }}
 
 {#- Custom tools are passed in a user message with some extra guidance #}
 {%- if tools_in_user_message and not tools is none %}
     {#- Extract the first user message so we can plug it in here #}
     {%- if messages | length != 0 %}
-        {%- set first_user_message = messages[0]['content']|trim %}
+        {%- set first_user_message = messages[0] %}
         {%- set messages = messages[1:] %}
     {%- else %}
         {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
-{%- endif %}
-    {{- '<|header_start|>user<|header_end|>\n\n' -}}
-    {{- "Given the following functions, please respond with a JSON for a function call " }}
-    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
-    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
-    {{- "Do not use variables.\n\n" }}
+    {%- endif %}
+    {{- '<|header_start|>user<|header_end|>\n\n' }}
+    {{- tool_lib_preamble }}
     {%- for t in tools %}
         {{- t | tojson(indent=4) }}
         {{- "\n\n" }}
     {%- endfor %}
-    {{- first_user_message + "<|eot|>"}}
+    {{- render_message(first_user_message) + "\n<|eot|>"}}
 {%- endif %}
 
 {%- for message in messages %}
     {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
         {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
-        {%- if message['content'] is string %}
-            {{- message['content'] }}
-        {%- else %}
-            {%- for content in message['content'] %}
-                {%- if content['type'] == 'image' %}
-                    {{- '<|image|>' }}
-                {%- elif content['type'] == 'text' %}
-                    {{- content['text'] }}
-                {%- endif %}
-            {%- endfor %}
-        {%- endif %}
-        {{- "<|eot|>" }}
+        {{- render_message(message) }}
+        {{- "\n<|eot|>" }}
    {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
-        {{- '<|header_start|>assistant<|header_end|>\n\n' -}}
-        {{- '<|python_start|>' }}
-        {%- if message['content'] is string %}
-            {{- message['content'] }}
-        {%- else %}
-            {%- for content in message['content'] %}
-                {%- if content['type'] == 'image' %}
-                    {{- '<|image|>' }}
-                {%- elif content['type'] == 'text' %}
-                    {{- content['text'] }}
-                {%- endif %}
-            {%- endfor %}
-        {%- endif %}
-        {{- '<|python_end|>' }}
+        {{- '\n<|header_start|>assistant<|header_end|>\n\n' -}}
+        {{- render_message(message) }}
         {%- for tool_call in message.tool_calls %}
             {{- '{"name": "' + tool_call.function.name + '", ' }}
             {{- '"parameters": ' }}
             {{- tool_call.function.arguments | tojson }}
             {{- "}" }}
         {%- endfor %}
-        {{- "<|eot|>" }}
+        {{- "\n<|eot|>" }}
     {%- elif message.role == "tool" or message.role == "ipython" %}
-        {{- "<|header_start|>ipython<|header_end|>\n\n" }}
-        {%- if message.content is mapping or message.content is iterable %}
-            {{- message.content | tojson }}
-        {%- else %}
-            {{- message.content }}
-        {%- endif %}
-        {{- "<|eot|>" }}
+        {{- "\n<|header_start|>ipython<|header_end|>\n\n" }}
+        {{- render_message(message) }}
+        {{- "\n<|eom|>" }}
     {%- endif %}
 {%- endfor %}
 {%- if add_generation_prompt %}
-    {{- '<|header_start|>assistant<|header_end|>\n\n' }}
+    {{- '\n<|header_start|>assistant<|header_end|>\n\n' }}
 {%- endif %}
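The render_message macro centralizes the three content shapes the template must handle. For readers who find the Jinja scoping hard to follow, here is the equivalent logic in plain Python; an illustration only, the template itself is the source of truth:

```python
# Python equivalent of the template's render_message content handling.
import json


def render_message_content(content) -> str:
    # Case 1: plain string content.
    if isinstance(content, str):
        return content.strip()
    # Case 2: a list of {"type": ...} parts (text and image).
    if isinstance(content, list) and all(
            isinstance(part, dict) and "type" in part for part in content):
        rendered = []
        for part in content:
            if part["type"] == "image":
                rendered.append("<|image|>")
            elif part["type"] == "text":
                rendered.append(part["text"].strip())
        return "".join(rendered)
    # Case 3: anything else (e.g. tool results passed as dicts or lists)
    # is JSON-encoded, matching the |tojson fallback.
    return json.dumps(content)


print(render_message_content([{"type": "text", "text": " hi "}]))  # hi
print(render_message_content({"temperature_c": 21}))  # {"temperature_c": 21}
```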
diff --git a/tests/tool_use/utils.py b/tests/tool_use/utils.py
index fcefa18d9834..c14eaf71e978 100644
--- a/tests/tool_use/utils.py
+++ b/tests/tool_use/utils.py
@@ -98,20 +98,20 @@ def ensure_system_prompt(messages: list[dict[str, Any]],
         "extended": True
     },
-    # TODO: this is a configuration that works in 4 A100s
-    # but enabling it would probably break CI
-    # "llama4": {
-    #     "model":
-    #     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-    #     "arguments": [
-    #         "--enforce-eager", "--no-enable-prefix-caching",
-    #         "-tp", "4", "--distributed-executor-backend", "mp",
-    #         "--tool-call-parser", "llama3_json", "--chat-template",
-    #         str(VLLM_PATH / "examples/tool_chat_template_llama4_json.jinja")
-    #     ],
-    #     "supports_parallel":
-    #     False,  # <--- one of the parallel tests actually passes
-    # },
+    "llama4_json": {
+        "model":
+        "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+        "arguments": [
+            "--enforce-eager", "--no-enable-prefix-caching", "-tp", "4",
+            "--distributed-executor-backend", "mp", "--tool-call-parser",
+            "llama4_json", "--chat-template",
+            str(VLLM_PATH / "examples/tool_chat_template_llama4_json.jinja")
+        ],
+        "supports_parallel":
+        True,
+        "extended":
+        True
+    },
     "mistral": {
         "model":
         "mistralai/Mistral-7B-Instruct-v0.3",
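With "supports_parallel" now set to True, the extended tests exercise the "; "-separated syntax that the new tool_lib_preamble asks the model for. A stdlib-only sketch of splitting such an output into individual calls (again, not vLLM's parser implementation):

```python
# Parallel tool calls arrive as JSON objects separated by "; ".
import json

model_output = (
    '{"name": "get_weather", "parameters": {"city": "Dublin"}}; '
    '{"name": "get_weather", "parameters": {"city": "Porto Alegre"}}')

calls = [json.loads(chunk) for chunk in model_output.split("; ")]
assert [c["parameters"]["city"] for c in calls] == ["Dublin", "Porto Alegre"]
```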
From 200079144bff82149cd18514a4427914b488e6ed Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Tue, 22 Apr 2025 15:43:03 -0300
Subject: [PATCH 4/4] trigger ci

Signed-off-by: Max de Bayser