Skip to content

Commit d6cd59f

Browse files
authored
[Frontend] Support tool calling and reasoning parser (#14511)
Signed-off-by: WangErXiao <863579016@qq.com>
1 parent bc8ed3c commit d6cd59f

File tree

8 files changed

+555
-63
lines changed

8 files changed

+555
-63
lines changed

.buildkite/test-pipeline.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ steps:
118118
- pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
119119
- pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
120120
- VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
121-
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/correctness/
121+
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/
122122
- pytest -v -s entrypoints/test_chat_utils.py
123123
- VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
124124

docs/source/features/reasoning_outputs.md

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ Reasoning models return an additional `reasoning_content` field in their outputs,
1010

1111
vLLM currently supports the following reasoning models:
1212

13-
| Model Series | Parser Name | Structured Output Support |
14-
|--------------|-------------|------------------|
15-
| [DeepSeek R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `deepseek_r1` | `guided_json`, `guided_regex` |
16-
| [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B) | `deepseek_r1` | `guided_json`, `guided_regex` |
13+
| Model Series | Parser Name | Structured Output Support | Tool Calling |
14+
|--------------|-------------|------------------|-------------|
15+
| [DeepSeek R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `deepseek_r1` | `guided_json`, `guided_regex` ||
16+
| [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B) | `deepseek_r1` | `guided_json`, `guided_regex` ||
1717

1818
## Quickstart
1919

@@ -170,10 +170,51 @@ print("reasoning_content: ", completion.choices[0].message.reasoning_content)
170170
print("content: ", completion.choices[0].message.content)
171171
```
172172

173+
## Tool Calling
174+
175+
The reasoning content is also available when both tool calling and the reasoning parser are enabled. Additionally, tool calling only parses functions from the `content` field, not from the `reasoning_content`.
176+
177+
```python
178+
from openai import OpenAI
179+
180+
client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
181+
182+
tools = [{
183+
"type": "function",
184+
"function": {
185+
"name": "get_weather",
186+
"description": "Get the current weather in a given location",
187+
"parameters": {
188+
"type": "object",
189+
"properties": {
190+
"location": {"type": "string", "description": "City and state, e.g., 'San Francisco, CA'"},
191+
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
192+
},
193+
"required": ["location", "unit"]
194+
}
195+
}
196+
}]
197+
198+
response = client.chat.completions.create(
199+
model=client.models.list().data[0].id,
200+
messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
201+
tools=tools,
202+
tool_choice="auto"
203+
)
204+
205+
print(response)
206+
tool_call = response.choices[0].message.tool_calls[0].function
207+
208+
print(f"reasoning_content: {response.choices[0].message.reasoning_content}")
209+
print(f"Function called: {tool_call.name}")
210+
print(f"Arguments: {tool_call.arguments}")
211+
```
212+
213+
For more examples, please refer to <gh-file:examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py>.
214+
173215
## Limitations
174216

175217
- The reasoning content is only available for online serving's chat completion endpoint (`/v1/chat/completions`).
176-
- It is not compatible with [`tool_calling`](#tool_calling).
177218

178219
## How to support a new reasoning model
179220

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
"""
3+
An example that demonstrates how to use tool calling with reasoning models
4+
like QwQ-32B. The reasoning_content will not be parsed by the tool
5+
calling process; only the final output will be parsed.
6+
7+
To run this example, you need to start the vLLM server with both
8+
the reasoning parser and tool calling enabled.
9+
10+
```bash
11+
vllm serve Qwen/QwQ-32B \
12+
--enable-reasoning --reasoning-parser deepseek_r1 \
13+
--enable-auto-tool-choice --tool-call-parser hermes
14+
15+
```
16+
17+
"""
18+
19+
from openai import OpenAI
20+
21+
22+
# Now, simulate a tool call
def get_current_weather(city: str, state: str, unit: str):
    """Stub weather lookup used to simulate executing a tool call.

    Returns a canned report regardless of the arguments; a real tool
    would query a weather service for ``city``/``state`` in ``unit``.
    """
    # Fixed: annotation was the string literal 'str'; typo "cloudly" -> "cloudy".
    return ("The weather in Dallas, Texas is 85 degrees fahrenheit. It is "
            "partly cloudy, with highs in the 90's.")
26+
27+
28+
# Map tool names to their local implementations so a returned tool call
# could be dispatched by name (not exercised further in this example).
available_tools = {"get_current_weather": get_current_weather}

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)

# Use whichever model the local server is serving.
models = client.models.list()
model = models.data[0].id
41+
42+
# OpenAI-style tool schema advertised to the model. Only `content` (not
# `reasoning_content`) is parsed for tool calls by the server.
tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type":
                    "string",
                    "description":
                    "The city to find the weather for, e.g. 'San Francisco'"
                },
                "state": {
                    "type":
                    "string",
                    "description":
                    "the two-letter abbreviation for the state that the city is"
                    " in, e.g. 'CA' which would mean 'California'"
                },
                "unit": {
                    "type": "string",
                    "description": "The unit to fetch the temperature in",
                    "enum": ["celsius", "fahrenheit"]
                }
            },
            "required": ["city", "state", "unit"]
        }
    }
}]

# Conversation history ending in a question that should trigger a tool call.
# Fixed: typo "temperate" -> "temperature" in the final user message.
messages = [{
    "role": "user",
    "content": "Hi! How are you doing today?"
}, {
    "role": "assistant",
    "content": "I'm doing well! How can I help you?"
}, {
    "role":
    "user",
    "content":
    "Can you tell me what the temperature will be in Dallas, in fahrenheit?"
}]
85+
86+
87+
def extract_reasoning_and_calls(chunks: list):
    """Aggregate streamed chat-completion chunks into usable pieces.

    Args:
        chunks: list of streaming ChatCompletionChunk objects.

    Returns:
        Tuple ``(reasoning_content, arguments, function_names)`` where
        ``reasoning_content`` is the concatenated reasoning text and
        ``arguments`` / ``function_names`` accumulate per tool call,
        positioned by each tool-call delta's ``index`` field.
    """
    reasoning_content = ""
    tool_call_idx = -1
    arguments = []
    function_names = []
    for chunk in chunks:
        delta = chunk.choices[0].delta
        if delta.tool_calls:
            tool_call = delta.tool_calls[0]
            if tool_call.index != tool_call_idx:
                # A new tool call started: open fresh accumulators for it.
                tool_call_idx = tool_call.index
                arguments.append("")
                function_names.append("")

            if tool_call.function:
                if tool_call.function.name:
                    function_names[tool_call_idx] = tool_call.function.name

                if tool_call.function.arguments:
                    arguments[tool_call_idx] += tool_call.function.arguments
        else:
            # Bug fix: the delta may expose reasoning_content=None (e.g. a
            # plain content chunk); the original hasattr-only check would
            # then raise TypeError on string concatenation.
            piece = getattr(delta, "reasoning_content", None)
            if piece:
                reasoning_content += piece
    return reasoning_content, arguments, function_names
110+
111+
112+
# --- Non-streaming request, automatic tool choice --------------------------
print("---------Full Generate With Automatic Function Calling-------------")
response = client.chat.completions.create(messages=messages,
                                          model=model,
                                          tools=tools)
print(f"reasoning_content: {response.choices[0].message.reasoning_content}")
print(f"function name: "
      f"{response.choices[0].message.tool_calls[0].function.name}")
print(f"function arguments: "
      f"{response.choices[0].message.tool_calls[0].function.arguments}")

# --- Streaming request, automatic tool choice ------------------------------
print("----------Stream Generate With Automatic Function Calling-----------")
stream = client.chat.completions.create(messages=messages,
                                        model=model,
                                        tools=tools,
                                        stream=True)
# Drain the stream, then aggregate the deltas.
collected = list(stream)

reasoning_content, arguments, function_names = extract_reasoning_and_calls(
    collected)

print(f"reasoning_content: {reasoning_content}")
print(f"function name: {function_names[0]}")
print(f"function arguments: {arguments[0]}")

# --- Non-streaming request, named (forced) tool ----------------------------
print("----------Full Generate With Named Function Calling-----------------")
named_tool_choice = {
    "type": "function",
    "function": {
        "name": "get_current_weather"
    }
}
response = client.chat.completions.create(messages=messages,
                                          model=model,
                                          tools=tools,
                                          tool_choice=named_tool_choice)

called = response.choices[0].message.tool_calls[0].function
print(f"reasoning_content: {response.choices[0].message.reasoning_content}")
print(f"function name: {called.name}")
print(f"function arguments: {called.arguments}")
print("----------Stream Generate With Named Function Calling--------------")

# --- Streaming request, named (forced) tool --------------------------------
stream = client.chat.completions.create(messages=messages,
                                        model=model,
                                        tools=tools,
                                        tool_choice=named_tool_choice,
                                        stream=True)
collected = list(stream)

reasoning_content, arguments, function_names = extract_reasoning_and_calls(
    collected)
print(f"reasoning_content: {reasoning_content}")
print(f"function name: {function_names[0]}")
print(f"function arguments: {arguments[0]}")
print("\n\n")

0 commit comments

Comments
 (0)