Skip to content

Commit 9383cd6

Browse files
authored
[Frontend] Add a new xml-based tool parser for qwen3-coder (#25028)
Signed-off-by: Zhikaiiii <1658973216@qq.com>
1 parent ba8d216 commit 9383cd6

File tree

4 files changed

+1238
-22
lines changed

4 files changed

+1238
-22
lines changed

docs/features/tool_calling.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,15 @@ Supported models:
319319

320320
Flags: `--tool-call-parser glm45`
321321

322+
### Qwen3-Coder Models (`qwen3_xml`)
323+
324+
Supported models:
325+
326+
* `Qwen/Qwen3-Coder-480B-A35B-Instruct`
327+
* `Qwen/Qwen3-Coder-30B-A3B-Instruct`
328+
329+
Flags: `--tool-call-parser qwen3_xml`
330+
322331
### Models with Pythonic Tool Calls (`pythonic`)
323332

324333
A growing number of models output a python list to represent tool calls instead of using JSON. This has the advantage of inherently supporting parallel tool calls and removing ambiguity around the JSON schema required for tool calls. The `pythonic` tool parser can support such models.

tests/tool_use/test_qwen3coder_tool_parser.py

Lines changed: 90 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
ToolCall)
1414
from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
1515
Qwen3CoderToolParser)
16+
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import (
17+
Qwen3XMLToolParser)
1618
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
1719
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
1820

@@ -29,6 +31,21 @@ def qwen3_tool_parser(qwen3_tokenizer):
2931
return Qwen3CoderToolParser(qwen3_tokenizer)
3032

3133

34+
@pytest.fixture
35+
def qwen3_xml_tool_parser(qwen3_tokenizer):
36+
return Qwen3XMLToolParser(qwen3_tokenizer)
37+
38+
39+
@pytest.fixture(params=["original", "xml"])
40+
def qwen3_tool_parser_parametrized(qwen3_tool_parser, qwen3_xml_tool_parser,
41+
request):
42+
"""Parameterized fixture that provides both parser types for testing"""
43+
if request.param == "original":
44+
return qwen3_tool_parser
45+
else:
46+
return qwen3_xml_tool_parser
47+
48+
3249
@pytest.fixture
3350
def sample_tools():
3451
return [
@@ -95,7 +112,7 @@ def assert_tool_calls(actual_tool_calls: list[ToolCall],
95112

96113

97114
def stream_delta_message_generator(
98-
qwen3_tool_parser: Qwen3CoderToolParser,
115+
qwen3_tool_parser,
99116
qwen3_tokenizer: AnyTokenizer,
100117
model_output: str,
101118
request: Optional[ChatCompletionRequest] = None
@@ -144,9 +161,9 @@ def stream_delta_message_generator(
144161
read_offset = new_read_offset
145162

146163

147-
def test_extract_tool_calls_no_tools(qwen3_tool_parser):
164+
def test_extract_tool_calls_no_tools(qwen3_tool_parser_parametrized):
148165
model_output = "This is a test response without any tool calls"
149-
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
166+
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
150167
model_output, request=None) # type: ignore[arg-type]
151168
assert not extracted_tool_calls.tools_called
152169
assert extracted_tool_calls.tool_calls == []
@@ -294,12 +311,13 @@ def test_extract_tool_calls_no_tools(qwen3_tool_parser):
294311
], "Let me calculate that area for you."),
295312
],
296313
)
297-
def test_extract_tool_calls(qwen3_tool_parser, sample_tools, model_output,
298-
expected_tool_calls, expected_content):
314+
def test_extract_tool_calls(qwen3_tool_parser_parametrized, sample_tools,
315+
model_output, expected_tool_calls,
316+
expected_content):
299317
request = ChatCompletionRequest(model=MODEL,
300318
messages=[],
301319
tools=sample_tools)
302-
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
320+
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
303321
model_output, request=request)
304322
assert extracted_tool_calls.tools_called
305323

@@ -308,7 +326,8 @@ def test_extract_tool_calls(qwen3_tool_parser, sample_tools, model_output,
308326
assert extracted_tool_calls.content == expected_content
309327

310328

311-
def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser, sample_tools):
329+
def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser_parametrized,
330+
sample_tools):
312331
"""Test fallback parsing when XML tags are missing"""
313332
model_output = '''<function=get_current_weather>
314333
<parameter=city>
@@ -322,7 +341,7 @@ def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser, sample_tools):
322341
request = ChatCompletionRequest(model=MODEL,
323342
messages=[],
324343
tools=sample_tools)
325-
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
344+
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
326345
model_output, request=request)
327346

328347
assert extracted_tool_calls.tools_called
@@ -331,7 +350,7 @@ def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser, sample_tools):
331350
"get_current_weather")
332351

333352

334-
def test_extract_tool_calls_type_conversion(qwen3_tool_parser):
353+
def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized):
335354
"""Test parameter type conversion based on tool schema"""
336355
tools = [
337356
ChatCompletionToolsParam(type="function",
@@ -381,7 +400,7 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser):
381400
</tool_call>'''
382401

383402
request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
384-
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
403+
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
385404
model_output, request=request)
386405

387406
args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
@@ -536,9 +555,10 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser):
536555
], "Let me calculate that area for you."),
537556
],
538557
)
539-
def test_extract_tool_calls_streaming(qwen3_tool_parser, qwen3_tokenizer,
540-
sample_tools, model_output,
541-
expected_tool_calls, expected_content):
558+
def test_extract_tool_calls_streaming(qwen3_tool_parser_parametrized,
559+
qwen3_tokenizer, sample_tools,
560+
model_output, expected_tool_calls,
561+
expected_content):
542562
"""Test incremental streaming behavior including typed parameters"""
543563
request = ChatCompletionRequest(model=MODEL,
544564
messages=[],
@@ -548,7 +568,8 @@ def test_extract_tool_calls_streaming(qwen3_tool_parser, qwen3_tokenizer,
548568
tool_states = {} # Track state per tool index
549569

550570
for delta_message in stream_delta_message_generator(
551-
qwen3_tool_parser, qwen3_tokenizer, model_output, request):
571+
qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output,
572+
request):
552573
# role should never be streamed from tool parser
553574
assert not delta_message.role
554575

@@ -609,7 +630,7 @@ def test_extract_tool_calls_streaming(qwen3_tool_parser, qwen3_tokenizer,
609630

610631

611632
def test_extract_tool_calls_missing_closing_parameter_tag(
612-
qwen3_tool_parser, sample_tools):
633+
qwen3_tool_parser_parametrized, sample_tools):
613634
"""Test handling of missing closing </parameter> tag"""
614635
# Using get_current_weather from sample_tools but with malformed XML
615636
model_output = '''Let me check the weather for you:
@@ -629,7 +650,7 @@ def test_extract_tool_calls_missing_closing_parameter_tag(
629650
request = ChatCompletionRequest(model=MODEL,
630651
messages=[],
631652
tools=sample_tools)
632-
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
653+
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
633654
model_output, request=request)
634655

635656
# The parser should handle the malformed XML gracefully
@@ -652,7 +673,7 @@ def test_extract_tool_calls_missing_closing_parameter_tag(
652673

653674

654675
def test_extract_tool_calls_streaming_missing_closing_tag(
655-
qwen3_tool_parser, qwen3_tokenizer, sample_tools):
676+
qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools):
656677
"""Test streaming with missing closing </parameter> tag"""
657678
# Using get_current_weather from sample_tools but with malformed XML
658679
model_output = '''Let me check the weather for you:
@@ -677,7 +698,8 @@ def test_extract_tool_calls_streaming_missing_closing_tag(
677698
tool_states = {}
678699

679700
for delta_message in stream_delta_message_generator(
680-
qwen3_tool_parser, qwen3_tokenizer, model_output, request):
701+
qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output,
702+
request):
681703

682704
if delta_message.content:
683705
other_content += delta_message.content
@@ -727,9 +749,8 @@ def test_extract_tool_calls_streaming_missing_closing_tag(
727749
assert args["unit"] == "fahrenheit"
728750

729751

730-
def test_extract_tool_calls_streaming_incremental(qwen3_tool_parser,
731-
qwen3_tokenizer,
732-
sample_tools):
752+
def test_extract_tool_calls_streaming_incremental(
753+
qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools):
733754
"""Test that streaming is truly incremental"""
734755
model_output = '''I'll check the weather.<tool_call>
735756
<function=get_current_weather>
@@ -748,7 +769,8 @@ def test_extract_tool_calls_streaming_incremental(qwen3_tool_parser,
748769

749770
chunks = []
750771
for delta_message in stream_delta_message_generator(
751-
qwen3_tool_parser, qwen3_tokenizer, model_output, request):
772+
qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output,
773+
request):
752774
chunks.append(delta_message)
753775

754776
# Should have multiple chunks
@@ -784,3 +806,49 @@ def test_extract_tool_calls_streaming_incremental(qwen3_tool_parser,
784806
parsed_args = json.loads(full_args)
785807
assert parsed_args["city"] == "Dallas"
786808
assert parsed_args["state"] == "TX"
809+
810+
811+
def test_extract_tool_calls_complex_type_with_single_quote(
812+
qwen3_tool_parser_parametrized):
813+
"""Test that an object parameter written with single quotes (Python dict style) is parsed into a dict"""
814+
tools = [
815+
ChatCompletionToolsParam(type="function",
816+
function={
817+
"name": "test_types",
818+
"parameters": {
819+
"type": "object",
820+
"properties": {
821+
"int_param": {
822+
"type": "integer"
823+
},
824+
"float_param": {
825+
"type": "float"
826+
},
827+
"bool_param": {
828+
"type": "boolean"
829+
},
830+
"str_param": {
831+
"type": "string"
832+
},
833+
"obj_param": {
834+
"type": "object"
835+
}
836+
}
837+
}
838+
})
839+
]
840+
841+
model_output = '''<tool_call>
842+
<function=test_types>
843+
<parameter=obj_param>
844+
{'key': 'value'}
845+
</parameter>
846+
</function>
847+
</tool_call>'''
848+
849+
request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
850+
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
851+
model_output, request=request)
852+
853+
args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
854+
assert args["obj_param"] == {"key": "value"}

vllm/entrypoints/openai/tool_parsers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from .phi4mini_tool_parser import Phi4MiniJsonToolParser
2121
from .pythonic_tool_parser import PythonicToolParser
2222
from .qwen3coder_tool_parser import Qwen3CoderToolParser
23+
from .qwen3xml_tool_parser import Qwen3XMLToolParser
2324
from .seed_oss_tool_parser import SeedOssToolParser
2425
from .step3_tool_parser import Step3ToolParser
2526
from .xlam_tool_parser import xLAMToolParser
@@ -45,6 +46,7 @@
4546
"HunyuanA13BToolParser",
4647
"Glm4MoeModelToolParser",
4748
"Qwen3CoderToolParser",
49+
"Qwen3XMLToolParser",
4850
"SeedOssToolParser",
4951
"Step3ToolParser",
5052
"OpenAIToolParser",

0 commit comments

Comments
 (0)