 
 import pytest
 
+from vllm.entrypoints.openai.protocol import ChatCompletionRequest
+from vllm.entrypoints.openai.tool_parsers.hermes_tool_parser import (
+    Hermes2ProToolParser)
+from vllm.transformers_utils.tokenizer import AnyTokenizer
+
 from ....utils import RemoteOpenAIServer
 
 MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
@@ -37,7 +42,7 @@
                 },
                 "unit": {
                     "type": "string",
-                    "enum": ["celsius", "fahrenheit"]
+                    "enum": ["celsius", "fahrenheit"],
                 },
             },
             "required": ["location"],
7580 "user" ,
7681 "content" :
7782 "Hi! Do you have any detailed information about the product id "
78- "7355608 and inserted true?"
83+ "7355608 and inserted true?" ,
7984}]
8085
8186
@@ -144,8 +149,8 @@ async def test_streaming_tool_call():
             if tool_chunk.function.name:
                 tool_call_chunks[index]["name"] += tool_chunk.function.name
             if tool_chunk.function.arguments:
-                tool_call_chunks[index][
-                    "arguments"] += tool_chunk.function.arguments
+                tool_call_chunks[index]["arguments"] += (
+                    tool_chunk.function.arguments)
 
     assert len(tool_call_chunks) == 1
     reconstructed_tool_call = tool_call_chunks[0]
@@ -234,8 +239,8 @@ async def test_streaming_product_tool_call():
             if tool_chunk.function.name:
                 tool_call_chunks[index]["name"] += tool_chunk.function.name
             if tool_chunk.function.arguments:
-                tool_call_chunks[index][
-                    "arguments"] += tool_chunk.function.arguments
+                tool_call_chunks[index]["arguments"] += (
+                    tool_chunk.function.arguments)
 
     assert len(tool_call_chunks) == 1
     reconstructed_tool_call = tool_call_chunks[0]
@@ -258,3 +263,195 @@ async def test_streaming_product_tool_call():
     print("\n[Streaming Product Test Passed]")
     print(f"Reconstructed Tool Call: {reconstructed_tool_call['name']}")
     print(f"Reconstructed Arguments: {arguments}")
+
+
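+# The fixtures below exercise Hermes2ProToolParser directly against the
+# Qwen3 tokenizer, without going through RemoteOpenAIServer.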
+@pytest.fixture
+def qwen_tokenizer() -> AnyTokenizer:
+    from vllm.transformers_utils.tokenizer import get_tokenizer
+
+    return get_tokenizer("Qwen/Qwen3-32B")
+
+
+@pytest.fixture
+def hermes_parser(qwen_tokenizer: AnyTokenizer) -> Hermes2ProToolParser:
+    return Hermes2ProToolParser(qwen_tokenizer)
+
+
+@pytest.fixture
+def any_chat_request() -> ChatCompletionRequest:
+    return ChatCompletionRequest(
+        seed=42,
+        model="Qwen/Qwen3-32B",
+        messages=[],
+    )
+
+
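+# Plain text that never opens a <tool_call> block should be forwarded
+# verbatim as content deltas, with no tool calls emitted.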
+def test_hermes_parser_streaming_just_forward_text(
+    qwen_tokenizer: AnyTokenizer,
+    hermes_parser: Hermes2ProToolParser,
+    any_chat_request: ChatCompletionRequest,
+) -> None:
+    text = (
+        """This is some prior text that has nothing to do with tool calling."""
+    )
+    tokens = qwen_tokenizer.encode(text)
+    previous_text = ""
+    delta_messages = []
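+    # Decode one token at a time so the parser sees the same incremental
+    # deltas it would receive from a streaming generation.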
+    for token in tokens:
+        delta_text = qwen_tokenizer.decode([token])
+        current_text = previous_text + delta_text
+        delta = hermes_parser.extract_tool_calls_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=delta_text,
+            previous_token_ids=[],
+            current_token_ids=[],
+            delta_token_ids=[],
+            request=any_chat_request,
+        )
+        previous_text = current_text
+        delta_messages.append(delta)
+
+    for delta in delta_messages:
+        assert delta is not None
+        assert not delta.tool_calls
+
+    print(delta_messages)
+    assert "".join([delta.content for delta in delta_messages]) == text
+
+
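+# Regression test for bug 19056: a streamed tool call whose arguments hold a
+# bare boolean value must still be reconstructed into valid JSON.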
+def test_hermes_parser_streaming_failure_case_bug_19056(
+    qwen_tokenizer: AnyTokenizer,
+    hermes_parser: Hermes2ProToolParser,
+    any_chat_request: ChatCompletionRequest,
+) -> None:
+    text = """<tool_call>
+{"name": "final_answer", "arguments": {"trigger": true}}
+</tool_call>"""
+    tokens = qwen_tokenizer.encode(text)
+    previous_text = ""
+    delta_messages = []
+    for token in tokens:
+        text = qwen_tokenizer.decode([token])
+        current_text = previous_text + text
+        delta = hermes_parser.extract_tool_calls_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=text,
+            previous_token_ids=[],
+            current_token_ids=[],
+            delta_token_ids=[],
+            request=any_chat_request,
+        )
+        previous_text = current_text
+        if delta is not None:
+            delta_messages.append(delta)
+
+    assert delta_messages[0].tool_calls[0].function.name == "final_answer"
+    tool_call_args = "".join(delta.tool_calls[0].function.arguments or ""
+                             for delta in delta_messages)
+    assert tool_call_args == '{"trigger": true}'
+
+
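+# Streaming a single-line <tool_call> with string-valued arguments should
+# reproduce the exact function name and argument JSON.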
+def test_hermes_parser_streaming(
+    qwen_tokenizer: AnyTokenizer,
+    hermes_parser: Hermes2ProToolParser,
+    any_chat_request: ChatCompletionRequest,
+) -> None:
+    text = '<tool_call>\
+{"name": "get_current_temperature",\
+"arguments": {"location":\
+"San Francisco, California, United States", "unit": "celsius"}}\
+</tool_call>'
+
+    tokens = qwen_tokenizer.encode(text)
+    previous_text = ""
+    delta_messages = []
+    for token in tokens:
+        text = qwen_tokenizer.decode([token])
+        current_text = previous_text + text
+        delta = hermes_parser.extract_tool_calls_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=text,
+            previous_token_ids=[],
+            current_token_ids=[],
+            delta_token_ids=[],
+            request=any_chat_request,
+        )
+        previous_text = current_text
+        if delta is not None:
+            delta_messages.append(delta)
+    print(delta_messages)
+    assert (delta_messages[0].tool_calls[0].function.name ==
+            "get_current_temperature")
+    tool_call_args = "".join(delta.tool_calls[0].function.arguments or ""
+                             for delta in delta_messages)
+    assert tool_call_args == (
+        '{"location":"San Francisco, California, United States", '
+        '"unit": "celsius"}')
+
+
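+# Non-streaming path: output without <tool_call> tags reports no tool calls.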
+def test_hermes_parser_non_streaming_no_tool_call(
+    hermes_parser: Hermes2ProToolParser,
+    any_chat_request: ChatCompletionRequest,
+) -> None:
+    text = """This is not a tool call."""
+    tool_call = hermes_parser.extract_tool_calls(
+        model_output=text,
+        request=any_chat_request,
+    )
+
+    assert tool_call is not None
+    assert not tool_call.tools_called
+
+
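+# Non-streaming path: a complete <tool_call>...</tool_call> block is parsed
+# into a function name and its JSON arguments.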
+def test_hermes_parser_non_streaming_tool_call_between_tags(
+    hermes_parser: Hermes2ProToolParser,
+    any_chat_request: ChatCompletionRequest,
+) -> None:
+    text = """<tool_call>
+{"name": "final_answer", "arguments": {"trigger": true}}
+</tool_call>"""
+    tool_call = hermes_parser.extract_tool_calls(
+        model_output=text,
+        request=any_chat_request,
+    )
+
+    assert tool_call is not None
+    assert tool_call.tools_called
+    assert tool_call.tool_calls[0].function.name == "final_answer"
+    assert tool_call.tool_calls[0].function.arguments == '{"trigger": true}'
+
+
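+# Non-streaming path: a <tool_call> cut off before its closing tag (e.g. the
+# model hit EOS) should still be extracted.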
+def test_hermes_parser_non_streaming_tool_call_until_eos(
+    hermes_parser: Hermes2ProToolParser,
+    any_chat_request: ChatCompletionRequest,
+) -> None:
+    text = """<tool_call>
+{"name": "final_answer", "arguments": {"trigger": true}}"""
+    tool_call = hermes_parser.extract_tool_calls(
+        model_output=text,
+        request=any_chat_request,
+    )
+
+    assert tool_call is not None
+    assert tool_call.tools_called
+    assert tool_call.tool_calls[0].function.name == "final_answer"
+    assert tool_call.tool_calls[0].function.arguments == '{"trigger": true}'
+
+
+def test_hermes_parser_non_streaming_tool_call_invalid_json(
+    hermes_parser: Hermes2ProToolParser,
+    any_chat_request: ChatCompletionRequest,
+) -> None:
+    # Missing closing brace to trigger exception
+    text = """<tool_call>
+{"name": "final_answer", "arguments": {"trigger": true}"""
+    tool_call = hermes_parser.extract_tool_calls(
+        model_output=text,
+        request=any_chat_request,
+    )
+
+    assert tool_call is not None
+    assert not tool_call.tools_called