@@ -16,7 +16,8 @@
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
-                                         ConversationMessage)
+                                         ConversationMessage,
+                                         random_tool_call_id)
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionLogProb, ChatCompletionLogProbs,
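Note: `random_tool_call_id` is a small helper in `vllm.entrypoints.chat_utils` that mints a unique id for each tool call, so OpenAI-compatible clients can correlate streamed argument deltas with the call that owns them. A minimal sketch of such a helper, assuming a `chatcmpl-tool-` prefix and a UUID suffix (both illustrative, not taken from this diff):

```python
import uuid

def random_tool_call_id() -> str:
    # Illustrative stand-in: mint a unique, prefixed id for one tool call.
    # vllm's real helper lives in vllm.entrypoints.chat_utils.
    return f"chatcmpl-tool-{uuid.uuid4().hex}"
```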
@@ -363,9 +364,10 @@ def extract_tool_call_required_streaming(
 
                 function_name_returned = True
                 delta_message = DeltaMessage(tool_calls=[
-                    DeltaToolCall(function=DeltaFunctionCall(
-                        name=current_tool_call["name"],
-                        arguments=arguments),
+                    DeltaToolCall(id=random_tool_call_id(),
+                                  function=DeltaFunctionCall(
+                                      name=current_tool_call["name"],
+                                      arguments=arguments),
                                   index=len(obj) - 1,
                                   type="function")
                 ])
@@ -382,8 +384,7 @@ def extract_tool_call_required_streaming(
                         # instead of name every time
                         name=None,
                         arguments=delta_text),
-                                  index=len(obj) - 1,
-                                  type="function")
+                                  index=len(obj) - 1)
                 ])
         else:
             delta_message = None
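Taken together, these two hunks give the `tool_choice="required"` streaming path the shape OpenAI clients expect: the first delta for a call carries `id`, `type`, and the function name, and every later delta carries only an argument fragment. A sketch of the successive tool-call delta payloads as a client would see them (all values illustrative):

```python
# Shape of successive DeltaToolCall payloads for one streamed call.
deltas = [
    # First delta: identifies the call exactly once.
    {"index": 0, "id": "chatcmpl-tool-6f2c", "type": "function",
     "function": {"name": "get_weather", "arguments": "{\"ci"}},
    # Later deltas: argument fragments only; id/type/name stay unset.
    {"index": 0, "function": {"arguments": "ty\": \"Paris\"}"}},
]
```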
@@ -422,7 +423,7 @@ async def chat_completion_stream_generator(
             and self._should_stream_with_auto_tool_parsing(request))
 
         all_previous_token_ids: Optional[list[list[int]]]
-        function_name_returned: Optional[list[bool]] = None
+        function_name_returned = [False] * num_choices
 
         # Only one of these will be used, thus previous_texts and
         # all_previous_token_ids will not be used twice in the same iteration.
@@ -435,7 +436,6 @@ async def chat_completion_stream_generator(
             reasoning_end_arr = [False] * num_choices
         elif request.tool_choice == "required":
             previous_texts = [""] * num_choices
-            function_name_returned = [False] * num_choices
             all_previous_token_ids = None
         else:
             previous_texts, all_previous_token_ids = None, None
@@ -623,16 +623,27 @@ async def chat_completion_stream_generator(
                         delta_text = previous_text + delta_text
                         current_text = ""
 
+                    if function_name_returned[i]:
+                        delta_tool_call = DeltaToolCall(
+                            function=DeltaFunctionCall(
+                                arguments=delta_text),
+                            index=i)
+                    else:
+                        delta_tool_call = DeltaToolCall(
+                            id=random_tool_call_id(),
+                            type="function",
+                            function=DeltaFunctionCall(
+                                name=tool_choice_function_name,
+                                arguments=delta_text),
+                            index=i)
+                        function_name_returned[i] = True
+
                     delta_message = DeltaMessage(tool_calls=[
-                        DeltaToolCall(function=DeltaFunctionCall(
-                            name=tool_choice_function_name,
-                            arguments=delta_text),
-                            index=i)
+                        delta_tool_call,
                     ])
 
                 elif request.tool_choice == "required":
                     assert previous_texts is not None
-                    assert function_name_returned is not None
                     previous_text = previous_texts[i]
                     current_text = previous_text + delta_text
                     fn_name_returned = function_name_returned[i]
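This hunk applies the same first-chunk/later-chunk split to the named-function path (`tool_choice` pinned to a single function), tracked per choice through `function_name_returned[i]`. Reduced to a standalone function, the logic is roughly as follows (a sketch reusing the `random_tool_call_id` stand-in from above; not vllm's API):

```python
def tool_call_delta(delta_text: str, index: int,
                    function_name: str, name_sent: bool) -> dict:
    # Later chunks: carry only the next argument fragment.
    if name_sent:
        return {"index": index, "function": {"arguments": delta_text}}
    # First chunk: attach id, type, and the function name exactly once.
    return {"index": index, "id": random_tool_call_id(),
            "type": "function",
            "function": {"name": function_name, "arguments": delta_text}}
```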
@@ -835,7 +846,7 @@ async def chat_completion_stream_generator(
                 total_tokens=num_prompt_tokens + completion_tokens,
             )
 
-            data = chunk.model_dump_json(exclude_unset=True)
+            data = chunk.model_dump_json(exclude_none=True)
             yield f"data: {data}\n\n"
 
         # once the final token is handled, if stream_options.include_usage
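The serialization switch matters because of how Pydantic v2 treats the two flags: `exclude_unset=True` drops every field that was never explicitly assigned but keeps an explicitly assigned `None` as `null`, while `exclude_none=True` drops any field whose value is `None`, however it got there. Since follow-up deltas now construct `DeltaToolCall` without `id` and `type`, `exclude_none=True` keeps those `null`s out of the SSE payloads. A minimal standalone demonstration (not vllm code):

```python
from typing import Optional
from pydantic import BaseModel

class Delta(BaseModel):
    index: int
    id: Optional[str] = None

d = Delta(index=0, id=None)  # id explicitly set to None
print(d.model_dump_json(exclude_unset=True))  # {"index":0,"id":null}
print(d.model_dump_json(exclude_none=True))   # {"index":0}
```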