@@ -563,8 +563,6 @@ async def chat_completion_stream_generator(
563563 # For reasoning parser and tool call all enabled
564564 added_content_delta_arr = [False ] * num_choices
565565 reasoning_end_arr = [False ] * num_choices
566- elif request .tool_choice == "required" :
567- all_previous_token_ids = None
568566 else :
569567 all_previous_token_ids = None
570568
@@ -880,29 +878,56 @@ async def chat_completion_stream_generator(
880878 previous_text = previous_texts [i ]
881879 current_text = previous_text + delta_text
882880 fn_name_returned = function_name_returned [i ]
881+ output_token_ids = as_list (output .token_ids )
883882
884- if self .reasoning_parser :
885- _ , content = reasoning_parser .extract_reasoning_content (
886- current_text , request
883+ if (
884+ self .reasoning_parser is not None
885+ and not reasoning_end_arr [i ]
886+ and res .prompt_token_ids
887+ and reasoning_parser .is_reasoning_end (res .prompt_token_ids )
888+ ):
889+ reasoning_end_arr [i ] = True
890+
891+ if self .reasoning_parser and not reasoning_end_arr [i ]:
892+ delta_message = (
893+ reasoning_parser .extract_reasoning_content_streaming (
894+ previous_text ,
895+ current_text ,
896+ delta_text ,
897+ previous_token_ids ,
898+ current_token_ids ,
899+ output_token_ids ,
900+ )
887901 )
902+ if reasoning_parser .is_reasoning_end (output_token_ids ):
903+ reasoning_end_arr [i ] = True
904+ if delta_message and delta_message .content :
905+ current_text = delta_message .content
906+ delta_message .content = None
907+ else :
908+ # reasoning ended
909+ current_text = ""
910+
888911 else :
912+ # either finished reasoning or no reasoning at all
889913 content = current_text
890- delta_message , function_name_returned [i ] = (
891- self .extract_tool_call_required_streaming (
892- previous_text = previous_text ,
893- current_text = content ,
894- delta_text = delta_text ,
895- function_name_returned = fn_name_returned ,
896- tool_call_idx = history_tool_call_cnt ,
914+
915+ delta_message , function_name_returned [i ] = (
916+ self .extract_tool_call_required_streaming (
917+ previous_text = previous_text ,
918+ current_text = content ,
919+ delta_text = delta_text ,
920+ function_name_returned = fn_name_returned ,
921+ tool_call_idx = history_tool_call_cnt ,
922+ )
897923 )
898- )
899- if (
900- delta_message
901- and delta_message .tool_calls
902- and delta_message .tool_calls [0 ].id is not None
903- ):
904- history_tool_call_cnt += 1
905- tools_streamed [i ] = True
924+ if (
925+ delta_message
926+ and delta_message .tool_calls
927+ and delta_message .tool_calls [0 ].id is not None
928+ ):
929+ history_tool_call_cnt += 1
930+ tools_streamed [i ] = True
906931
907932 # handle streaming deltas for tools with "auto" tool choice
908933 # and reasoning parser
0 commit comments