@@ -563,8 +563,6 @@ async def chat_completion_stream_generator(
563563            # For reasoning parser and tool call all enabled 
564564            added_content_delta_arr  =  [False ] *  num_choices 
565565            reasoning_end_arr  =  [False ] *  num_choices 
566-         elif  request .tool_choice  ==  "required" :
567-             all_previous_token_ids  =  None 
568566        else :
569567            all_previous_token_ids  =  None 
570568
@@ -880,29 +878,56 @@ async def chat_completion_stream_generator(
880878                        previous_text  =  previous_texts [i ]
881879                        current_text  =  previous_text  +  delta_text 
882880                        fn_name_returned  =  function_name_returned [i ]
881+                         output_token_ids  =  as_list (output .token_ids )
883882
884-                         if  self .reasoning_parser :
885-                             _ , content  =  reasoning_parser .extract_reasoning_content (
886-                                 current_text , request 
883+                         if  (
884+                             self .reasoning_parser  is  not None 
885+                             and  not  reasoning_end_arr [i ]
886+                             and  res .prompt_token_ids 
887+                             and  reasoning_parser .is_reasoning_end (res .prompt_token_ids )
888+                         ):
889+                             reasoning_end_arr [i ] =  True 
890+ 
891+                         if  self .reasoning_parser  and  not  reasoning_end_arr [i ]:
892+                             delta_message  =  (
893+                                 reasoning_parser .extract_reasoning_content_streaming (
894+                                     previous_text ,
895+                                     current_text ,
896+                                     delta_text ,
897+                                     previous_token_ids ,
898+                                     current_token_ids ,
899+                                     output_token_ids ,
900+                                 )
887901                            )
902+                             if  reasoning_parser .is_reasoning_end (output_token_ids ):
903+                                 reasoning_end_arr [i ] =  True 
904+                                 if  delta_message  and  delta_message .content :
905+                                     current_text  =  delta_message .content 
906+                                     delta_message .content  =  None 
907+                                 else :
908+                                     # reasoning ended 
909+                                     current_text  =  "" 
910+ 
888911                        else :
912+                             # either finished reasoning or no reasoning at all 
889913                            content  =  current_text 
890-                         delta_message , function_name_returned [i ] =  (
891-                             self .extract_tool_call_required_streaming (
892-                                 previous_text = previous_text ,
893-                                 current_text = content ,
894-                                 delta_text = delta_text ,
895-                                 function_name_returned = fn_name_returned ,
896-                                 tool_call_idx = history_tool_call_cnt ,
914+ 
915+                             delta_message , function_name_returned [i ] =  (
916+                                 self .extract_tool_call_required_streaming (
917+                                     previous_text = previous_text ,
918+                                     current_text = content ,
919+                                     delta_text = delta_text ,
920+                                     function_name_returned = fn_name_returned ,
921+                                     tool_call_idx = history_tool_call_cnt ,
922+                                 )
897923                            )
898-                         )
899-                         if  (
900-                             delta_message 
901-                             and  delta_message .tool_calls 
902-                             and  delta_message .tool_calls [0 ].id  is  not None 
903-                         ):
904-                             history_tool_call_cnt  +=  1 
905-                             tools_streamed [i ] =  True 
924+                             if  (
925+                                 delta_message 
926+                                 and  delta_message .tool_calls 
927+                                 and  delta_message .tool_calls [0 ].id  is  not None 
928+                             ):
929+                                 history_tool_call_cnt  +=  1 
930+                                 tools_streamed [i ] =  True 
906931
907932                    # handle streaming deltas for tools with "auto" tool choice 
908933                    # and reasoning parser 
0 commit comments