Skip to content

Commit a4c29e6

Browse files
ExtReMLapin, CNE Pierre FICHEPOIL, chaunceyjiang
authored
fixed reasoning streaming with tool_choice="required" (#24108)
Signed-off-by: CNE Pierre FICHEPOIL <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Signed-off-by: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Co-authored-by: CNE Pierre FICHEPOIL <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Co-authored-by: Chauncey <chaunceyjiang@gmail.com>
1 parent 8f18feb commit a4c29e6

File tree

2 files changed

+55
-22
lines changed

2 files changed

+55
-22
lines changed

tests/entrypoints/openai/test_completion_with_function_calling.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,11 +194,19 @@ async def test_function_tool_use(
194194
)
195195

196196
output = []
197+
reasoning = []
197198
async for chunk in output_stream:
198-
if chunk.choices and chunk.choices[0].delta.tool_calls:
199-
output.extend(chunk.choices[0].delta.tool_calls)
199+
if chunk.choices:
200+
if enable_thinking and getattr(
201+
chunk.choices[0].delta, "reasoning_content", None
202+
):
203+
reasoning.append(chunk.choices[0].delta.reasoning_content)
204+
if chunk.choices[0].delta.tool_calls:
205+
output.extend(chunk.choices[0].delta.tool_calls)
200206

201207
assert len(output) > 0
208+
if enable_thinking:
209+
assert len(reasoning) > 0
202210

203211

204212
@pytest.fixture(scope="module")

vllm/entrypoints/openai/serving_chat.py

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -563,8 +563,6 @@ async def chat_completion_stream_generator(
563563
# For reasoning parser and tool call all enabled
564564
added_content_delta_arr = [False] * num_choices
565565
reasoning_end_arr = [False] * num_choices
566-
elif request.tool_choice == "required":
567-
all_previous_token_ids = None
568566
else:
569567
all_previous_token_ids = None
570568

@@ -880,29 +878,56 @@ async def chat_completion_stream_generator(
880878
previous_text = previous_texts[i]
881879
current_text = previous_text + delta_text
882880
fn_name_returned = function_name_returned[i]
881+
output_token_ids = as_list(output.token_ids)
883882

884-
if self.reasoning_parser:
885-
_, content = reasoning_parser.extract_reasoning_content(
886-
current_text, request
883+
if (
884+
self.reasoning_parser is not None
885+
and not reasoning_end_arr[i]
886+
and res.prompt_token_ids
887+
and reasoning_parser.is_reasoning_end(res.prompt_token_ids)
888+
):
889+
reasoning_end_arr[i] = True
890+
891+
if self.reasoning_parser and not reasoning_end_arr[i]:
892+
delta_message = (
893+
reasoning_parser.extract_reasoning_content_streaming(
894+
previous_text,
895+
current_text,
896+
delta_text,
897+
previous_token_ids,
898+
current_token_ids,
899+
output_token_ids,
900+
)
887901
)
902+
if reasoning_parser.is_reasoning_end(output_token_ids):
903+
reasoning_end_arr[i] = True
904+
if delta_message and delta_message.content:
905+
current_text = delta_message.content
906+
delta_message.content = None
907+
else:
908+
# reasoning ended
909+
current_text = ""
910+
888911
else:
912+
# either finished reasoning or no reasoning at all
889913
content = current_text
890-
delta_message, function_name_returned[i] = (
891-
self.extract_tool_call_required_streaming(
892-
previous_text=previous_text,
893-
current_text=content,
894-
delta_text=delta_text,
895-
function_name_returned=fn_name_returned,
896-
tool_call_idx=history_tool_call_cnt,
914+
915+
delta_message, function_name_returned[i] = (
916+
self.extract_tool_call_required_streaming(
917+
previous_text=previous_text,
918+
current_text=content,
919+
delta_text=delta_text,
920+
function_name_returned=fn_name_returned,
921+
tool_call_idx=history_tool_call_cnt,
922+
)
897923
)
898-
)
899-
if (
900-
delta_message
901-
and delta_message.tool_calls
902-
and delta_message.tool_calls[0].id is not None
903-
):
904-
history_tool_call_cnt += 1
905-
tools_streamed[i] = True
924+
if (
925+
delta_message
926+
and delta_message.tool_calls
927+
and delta_message.tool_calls[0].id is not None
928+
):
929+
history_tool_call_cnt += 1
930+
tools_streamed[i] = True
906931

907932
# handle streaming deltas for tools with "auto" tool choice
908933
# and reasoning parser

0 commit comments

Comments
 (0)