diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
index 73d3af0c9a..2d1e618f3e 100644
--- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py
+++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -434,9 +434,11 @@ async def _run_stream(  # noqa: C901
         if self._events_iterator is None:
             # Ensure that the stream is only run once
 
-            async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
+            async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:  # noqa: C901
                 texts: list[str] = []
                 tool_calls: list[_messages.ToolCallPart] = []
+                thinking_parts: list[_messages.ThinkingPart] = []
+
                 for part in self.model_response.parts:
                     if isinstance(part, _messages.TextPart):
                         # ignore empty content for text parts, see #437
@@ -449,11 +451,7 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
                     elif isinstance(part, _messages.BuiltinToolReturnPart):
                         yield _messages.BuiltinToolResultEvent(part)
                     elif isinstance(part, _messages.ThinkingPart):
-                        # We don't need to do anything with thinking parts in this tool-calling node.
-                        # We need to handle text parts in case there are no tool calls and/or the desired output comes
-                        # from the text, but thinking parts should not directly influence the execution of tools or
-                        # determination of the next node of graph execution here.
-                        pass
+                        thinking_parts.append(part)
                     else:
                         assert_never(part)
 
@@ -467,8 +465,18 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
                 elif texts:
                     # No events are emitted during the handling of text responses, so we don't need to yield anything
                     self._next_node = await self._handle_text_response(ctx, texts)
+                elif thinking_parts:
+                    # Reached when earlier branches did not match but ThinkingPart(s) were returned: models with a
+                    # thinking mode sometimes give no actionable output, so ask again via a RetryPromptPart.
+                    # NOTE(review): no retry counter is incremented here - confirm the loop is bounded elsewhere.
+                    self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
+                        _messages.ModelRequest(
+                            parts=[_messages.RetryPromptPart('Responses without text or tool calls are not permitted.')]
+                        )
+                    )
                 else:
-                    # we've got an empty response, this sometimes happens with anthropic (and perhaps other models)
+                    # we got an empty response with no tool calls, text, or thinking
+                    # this sometimes happens with anthropic (and perhaps other models)
                     # when the model has already returned text along side tool calls
                     # in this scenario, if text responses are allowed, we return text from the most recent model
                     # response, if any
diff --git a/tests/test_agent.py b/tests/test_agent.py
index 83bdcef5eb..7f93b8e3a4 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -4087,3 +4087,71 @@ def bar() -> str:
     assert run.result.output == snapshot(Foo(a=0, b='a'))
     assert test_model.last_model_request_parameters is not None
     assert [t.name for t in test_model.last_model_request_parameters.function_tools] == snapshot(['bar'])
+
+
+async def test_thinking_only_response_retry():
+    """Test that a thinking-only response triggers a retry prompt and the follow-up text response ends the run."""
+    from pydantic_ai.messages import ThinkingPart
+    from pydantic_ai.models.function import FunctionModel
+
+    call_count = 0
+
+    def model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
+        nonlocal call_count
+        call_count += 1
+
+        if call_count == 1:
+            # First call: respond with a ThinkingPart only, which should trigger a retry prompt
+            return ModelResponse(
+                parts=[ThinkingPart(content='Let me think about this...')],
+                model_name='thinking-test-model',
+            )
+        else:
+            # Second call: respond with text so the run can finish
+            return ModelResponse(
+                parts=[TextPart(content='Final answer')],
+                model_name='thinking-test-model',
+            )
+
+    model = FunctionModel(model_function)
+    agent = Agent(model, system_prompt='You are a helpful assistant.')
+
+    result = await agent.run('Hello')
+
+    assert result.all_messages() == snapshot(
+        [
+            ModelRequest(
+                parts=[
+                    SystemPromptPart(
+                        content='You are a helpful assistant.',
+                        timestamp=IsDatetime(),
+                    ),
+                    UserPromptPart(
+                        content='Hello',
+                        timestamp=IsDatetime(),
+                    ),
+                ]
+            ),
+            ModelResponse(
+                parts=[ThinkingPart(content='Let me think about this...')],
+                usage=Usage(requests=1, request_tokens=57, response_tokens=6, total_tokens=63),
+                model_name='function:model_function:',
+                timestamp=IsDatetime(),
+            ),
+            ModelRequest(
+                parts=[
+                    RetryPromptPart(
+                        content='Responses without text or tool calls are not permitted.',
+                        tool_call_id=IsStr(),
+                        timestamp=IsDatetime(),
+                    )
+                ]
+            ),
+            ModelResponse(
+                parts=[TextPart(content='Final answer')],
+                usage=Usage(requests=1, request_tokens=75, response_tokens=8, total_tokens=83),
+                model_name='function:model_function:',
+                timestamp=IsDatetime(),
+            ),
+        ]
+    )