Skip to content

Commit 927a29c

Browse files
authored
Fix potential infinite tool call loop by resetting tool_choice after … (#263)
# Fix potential infinite tool call loop by resetting tool_choice after tool execution ## Summary This PR fixes an issue where setting `tool_choice` to "required" or a specific function name could cause models to get stuck in an infinite tool call loop. When `tool_choice` is set to force tool usage, this setting persists across model invocations. This PR automatically resets `tool_choice` to "auto" after tool execution, allowing the model to decide whether to make additional tool calls in subsequent turns. Unlike using `tool_use_behavior="stop_on_first_tool"`, this approach lets the model continue processing tool results while preventing forced repeated tool calls. ## Test plan - Added tests to verify tool_choice reset behavior for both agent and run_config settings - Added integration test to verify the solution prevents infinite loops - All tests pass ## Checks - [x] I've added new tests for the fix - [x] I've updated the relevant documentation (added comment in code) - [x] I've run `make lint` and `make format` - [x] I've made sure tests pass
2 parents 13abb68 + 07a4af1 commit 927a29c

File tree

3 files changed

+213
-2
lines changed

3 files changed

+213
-2
lines changed

Diff for: docs/agents.md

+8-1
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,11 @@ Supplying a list of tools doesn't always mean the LLM will use a tool. You can f
142142

143143
!!! note
144144

145-
If requiring tool use, you should consider setting [`Agent.tool_use_behavior`] to stop the Agent from running when a tool output is produced. Otherwise, the Agent might run in an infinite loop, where the LLM produces a tool call, the tool result is sent to the LLM, and the loop repeats indefinitely because the LLM is always forced to use a tool.
145+
To prevent infinite loops, the framework automatically resets `tool_choice` to "auto" after a tool call in the following scenarios:
146+
147+
1. When `tool_choice` is set to a specific function name (any string that's not "auto", "required", or "none")
148+
2. When `tool_choice` is set to "required" AND there is only one tool available
149+
150+
This targeted reset mechanism allows the model to decide whether to make additional tool calls in subsequent turns while avoiding infinite loops in these specific cases.
151+
152+
If you want the Agent to completely stop after a tool call (rather than continuing with auto mode), you can set [`Agent.tool_use_behavior="stop_on_first_tool"`] which will directly use the tool output as the final response without further LLM processing.

Diff for: src/agents/_run_impl.py

+44-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import asyncio
4+
import dataclasses
45
import inspect
56
from collections.abc import Awaitable
67
from dataclasses import dataclass
@@ -47,10 +48,11 @@
4748
)
4849
from .lifecycle import RunHooks
4950
from .logger import logger
51+
from .model_settings import ModelSettings
5052
from .models.interface import ModelTracing
5153
from .run_context import RunContextWrapper, TContext
5254
from .stream_events import RunItemStreamEvent, StreamEvent
53-
from .tool import ComputerTool, FunctionTool, FunctionToolResult
55+
from .tool import ComputerTool, FunctionTool, FunctionToolResult, Tool
5456
from .tracing import (
5557
SpanError,
5658
Trace,
@@ -206,6 +208,29 @@ async def execute_tools_and_side_effects(
206208
new_step_items.extend([result.run_item for result in function_results])
207209
new_step_items.extend(computer_results)
208210

211+
# Reset tool_choice to "auto" after tool execution to prevent infinite loops
212+
if processed_response.functions or processed_response.computer_actions:
213+
tools = agent.tools
214+
215+
if (
216+
run_config.model_settings and
217+
cls._should_reset_tool_choice(run_config.model_settings, tools)
218+
):
219+
# update the run_config model settings with a copy
220+
new_run_config_settings = dataclasses.replace(
221+
run_config.model_settings,
222+
tool_choice="auto"
223+
)
224+
run_config = dataclasses.replace(run_config, model_settings=new_run_config_settings)
225+
226+
if cls._should_reset_tool_choice(agent.model_settings, tools):
227+
# Create a modified copy instead of modifying the original agent
228+
new_model_settings = dataclasses.replace(
229+
agent.model_settings,
230+
tool_choice="auto"
231+
)
232+
agent = dataclasses.replace(agent, model_settings=new_model_settings)
233+
209234
# Second, check if there are any handoffs
210235
if run_handoffs := processed_response.handoffs:
211236
return await cls.execute_handoffs(
@@ -296,6 +321,24 @@ async def execute_tools_and_side_effects(
296321
next_step=NextStepRunAgain(),
297322
)
298323

324+
@classmethod
325+
def _should_reset_tool_choice(cls, model_settings: ModelSettings, tools: list[Tool]) -> bool:
326+
if model_settings is None or model_settings.tool_choice is None:
327+
return False
328+
329+
# for specific tool choices
330+
if (
331+
isinstance(model_settings.tool_choice, str) and
332+
model_settings.tool_choice not in ["auto", "required", "none"]
333+
):
334+
return True
335+
336+
# for one tool and required tool choice
337+
if model_settings.tool_choice == "required":
338+
return len(tools) == 1
339+
340+
return False
341+
299342
@classmethod
300343
def process_model_response(
301344
cls,

Diff for: tests/test_tool_choice_reset.py

+161
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import pytest
2+
3+
from agents import Agent, ModelSettings, Runner, Tool
4+
from agents._run_impl import RunImpl
5+
6+
from .fake_model import FakeModel
7+
from .test_responses import (
8+
get_function_tool,
9+
get_function_tool_call,
10+
get_text_message,
11+
)
12+
13+
14+
class TestToolChoiceReset:
15+
16+
def test_should_reset_tool_choice_direct(self):
17+
"""
18+
Test the _should_reset_tool_choice method directly with various inputs
19+
to ensure it correctly identifies cases where reset is needed.
20+
"""
21+
# Case 1: tool_choice = None should not reset
22+
model_settings = ModelSettings(tool_choice=None)
23+
tools1: list[Tool] = [get_function_tool("tool1")]
24+
# Cast to list[Tool] to fix type checking issues
25+
assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
26+
27+
# Case 2: tool_choice = "auto" should not reset
28+
model_settings = ModelSettings(tool_choice="auto")
29+
assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
30+
31+
# Case 3: tool_choice = "none" should not reset
32+
model_settings = ModelSettings(tool_choice="none")
33+
assert not RunImpl._should_reset_tool_choice(model_settings, tools1)
34+
35+
# Case 4: tool_choice = "required" with one tool should reset
36+
model_settings = ModelSettings(tool_choice="required")
37+
assert RunImpl._should_reset_tool_choice(model_settings, tools1)
38+
39+
# Case 5: tool_choice = "required" with multiple tools should not reset
40+
model_settings = ModelSettings(tool_choice="required")
41+
tools2: list[Tool] = [get_function_tool("tool1"), get_function_tool("tool2")]
42+
assert not RunImpl._should_reset_tool_choice(model_settings, tools2)
43+
44+
# Case 6: Specific tool choice should reset
45+
model_settings = ModelSettings(tool_choice="specific_tool")
46+
assert RunImpl._should_reset_tool_choice(model_settings, tools1)
47+
48+
@pytest.mark.asyncio
49+
async def test_required_tool_choice_with_multiple_runs(self):
50+
"""
51+
Test scenario 1: When multiple runs are executed with tool_choice="required"
52+
Ensure each run works correctly and doesn't get stuck in infinite loop
53+
Also verify that tool_choice remains "required" between runs
54+
"""
55+
# Set up our fake model with responses for two runs
56+
fake_model = FakeModel()
57+
fake_model.add_multiple_turn_outputs([
58+
[get_text_message("First run response")],
59+
[get_text_message("Second run response")]
60+
])
61+
62+
# Create agent with a custom tool and tool_choice="required"
63+
custom_tool = get_function_tool("custom_tool")
64+
agent = Agent(
65+
name="test_agent",
66+
model=fake_model,
67+
tools=[custom_tool],
68+
model_settings=ModelSettings(tool_choice="required"),
69+
)
70+
71+
# First run should work correctly and preserve tool_choice
72+
result1 = await Runner.run(agent, "first run")
73+
assert result1.final_output == "First run response"
74+
assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"
75+
76+
# Second run should also work correctly with tool_choice still required
77+
result2 = await Runner.run(agent, "second run")
78+
assert result2.final_output == "Second run response"
79+
assert agent.model_settings.tool_choice == "required", "tool_choice should stay required"
80+
81+
@pytest.mark.asyncio
82+
async def test_required_with_stop_at_tool_name(self):
83+
"""
84+
Test scenario 2: When using required tool_choice with stop_at_tool_names behavior
85+
Ensure it correctly stops at the specified tool
86+
"""
87+
# Set up fake model to return a tool call for second_tool
88+
fake_model = FakeModel()
89+
fake_model.set_next_output([
90+
get_function_tool_call("second_tool", "{}")
91+
])
92+
93+
# Create agent with two tools and tool_choice="required" and stop_at_tool behavior
94+
first_tool = get_function_tool("first_tool", return_value="first tool result")
95+
second_tool = get_function_tool("second_tool", return_value="second tool result")
96+
97+
agent = Agent(
98+
name="test_agent",
99+
model=fake_model,
100+
tools=[first_tool, second_tool],
101+
model_settings=ModelSettings(tool_choice="required"),
102+
tool_use_behavior={"stop_at_tool_names": ["second_tool"]},
103+
)
104+
105+
# Run should stop after using second_tool
106+
result = await Runner.run(agent, "run test")
107+
assert result.final_output == "second tool result"
108+
109+
@pytest.mark.asyncio
110+
async def test_specific_tool_choice(self):
111+
"""
112+
Test scenario 3: When using a specific tool choice name
113+
Ensure it doesn't cause infinite loops
114+
"""
115+
# Set up fake model to return a text message
116+
fake_model = FakeModel()
117+
fake_model.set_next_output([get_text_message("Test message")])
118+
119+
# Create agent with specific tool_choice
120+
tool1 = get_function_tool("tool1")
121+
tool2 = get_function_tool("tool2")
122+
tool3 = get_function_tool("tool3")
123+
124+
agent = Agent(
125+
name="test_agent",
126+
model=fake_model,
127+
tools=[tool1, tool2, tool3],
128+
model_settings=ModelSettings(tool_choice="tool1"), # Specific tool
129+
)
130+
131+
# Run should complete without infinite loops
132+
result = await Runner.run(agent, "first run")
133+
assert result.final_output == "Test message"
134+
135+
@pytest.mark.asyncio
136+
async def test_required_with_single_tool(self):
137+
"""
138+
Test scenario 4: When using required tool_choice with only one tool
139+
Ensure it doesn't cause infinite loops
140+
"""
141+
# Set up fake model to return a tool call followed by a text message
142+
fake_model = FakeModel()
143+
fake_model.add_multiple_turn_outputs([
144+
# First call returns a tool call
145+
[get_function_tool_call("custom_tool", "{}")],
146+
# Second call returns a text message
147+
[get_text_message("Final response")]
148+
])
149+
150+
# Create agent with a single tool and tool_choice="required"
151+
custom_tool = get_function_tool("custom_tool", return_value="tool result")
152+
agent = Agent(
153+
name="test_agent",
154+
model=fake_model,
155+
tools=[custom_tool],
156+
model_settings=ModelSettings(tool_choice="required"),
157+
)
158+
159+
# Run should complete without infinite loops
160+
result = await Runner.run(agent, "first run")
161+
assert result.final_output == "Final response"

0 commit comments

Comments
 (0)