@@ -1082,6 +1082,7 @@ async def test_openai_agents_message_truncation(
10821082 sentry_init , capture_events , test_agent , mock_usage
10831083):
10841084 """Test that large messages are truncated properly in OpenAI Agents integration."""
1085+
10851086 with patch .dict (os .environ , {"OPENAI_API_KEY" : "test-key" }):
10861087 with patch (
10871088 "agents.models.openai_responses.OpenAIResponsesModel.get_response"
@@ -1155,3 +1156,110 @@ async def test_openai_agents_message_truncation(
11551156 assert isinstance (parsed_messages , list )
11561157 # Verify messages were processed
11571158 assert len (parsed_messages ) >= 1
1159+
1160+
@pytest.mark.asyncio
async def test_tool_execution_error_tracing(sentry_init, capture_events, test_agent):
    """
    Test that tool execution errors are properly tracked via error tracing patch.

    This tests the patch of agents error tracing function to ensure execute_tool
    spans are set to error status when tool execution fails.

    The function location varies by version:
    - Newer versions: agents.util._error_tracing.attach_error_to_current_span
    - Older versions: agents._utils.attach_error_to_current_span
    """

    @agents.function_tool
    def failing_tool(message: str) -> str:
        """A tool that fails"""
        # Deliberately raise so the integration's error-tracing patch fires.
        raise ValueError("Tool execution failed")

    # Create agent with the failing tool
    agent_with_tool = test_agent.clone(tools=[failing_tool])

    # Avoid a real API key requirement; the model call itself is mocked below.
    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
        with patch(
            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
            # Create a mock response that includes tool call
            tool_call = ResponseFunctionToolCall(
                id="call_123",
                call_id="call_123",
                name="failing_tool",
                type="function_call",
                arguments='{"message": "test"}',
                # NOTE(review): MagicMock(name=...) sets the mock's *repr* name,
                # not a plain `.name` attribute — confirm nothing downstream
                # reads `.name` off this object. Also unclear whether
                # ResponseFunctionToolCall accepts a `function` field at all
                # (extra kwargs may be ignored or rejected by the model class);
                # verify against the installed openai version.
                function=MagicMock(
                    name="failing_tool", arguments='{"message": "test"}'
                ),
            )

            # First response with tool call
            tool_response = ModelResponse(
                output=[tool_call],
                usage=Usage(
                    requests=1, input_tokens=10, output_tokens=5, total_tokens=15
                ),
                response_id="resp_tool_123",
            )

            # Second response after tool error (agents library handles the error and continues)
            final_response = ModelResponse(
                output=[
                    ResponseOutputMessage(
                        id="msg_final",
                        type="message",
                        status="completed",
                        content=[
                            ResponseOutputText(
                                text="An error occurred while running the tool",
                                type="output_text",
                                annotations=[],
                            )
                        ],
                        role="assistant",
                    )
                ],
                usage=Usage(
                    requests=1, input_tokens=15, output_tokens=10, total_tokens=25
                ),
                response_id="resp_final_123",
            )

            # First model turn requests the tool; second turn ends the run.
            mock_get_response.side_effect = [tool_response, final_response]

            sentry_init(
                integrations=[OpenAIAgentsIntegration()],
                traces_sample_rate=1.0,
                send_default_pii=True,
            )

            events = capture_events()

            # Note: The agents library catches tool exceptions internally,
            # so we don't expect this to raise
            await agents.Runner.run(
                agent_with_tool,
                "Please use the failing tool",
                run_config=test_run_config,
            )

            # Exactly one transaction is expected; unpack enforces that.
            (transaction,) = events
            spans = transaction["spans"]

            # Find the execute_tool span
            execute_tool_span = None
            for span in spans:
                if span.get("description", "").startswith("execute_tool failing_tool"):
                    execute_tool_span = span
                    break

            # Verify the execute_tool span was created
            assert execute_tool_span is not None, "execute_tool span was not created"
            assert execute_tool_span["description"] == "execute_tool failing_tool"
            assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool"

            # Verify error status was set (this is the key test for our patch)
            # The span should be marked as error because the tool execution failed
            assert execute_tool_span["tags"]["status"] == "error"
0 commit comments