@@ -1082,6 +1082,7 @@ async def test_openai_agents_message_truncation(
10821082 sentry_init , capture_events , test_agent , mock_usage
10831083):
10841084 """Test that large messages are truncated properly in OpenAI Agents integration."""
1085+
10851086 with patch .dict (os .environ , {"OPENAI_API_KEY" : "test-key" }):
10861087 with patch (
10871088 "agents.models.openai_responses.OpenAIResponsesModel.get_response"
@@ -1155,3 +1156,110 @@ async def test_openai_agents_message_truncation(
11551156 assert isinstance (parsed_messages , list )
11561157 # Verify messages were processed
11571158 assert len (parsed_messages ) >= 1
1159+
1160+
@pytest.mark.asyncio
async def test_tool_execution_error_tracing(sentry_init, capture_events, test_agent):
    """
    Test that tool execution errors are properly tracked via error tracing patch.

    This tests the patch of agents error tracing function to ensure execute_tool
    spans are set to error status when tool execution fails.

    The function location varies by version:
    - Newer versions: agents.util._error_tracing.attach_error_to_current_span
    - Older versions: agents._utils.attach_error_to_current_span
    """

    @agents.function_tool
    def failing_tool(message: str) -> str:
        """A tool that fails"""
        # Deliberately raise so the integration's error-tracing patch fires.
        raise ValueError("Tool execution failed")

    # Create agent with the failing tool
    agent_with_tool = test_agent.clone(tools=[failing_tool])

    # Avoid a real API key requirement; the model call itself is mocked below.
    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
        with patch(
            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
            # Create a mock response that includes tool call
            tool_call = ResponseFunctionToolCall(
                id="call_123",
                call_id="call_123",
                name="failing_tool",
                type="function_call",
                arguments='{"message": "test"}',
                # NOTE(review): MagicMock(name=...) sets the mock's *repr* name,
                # not a plain `.name` attribute — confirm nothing downstream
                # reads `.name` off this object. Also unclear whether
                # ResponseFunctionToolCall accepts a `function` field at all
                # (extra kwargs may be ignored or rejected by the model class);
                # verify against the installed openai version.
                function=MagicMock(
                    name="failing_tool", arguments='{"message": "test"}'
                ),
            )

            # First response with tool call
            tool_response = ModelResponse(
                output=[tool_call],
                usage=Usage(
                    requests=1, input_tokens=10, output_tokens=5, total_tokens=15
                ),
                response_id="resp_tool_123",
            )

            # Second response after tool error (agents library handles the error and continues)
            final_response = ModelResponse(
                output=[
                    ResponseOutputMessage(
                        id="msg_final",
                        type="message",
                        status="completed",
                        content=[
                            ResponseOutputText(
                                text="An error occurred while running the tool",
                                type="output_text",
                                annotations=[],
                            )
                        ],
                        role="assistant",
                    )
                ],
                usage=Usage(
                    requests=1, input_tokens=15, output_tokens=10, total_tokens=25
                ),
                response_id="resp_final_123",
            )

            # First model turn requests the tool; second turn ends the run.
            mock_get_response.side_effect = [tool_response, final_response]

            sentry_init(
                integrations=[OpenAIAgentsIntegration()],
                traces_sample_rate=1.0,
                send_default_pii=True,
            )

            events = capture_events()

            # Note: The agents library catches tool exceptions internally,
            # so we don't expect this to raise
            await agents.Runner.run(
                agent_with_tool,
                "Please use the failing tool",
                run_config=test_run_config,
            )

            # Exactly one transaction is expected; unpack enforces that.
            (transaction,) = events
            spans = transaction["spans"]

            # Find the execute_tool span
            execute_tool_span = None
            for span in spans:
                if span.get("description", "").startswith("execute_tool failing_tool"):
                    execute_tool_span = span
                    break

            # Verify the execute_tool span was created
            assert execute_tool_span is not None, "execute_tool span was not created"
            assert execute_tool_span["description"] == "execute_tool failing_tool"
            assert execute_tool_span["data"]["gen_ai.tool.name"] == "failing_tool"

            # Verify error status was set (this is the key test for our patch)
            # The span should be marked as error because the tool execution failed
            assert execute_tool_span["tags"]["status"] == "error"
0 commit comments