diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index 286cb4df50..386aa1040e 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -659,6 +659,7 @@ class OP:
     GEN_AI_EXECUTE_TOOL = "gen_ai.execute_tool"
     GEN_AI_HANDOFF = "gen_ai.handoff"
     GEN_AI_INVOKE_AGENT = "gen_ai.invoke_agent"
+    GEN_AI_RESPONSES = "gen_ai.responses"
     GRAPHQL_EXECUTE = "graphql.execute"
     GRAPHQL_MUTATION = "graphql.mutation"
     GRAPHQL_PARSE = "graphql.parse"
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 5621f5b345..bf310e5fdc 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -1,4 +1,5 @@
 from functools import wraps
+import json
 
 import sentry_sdk
 from sentry_sdk import consts
@@ -27,6 +28,13 @@
 except ImportError:
     raise DidNotEnable("OpenAI not installed")
 
+RESPONSES_API_ENABLED = True
+try:
+    # responses API support was introduced in v1.66.0
+    from openai.resources.responses import Responses, AsyncResponses
+except ImportError:
+    RESPONSES_API_ENABLED = False
+
 
 class OpenAIIntegration(Integration):
     identifier = "openai"
@@ -46,13 +54,17 @@ def __init__(self, include_prompts=True, tiktoken_encoding_name=None):
     def setup_once():
         # type: () -> None
         Completions.create = _wrap_chat_completion_create(Completions.create)
-        Embeddings.create = _wrap_embeddings_create(Embeddings.create)
-
         AsyncCompletions.create = _wrap_async_chat_completion_create(
             AsyncCompletions.create
         )
+
+        Embeddings.create = _wrap_embeddings_create(Embeddings.create)
         AsyncEmbeddings.create = _wrap_async_embeddings_create(AsyncEmbeddings.create)
 
+        if RESPONSES_API_ENABLED:
+            Responses.create = _wrap_responses_create(Responses.create)
+            AsyncResponses.create = _wrap_async_responses_create(AsyncResponses.create)
+
     def count_tokens(self, s):
         # type: (OpenAIIntegration, str) -> int
         if self.tiktoken_encoding is not None:
@@ -62,6 +74,12 @@ def count_tokens(self, s):
         return 0
 
 
 def _capture_exception(exc):
     # type: (Any) -> None
+    # Close an eventually open span
+    # We need to do this by hand because we are not using the start_span context manager
+    current_span = sentry_sdk.get_current_span()
+    if current_span is not None:
+        current_span.__exit__(None, None, None)
+
     event, hint = event_from_exception(
         exc,
         client_options=sentry_sdk.get_client().options,
@@ -140,7 +158,7 @@ def _calculate_token_usage(
 
 
 def _new_chat_completion_common(f, *args, **kwargs):
-    # type: (Any, *Any, **Any) -> Any
+    # type: (Any, Any, Any) -> Any
     integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
     if integration is None:
         return f(*args, **kwargs)
@@ -270,7 +288,7 @@ async def new_iterator_async():
 def _wrap_chat_completion_create(f):
     # type: (Callable[..., Any]) -> Callable[..., Any]
     def _execute_sync(f, *args, **kwargs):
-        # type: (Any, *Any, **Any) -> Any
+        # type: (Any, Any, Any) -> Any
         gen = _new_chat_completion_common(f, *args, **kwargs)
 
         try:
@@ -291,7 +309,7 @@ def _execute_sync(f, *args, **kwargs):
 
     @wraps(f)
     def _sentry_patched_create_sync(*args, **kwargs):
-        # type: (*Any, **Any) -> Any
+        # type: (Any, Any) -> Any
         integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
         if integration is None or "messages" not in kwargs:
             # no "messages" means invalid call (in all versions of openai), let it return error
@@ -305,7 +323,7 @@ def _sentry_patched_create_sync(*args, **kwargs):
 def _wrap_async_chat_completion_create(f):
     # type: (Callable[..., Any]) -> Callable[..., Any]
     async def _execute_async(f, *args, **kwargs):
-        # type: (Any, *Any, **Any) -> Any
+        # type: (Any, Any, Any) -> Any
         gen = _new_chat_completion_common(f, *args, **kwargs)
 
         try:
@@ -326,7 +344,7 @@ async def _execute_async(f, *args, **kwargs):
 
     @wraps(f)
     async def _sentry_patched_create_async(*args, **kwargs):
-        # type: (*Any, **Any) -> Any
+        # type: (Any, Any) -> Any
         integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
         if integration is None or "messages" not in kwargs:
             # no "messages" means invalid call (in all versions of openai), let it return error
@@ -338,7 +356,7 @@ async def _sentry_patched_create_async(*args, **kwargs):
 
 
 def _new_embeddings_create_common(f, *args, **kwargs):
-    # type: (Any, *Any, **Any) -> Any
+    # type: (Any, Any, Any) -> Any
     integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
     if integration is None:
         return f(*args, **kwargs)
@@ -350,6 +368,8 @@ def _new_embeddings_create_common(f, *args, **kwargs):
         name=f"{consts.OP.GEN_AI_EMBEDDINGS} {model}",
         origin=OpenAIIntegration.origin,
     ) as span:
+        set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model)
+
         if "input" in kwargs and (
             should_send_default_pii() and integration.include_prompts
         ):
@@ -365,8 +385,6 @@ def _new_embeddings_create_common(f, *args, **kwargs):
                 set_data_normalized(
                     span, SPANDATA.GEN_AI_REQUEST_MESSAGES, kwargs["input"]
                 )
-        if "model" in kwargs:
-            set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, kwargs["model"])
 
         response = yield f, args, kwargs
 
@@ -397,7 +415,7 @@ def _new_embeddings_create_common(f, *args, **kwargs):
 def _wrap_embeddings_create(f):
     # type: (Any) -> Any
     def _execute_sync(f, *args, **kwargs):
-        # type: (Any, *Any, **Any) -> Any
+        # type: (Any, Any, Any) -> Any
         gen = _new_embeddings_create_common(f, *args, **kwargs)
 
         try:
@@ -418,7 +436,7 @@ def _execute_sync(f, *args, **kwargs):
 
     @wraps(f)
     def _sentry_patched_create_sync(*args, **kwargs):
-        # type: (*Any, **Any) -> Any
+        # type: (Any, Any) -> Any
         integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
         if integration is None:
             return f(*args, **kwargs)
@@ -431,7 +449,7 @@ def _sentry_patched_create_sync(*args, **kwargs):
 def _wrap_async_embeddings_create(f):
     # type: (Any) -> Any
     async def _execute_async(f, *args, **kwargs):
-        # type: (Any, *Any, **Any) -> Any
+        # type: (Any, Any, Any) -> Any
         gen = _new_embeddings_create_common(f, *args, **kwargs)
 
         try:
@@ -452,7 +470,7 @@ async def _execute_async(f, *args, **kwargs):
 
     @wraps(f)
     async def _sentry_patched_create_async(*args, **kwargs):
-        # type: (*Any, **Any) -> Any
+        # type: (Any, Any) -> Any
         integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
         if integration is None:
             return await f(*args, **kwargs)
@@ -460,3 +478,111 @@ async def _sentry_patched_create_async(*args, **kwargs):
         return await _execute_async(f, *args, **kwargs)
 
     return _sentry_patched_create_async
+
+
+def _new_responses_create_common(f, *args, **kwargs):
+    # type: (Any, Any, Any) -> Any
+    integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
+    if integration is None:
+        return f(*args, **kwargs)
+
+    model = kwargs.get("model")
+    input = kwargs.get("input")
+
+    span = sentry_sdk.start_span(
+        op=consts.OP.GEN_AI_RESPONSES,
+        name=f"{consts.OP.GEN_AI_RESPONSES} {model}",
+        origin=OpenAIIntegration.origin,
+    )
+    span.__enter__()
+
+    set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MODEL, model)
+
+    if should_send_default_pii() and integration.include_prompts:
+        set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, input)
+
+    res = yield f, args, kwargs
+
+    if hasattr(res, "output"):
+        if should_send_default_pii() and integration.include_prompts:
+            set_data_normalized(
+                span,
+                SPANDATA.GEN_AI_RESPONSE_TEXT,
+                json.dumps([item.to_dict() for item in res.output]),
+            )
+        _calculate_token_usage([], res, span, None, integration.count_tokens)
+
+    else:
+        set_data_normalized(span, "unknown_response", True)
+
+    span.__exit__(None, None, None)
+
+    return res
+
+
+def _wrap_responses_create(f):
+    # type: (Any) -> Any
+    def _execute_sync(f, *args, **kwargs):
+        # type: (Any, Any, Any) -> Any
+        gen = _new_responses_create_common(f, *args, **kwargs)
+
+        try:
+            f, args, kwargs = next(gen)
+        except StopIteration as e:
+            return e.value
+
+        try:
+            try:
+                result = f(*args, **kwargs)
+            except Exception as e:
+                _capture_exception(e)
+                raise e from None
+
+            return gen.send(result)
+        except StopIteration as e:
+            return e.value
+
+    @wraps(f)
+    def _sentry_patched_create_sync(*args, **kwargs):
+        # type: (Any, Any) -> Any
+        integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
+        if integration is None:
+            return f(*args, **kwargs)
+
+        return _execute_sync(f, *args, **kwargs)
+
+    return _sentry_patched_create_sync
+
+
+def _wrap_async_responses_create(f):
+    # type: (Any) -> Any
+    async def _execute_async(f, *args, **kwargs):
+        # type: (Any, Any, Any) -> Any
+        gen = _new_responses_create_common(f, *args, **kwargs)
+
+        try:
+            f, args, kwargs = next(gen)
+        except StopIteration as e:
+            return await e.value
+
+        try:
+            try:
+                result = await f(*args, **kwargs)
+            except Exception as e:
+                _capture_exception(e)
+                raise e from None
+
+            return gen.send(result)
+        except StopIteration as e:
+            return e.value
+
+    @wraps(f)
+    async def _sentry_patched_responses_async(*args, **kwargs):
+        # type: (Any, Any) -> Any
+        integration = sentry_sdk.get_client().get_integration(OpenAIIntegration)
+        if integration is None:
+            return await f(*args, **kwargs)
+
+        return await _execute_async(f, *args, **kwargs)
+
+    return _sentry_patched_responses_async
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 0f763836be..f6b18e6908 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -6,6 +6,22 @@
 from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice
 from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage
 
+SKIP_RESPONSES_TESTS = False
+
+try:
+    from openai.types.responses.response_usage import (
+        InputTokensDetails,
+        OutputTokensDetails,
+    )
+    from openai.types.responses import (
+        Response,
+        ResponseUsage,
+        ResponseOutputMessage,
+        ResponseOutputText,
+    )
+except ImportError:
+    SKIP_RESPONSES_TESTS = True
+
 from sentry_sdk import start_transaction
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations.openai import (
@@ -46,6 +62,46 @@ async def __call__(self, *args, **kwargs):
 )
 
 
+if SKIP_RESPONSES_TESTS:
+    EXAMPLE_RESPONSE = None
+else:
+    EXAMPLE_RESPONSE = Response(
+        id="chat-id",
+        output=[
+            ResponseOutputMessage(
+                id="message-id",
+                content=[
+                    ResponseOutputText(
+                        annotations=[],
+                        text="the model response",
+                        type="output_text",
+                    ),
+                ],
+                role="assistant",
+                status="completed",
+                type="message",
+            ),
+        ],
+        parallel_tool_calls=False,
+        tool_choice="none",
+        tools=[],
+        created_at=10000000,
+        model="model-id",
+        object="response",
+        usage=ResponseUsage(
+            input_tokens=20,
+            input_tokens_details=InputTokensDetails(
+                cached_tokens=5,
+            ),
+            output_tokens=10,
+            output_tokens_details=OutputTokensDetails(
+                reasoning_tokens=8,
+            ),
+            total_tokens=30,
+        ),
+    )
+
+
 async def async_iterator(values):
     for value in values:
         yield value
@@ -903,3 +959,196 @@ def count_tokens(msg):
         output_tokens_reasoning=None,
         total_tokens=None,
     )
+
+
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events):
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE)
+
+    with start_transaction(name="openai tx"):
+        client.responses.create(
+            model="gpt-4o",
+            instructions="You are a coding assistant that talks like a pirate.",
+            input="How do I check if a Python object is an instance of a class?",
+        )
+
+    (transaction,) = events
+    spans = transaction["spans"]
+
+    assert len(spans) == 1
+    assert spans[0]["op"] == "gen_ai.responses"
+    assert spans[0]["origin"] == "auto.ai.openai"
+    assert spans[0]["data"] == {
+        "gen_ai.request.model": "gpt-4o",
+        "gen_ai.usage.input_tokens": 20,
+        "gen_ai.usage.input_tokens.cached": 5,
+        "gen_ai.usage.output_tokens": 10,
+        "gen_ai.usage.output_tokens.reasoning": 8,
+        "gen_ai.usage.total_tokens": 30,
+        "thread.id": mock.ANY,
+        "thread.name": mock.ANY,
+    }
+
+    assert "gen_ai.request.messages" not in spans[0]["data"]
+    assert "gen_ai.response.text" not in spans[0]["data"]
+
+
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+def test_ai_client_span_responses_api(sentry_init, capture_events):
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE)
+
+    with start_transaction(name="openai tx"):
+        client.responses.create(
+            model="gpt-4o",
+            instructions="You are a coding assistant that talks like a pirate.",
+            input="How do I check if a Python object is an instance of a class?",
+        )
+
+    (transaction,) = events
+    spans = transaction["spans"]
+
+    assert len(spans) == 1
+    assert spans[0]["op"] == "gen_ai.responses"
+    assert spans[0]["origin"] == "auto.ai.openai"
+    assert spans[0]["data"] == {
+        "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?",
+        "gen_ai.request.model": "gpt-4o",
+        "gen_ai.usage.input_tokens": 20,
+        "gen_ai.usage.input_tokens.cached": 5,
+        "gen_ai.usage.output_tokens": 10,
+        "gen_ai.usage.output_tokens.reasoning": 8,
+        "gen_ai.usage.total_tokens": 30,
+        "gen_ai.response.text": '[{"id": "message-id", "content": [{"annotations": [], "text": "the model response", "type": "output_text"}], "role": "assistant", "status": "completed", "type": "message"}]',
+        "thread.id": mock.ANY,
+        "thread.name": mock.ANY,
+    }
+
+
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+def test_error_in_responses_api(sentry_init, capture_events):
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    client.responses._post = mock.Mock(
+        side_effect=OpenAIError("API rate limit reached")
+    )
+
+    with start_transaction(name="openai tx"):
+        with pytest.raises(OpenAIError):
+            client.responses.create(
+                model="gpt-4o",
+                instructions="You are a coding assistant that talks like a pirate.",
+                input="How do I check if a Python object is an instance of a class?",
+            )
+
+    (error_event, transaction_event) = events
+
+    assert transaction_event["type"] == "transaction"
+    # make sure the span where the error occurred is captured
+    assert transaction_event["spans"][0]["op"] == "gen_ai.responses"
+
+    assert error_event["level"] == "error"
+    assert error_event["exception"]["values"][0]["type"] == "OpenAIError"
+
+    assert (
+        error_event["contexts"]["trace"]["trace_id"]
+        == transaction_event["contexts"]["trace"]["trace_id"]
+    )
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+async def test_ai_client_span_responses_async_api(sentry_init, capture_events):
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+    client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE)
+
+    with start_transaction(name="openai tx"):
+        await client.responses.create(
+            model="gpt-4o",
+            instructions="You are a coding assistant that talks like a pirate.",
+            input="How do I check if a Python object is an instance of a class?",
+        )
+
+    (transaction,) = events
+    spans = transaction["spans"]
+
+    assert len(spans) == 1
+    assert spans[0]["op"] == "gen_ai.responses"
+    assert spans[0]["origin"] == "auto.ai.openai"
+    assert spans[0]["data"] == {
+        "gen_ai.request.messages": "How do I check if a Python object is an instance of a class?",
+        "gen_ai.request.model": "gpt-4o",
+        "gen_ai.usage.input_tokens": 20,
+        "gen_ai.usage.input_tokens.cached": 5,
+        "gen_ai.usage.output_tokens": 10,
+        "gen_ai.usage.output_tokens.reasoning": 8,
+        "gen_ai.usage.total_tokens": 30,
+        "gen_ai.response.text": '[{"id": "message-id", "content": [{"annotations": [], "text": "the model response", "type": "output_text"}], "role": "assistant", "status": "completed", "type": "message"}]',
+        "thread.id": mock.ANY,
+        "thread.name": mock.ANY,
+    }
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+async def test_error_in_responses_async_api(sentry_init, capture_events):
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+    client.responses._post = AsyncMock(
+        side_effect=OpenAIError("API rate limit reached")
+    )
+
+    with start_transaction(name="openai tx"):
+        with pytest.raises(OpenAIError):
+            await client.responses.create(
+                model="gpt-4o",
+                instructions="You are a coding assistant that talks like a pirate.",
+                input="How do I check if a Python object is an instance of a class?",
+            )
+
+    (error_event, transaction_event) = events
+
+    assert transaction_event["type"] == "transaction"
+    # make sure the span where the error occurred is captured
+    assert transaction_event["spans"][0]["op"] == "gen_ai.responses"
+
+    assert error_event["level"] == "error"
+    assert error_event["exception"]["values"][0]["type"] == "OpenAIError"
+
+    assert (
+        error_event["contexts"]["trace"]["trace_id"]
+        == transaction_event["contexts"]["trace"]["trace_id"]
+    )