diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py
index 0c0b937006..525ef82495 100644
--- a/sentry_sdk/ai/utils.py
+++ b/sentry_sdk/ai/utils.py
@@ -1,14 +1,17 @@
 import json
-
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from typing import Any, Callable
+    from typing import Any, Callable, Dict, List, Optional
+
+    from sentry_sdk.tracing import Span
 
 import sentry_sdk
 from sentry_sdk.utils import logger
 
+MAX_GEN_AI_MESSAGE_BYTES = 20_000  # 20KB
+
 
 class GEN_AI_ALLOWED_MESSAGE_ROLES:
     SYSTEM = "system"
@@ -95,3 +98,79 @@ def get_start_span_function():
         current_span is not None and current_span.containing_transaction is not None
     )
     return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction
+
+
+def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES):
+    # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]]
+    if not messages:
+        return messages
+
+    truncated_messages = list(messages)
+
+    while len(truncated_messages) > 1:
+        serialized_json = json.dumps(truncated_messages, separators=(",", ":"))
+        current_size = len(serialized_json.encode("utf-8"))
+
+        if current_size <= max_bytes:
+            break
+
+        truncated_messages.pop(0)
+
+    serialized_json = json.dumps(truncated_messages, separators=(",", ":"))
+    current_size = len(serialized_json.encode("utf-8"))
+
+    if current_size > max_bytes and len(truncated_messages) == 1:
+        message = truncated_messages[0].copy()
+        content = message.get("content", "")
+
+        if isinstance(content, str):
+            max_content_length = max_bytes // 2
+            while True:
+                message["content"] = content[:max_content_length]
+                test_json = json.dumps([message], separators=(",", ":"))
+                if len(test_json.encode("utf-8")) <= max_bytes:
+                    break
+                max_content_length = int(max_content_length * 0.9)
+                if max_content_length < 100:
+                    message["content"] = ""
+                    break
+
+            truncated_messages = [message]
+        elif isinstance(content, list):
+            content_copy = list(content)
+            while len(content_copy) > 0:
+                message["content"] = content_copy
+                test_json = json.dumps([message], separators=(",", ":"))
+                if len(test_json.encode("utf-8")) <= max_bytes:
+                    break
+                content_copy = content_copy[:-1]
+
+            if len(content_copy) == 0:
+                message["content"] = []
+
+            truncated_messages = [message]
+
+    return truncated_messages
+
+
+def truncate_and_annotate_messages(
+    messages, span, scope, max_bytes=MAX_GEN_AI_MESSAGE_BYTES
+):
+    # type: (Optional[List[Dict[str, Any]]], Any, Any, int) -> Optional[List[Dict[str, Any]]]
+    if not messages:
+        return None
+
+    original_count = len(messages)
+    truncated_messages = truncate_messages_by_size(messages, max_bytes)
+
+    if not truncated_messages:
+        return None
+
+    truncated_count = len(truncated_messages)
+    n_removed = original_count - truncated_count
+
+    if n_removed > 0:
+        scope._gen_ai_messages_truncated[span.span_id] = n_removed
+        span.set_data("_gen_ai_messages_original_count", original_count)
+
+    return truncated_messages
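Editor's note: the truncation above is two-phase — whole messages are dropped oldest-first while more than one remains, and only a lone oversized message has its string (or list) content trimmed. A quick behavioral sketch, illustrative only and assuming this patch is applied and `sentry_sdk` is importable:

```python
from sentry_sdk.ai.utils import truncate_messages_by_size

messages = [
    {"role": "system", "content": "a" * 500},
    {"role": "user", "content": "b" * 500},
    {"role": "assistant", "content": "c" * 500},
]

# The budget fits only one message: the two oldest are dropped whole,
# and the newest survives with its content intact.
truncated = truncate_messages_by_size(messages, max_bytes=600)
assert truncated == [{"role": "assistant", "content": "c" * 500}]

# A single message over budget is kept but has its string content trimmed.
huge = [{"role": "user", "content": "x" * 5_000}]
trimmed = truncate_messages_by_size(huge, max_bytes=1_000)
assert len(trimmed) == 1 and len(trimmed[0]["content"]) < 5_000
```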
diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py
index c06043ebe2..da4c972b86 100644
--- a/sentry_sdk/client.py
+++ b/sentry_sdk/client.py
@@ -614,6 +614,32 @@ def _prepare_event(
         event["breadcrumbs"] = AnnotatedValue(
             event.get("breadcrumbs", []), {"len": previous_total_breadcrumbs}
         )
+
+        # Annotate truncated gen_ai messages in spans
+        if scope is not None and scope._gen_ai_messages_truncated:
+            spans = event.get("spans", [])
+            if isinstance(spans, AnnotatedValue):
+                spans = spans.value
+
+            for span in spans:
+                if isinstance(span, dict):
+                    span_id = span.get("span_id")
+                    if span_id and span_id in scope._gen_ai_messages_truncated:
+                        span_data = span.get("data", {})
+                        original_count = span_data.pop(
+                            "_gen_ai_messages_original_count", None
+                        )
+                        if (
+                            original_count is not None
+                            and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data
+                        ):
+                            span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = (
+                                AnnotatedValue(
+                                    span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES],
+                                    {"len": original_count},
+                                )
+                            )
+
         # Postprocess the event here so that annotated types do
         # generally not surface in before_send
         if event is not None:
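Editor's note: an `AnnotatedValue` splits into the value plus a `_meta` entry during serialization, so a span whose message list was cut from 4 entries to 2 should come out roughly like the fragment below. The shape is inferred from the `_meta` assertions in the OpenAI test further down, not spelled out in the patch itself:

```python
# Hypothetical serialized event fragment (keys abridged).
event = {
    "spans": [
        {
            "span_id": "abc123",
            "data": {"gen_ai.request.messages": "[ ...2 surviving messages... ]"},
        }
    ],
    "_meta": {
        "spans": {"0": {"data": {"gen_ai.request.messages": {"": {"len": 4}}}}}
    },
}
```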
diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py
index 46c6b2a766..e61a3556e1 100644
--- a/sentry_sdk/integrations/anthropic.py
+++ b/sentry_sdk/integrations/anthropic.py
@@ -6,6 +6,7 @@
 from sentry_sdk.ai.utils import (
     set_data_normalized,
     normalize_message_roles,
+    truncate_and_annotate_messages,
     get_start_span_function,
 )
 from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS
@@ -145,12 +146,14 @@ def _set_input_data(span, kwargs, integration):
             normalized_messages.append(message)
 
     role_normalized_messages = normalize_message_roles(normalized_messages)
-    set_data_normalized(
-        span,
-        SPANDATA.GEN_AI_REQUEST_MESSAGES,
-        role_normalized_messages,
-        unpack=False,
+    scope = sentry_sdk.get_current_scope()
+    messages_data = truncate_and_annotate_messages(
+        role_normalized_messages, span, scope
     )
+    if messages_data is not None:
+        set_data_normalized(
+            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
+        )
 
     set_data_normalized(
         span, SPANDATA.GEN_AI_RESPONSE_STREAMING, kwargs.get("stream", False)
diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 724d908665..c10898036d 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -9,6 +9,7 @@
     normalize_message_roles,
     set_data_normalized,
     get_start_span_function,
+    truncate_and_annotate_messages,
 )
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
@@ -221,12 +222,17 @@ def on_llm_start(
                 }
                 for prompt in prompts
             ]
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                normalized_messages,
-                unpack=False,
+            scope = sentry_sdk.get_current_scope()
+            messages_data = truncate_and_annotate_messages(
+                normalized_messages, span, scope
             )
+            if messages_data is not None:
+                set_data_normalized(
+                    span,
+                    SPANDATA.GEN_AI_REQUEST_MESSAGES,
+                    messages_data,
+                    unpack=False,
+                )
 
     def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
         # type: (SentryLangchainCallback, Dict[str, Any], List[List[BaseMessage]], UUID, Any) -> Any
@@ -278,13 +284,17 @@ def on_chat_model_start(self, serialized, messages, *, run_id, **kwargs):
                     self._normalize_langchain_message(message)
                 )
             normalized_messages = normalize_message_roles(normalized_messages)
-
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                normalized_messages,
-                unpack=False,
+            scope = sentry_sdk.get_current_scope()
+            messages_data = truncate_and_annotate_messages(
+                normalized_messages, span, scope
             )
+            if messages_data is not None:
+                set_data_normalized(
+                    span,
+                    SPANDATA.GEN_AI_REQUEST_MESSAGES,
+                    messages_data,
+                    unpack=False,
+                )
 
     def on_chat_model_end(self, response, *, run_id, **kwargs):
         # type: (SentryLangchainCallback, LLMResult, UUID, Any) -> Any
@@ -758,12 +768,17 @@ def new_invoke(self, *args, **kwargs):
             and integration.include_prompts
         ):
             normalized_messages = normalize_message_roles([input])
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                normalized_messages,
-                unpack=False,
+            scope = sentry_sdk.get_current_scope()
+            messages_data = truncate_and_annotate_messages(
+                normalized_messages, span, scope
             )
+            if messages_data is not None:
+                set_data_normalized(
+                    span,
+                    SPANDATA.GEN_AI_REQUEST_MESSAGES,
+                    messages_data,
+                    unpack=False,
+                )
 
         output = result.get("output")
         if (
@@ -813,12 +828,13 @@ def new_stream(self, *args, **kwargs):
             and integration.include_prompts
         ):
             normalized_messages = normalize_message_roles([input])
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                normalized_messages,
-                unpack=False,
+            messages_data = truncate_and_annotate_messages(
+                normalized_messages, span, sentry_sdk.get_current_scope()
             )
+            if messages_data is not None:
+                set_data_normalized(
+                    span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
+                )
 
         # Run the agent
         result = f(self, *args, **kwargs)
diff --git a/sentry_sdk/integrations/langgraph.py b/sentry_sdk/integrations/langgraph.py
index 11aa1facf4..5bb0e0fd08 100644
--- a/sentry_sdk/integrations/langgraph.py
+++ b/sentry_sdk/integrations/langgraph.py
@@ -2,7 +2,11 @@
 from typing import Any, Callable, List, Optional
 
 import sentry_sdk
-from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
+from sentry_sdk.ai.utils import (
+    set_data_normalized,
+    normalize_message_roles,
+    truncate_and_annotate_messages,
+)
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
@@ -181,12 +185,17 @@ def new_invoke(self, *args, **kwargs):
             input_messages = _parse_langgraph_messages(args[0])
             if input_messages:
                 normalized_input_messages = normalize_message_roles(input_messages)
-                set_data_normalized(
-                    span,
-                    SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                    normalized_input_messages,
-                    unpack=False,
+                scope = sentry_sdk.get_current_scope()
+                messages_data = truncate_and_annotate_messages(
+                    normalized_input_messages, span, scope
                 )
+                if messages_data is not None:
+                    set_data_normalized(
+                        span,
+                        SPANDATA.GEN_AI_REQUEST_MESSAGES,
+                        messages_data,
+                        unpack=False,
+                    )
 
             result = f(self, *args, **kwargs)
 
@@ -232,12 +241,17 @@ async def new_ainvoke(self, *args, **kwargs):
             input_messages = _parse_langgraph_messages(args[0])
             if input_messages:
                 normalized_input_messages = normalize_message_roles(input_messages)
-                set_data_normalized(
-                    span,
-                    SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                    normalized_input_messages,
-                    unpack=False,
+                scope = sentry_sdk.get_current_scope()
+                messages_data = truncate_and_annotate_messages(
+                    normalized_input_messages, span, scope
                 )
+                if messages_data is not None:
+                    set_data_normalized(
+                        span,
+                        SPANDATA.GEN_AI_REQUEST_MESSAGES,
+                        messages_data,
+                        unpack=False,
+                    )
 
             result = await f(self, *args, **kwargs)
 
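Editor's note: the Anthropic, LangChain, and LangGraph changes above (and the LiteLLM/OpenAI ones below) all repeat the same truncate-then-record dance. If it spreads further, a small shared helper would keep the integrations in sync — a possible shape, hypothetical and not part of this patch:

```python
import sentry_sdk
from sentry_sdk.ai.utils import set_data_normalized, truncate_and_annotate_messages
from sentry_sdk.consts import SPANDATA


def _record_request_messages(span, normalized_messages):
    # Truncate, note any dropped messages on the current scope, and only
    # write the span data when something is left to record.
    scope = sentry_sdk.get_current_scope()
    messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
    if messages_data is not None:
        set_data_normalized(
            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
        )
```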
diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py
index 2582c2bc05..c285291326 100644
--- a/sentry_sdk/integrations/litellm.py
+++ b/sentry_sdk/integrations/litellm.py
@@ -3,7 +3,12 @@
 import sentry_sdk
 from sentry_sdk import consts
 from sentry_sdk.ai.monitoring import record_token_usage
-from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
+from sentry_sdk.ai.utils import (
+    get_start_span_function,
+    set_data_normalized,
+    truncate_and_annotate_messages,
+    normalize_message_roles,
+)
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
@@ -72,9 +77,13 @@ def _input_callback(kwargs):
 
     # Record messages if allowed
     if messages and should_send_default_pii() and integration.include_prompts:
-        set_data_normalized(
-            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
-        )
+        normalized_messages = normalize_message_roles(messages)
+        scope = sentry_sdk.get_current_scope()
+        messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
+        if messages_data is not None:
+            set_data_normalized(
+                span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
+            )
 
     # Record other parameters
     params = {
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index e9bd2efa23..f2f463bcd7 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -3,7 +3,11 @@
 import sentry_sdk
 from sentry_sdk import consts
 from sentry_sdk.ai.monitoring import record_token_usage
-from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles
+from sentry_sdk.ai.utils import (
+    set_data_normalized,
+    normalize_message_roles,
+    truncate_and_annotate_messages,
+)
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations import DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
@@ -183,9 +187,12 @@ def _set_input_data(span, kwargs, operation, integration):
         and integration.include_prompts
     ):
         normalized_messages = normalize_message_roles(messages)
-        set_data_normalized(
-            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False
-        )
+        scope = sentry_sdk.get_current_scope()
+        messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
+        if messages_data is not None:
+            set_data_normalized(
+                span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
+            )
 
     # Input attributes: Common
     set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai")
diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
index 2a9c5ebe66..2362db54a5 100644
--- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
+++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
@@ -3,6 +3,7 @@
     get_start_span_function,
     set_data_normalized,
     normalize_message_roles,
+    truncate_and_annotate_messages,
 )
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.scope import should_send_default_pii
@@ -61,12 +62,14 @@ def invoke_agent_span(context, agent, kwargs):
 
         if len(messages) > 0:
             normalized_messages = normalize_message_roles(messages)
-            set_data_normalized(
-                span,
-                SPANDATA.GEN_AI_REQUEST_MESSAGES,
-                normalized_messages,
-                unpack=False,
+            scope = sentry_sdk.get_current_scope()
+            messages_data = truncate_and_annotate_messages(
+                normalized_messages, span, scope
             )
+            if messages_data is not None:
+                set_data_normalized(
+                    span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
+                )
 
     _set_agent_data(span, agent)
 
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index 125ff1175b..1bae07b617 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -4,6 +4,7 @@
     normalize_message_roles,
     set_data_normalized,
     normalize_message_role,
+    truncate_and_annotate_messages,
 )
 from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
 from sentry_sdk.integrations import DidNotEnable
@@ -135,12 +136,15 @@ def _set_input_data(span, get_response_kwargs):
                 }
             )
 
-    set_data_normalized(
-        span,
-        SPANDATA.GEN_AI_REQUEST_MESSAGES,
-        normalize_message_roles(request_messages),
-        unpack=False,
+    role_normalized_messages = normalize_message_roles(request_messages)
+    scope = sentry_sdk.get_current_scope()
+    messages_data = truncate_and_annotate_messages(
+        role_normalized_messages, span, scope
     )
+    if messages_data is not None:
+        set_data_normalized(
+            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
+        )
 
 
 def _set_output_data(span, result):
diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py
index c871e6a467..e9d716d1a2 100644
--- a/sentry_sdk/scope.py
+++ b/sentry_sdk/scope.py
@@ -188,6 +188,7 @@ class Scope:
         "_extras",
         "_breadcrumbs",
         "_n_breadcrumbs_truncated",
+        "_gen_ai_messages_truncated",
         "_event_processors",
         "_error_processors",
         "_should_capture",
@@ -213,6 +214,7 @@ def __init__(self, ty=None, client=None):
         self._name = None  # type: Optional[str]
         self._propagation_context = None  # type: Optional[PropagationContext]
         self._n_breadcrumbs_truncated = 0  # type: int
+        self._gen_ai_messages_truncated = {}  # type: Dict[str, int]
 
         self.client = NonRecordingClient()  # type: sentry_sdk.client.BaseClient
 
@@ -247,6 +249,7 @@ def __copy__(self):
 
         rv._breadcrumbs = copy(self._breadcrumbs)
         rv._n_breadcrumbs_truncated = self._n_breadcrumbs_truncated
+        rv._gen_ai_messages_truncated = self._gen_ai_messages_truncated.copy()
         rv._event_processors = self._event_processors.copy()
         rv._error_processors = self._error_processors.copy()
         rv._propagation_context = self._propagation_context
@@ -1583,6 +1586,8 @@ def update_from_scope(self, scope):
             self._n_breadcrumbs_truncated = (
                 self._n_breadcrumbs_truncated + scope._n_breadcrumbs_truncated
             )
+        if scope._gen_ai_messages_truncated:
+            self._gen_ai_messages_truncated.update(scope._gen_ai_messages_truncated)
         if scope._span:
             self._span = scope._span
         if scope._attachments:
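Editor's note: `_gen_ai_messages_truncated` maps span IDs to the number of dropped messages, so concurrent spans on one scope cannot clobber each other; `__copy__` duplicates the dict and `update_from_scope` merges it by key. The bookkeeping in isolation:

```python
# Fork-and-merge semantics of the new per-span counter dict.
parent = {"span_a": 2}
forked = parent.copy()   # what Scope.__copy__ does
forked["span_b"] = 3     # a truncation recorded on the forked scope
parent.update(forked)    # what update_from_scope does
assert parent == {"span_a": 2, "span_b": 3}
```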
diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py
index e9065e2d32..e174f59d42 100644
--- a/tests/integrations/anthropic/test_anthropic.py
+++ b/tests/integrations/anthropic/test_anthropic.py
@@ -1,6 +1,7 @@
-import pytest
-from unittest import mock
 import json
+from unittest import mock
+
+import pytest
 
 try:
     from unittest.mock import AsyncMock
@@ -41,16 +42,18 @@ async def __call__(self, *args, **kwargs):
 except ImportError:
     from anthropic.types.content_block import ContentBlock as TextBlock
 
-from sentry_sdk import start_transaction, start_span
+from sentry_sdk import start_span, start_transaction
+from sentry_sdk._types import AnnotatedValue
+from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations.anthropic import (
     AnthropicIntegration,
-    _set_output_data,
     _collect_ai_data,
+    _set_output_data,
 )
+from sentry_sdk.serializer import serialize
 from sentry_sdk.utils import package_version
 
-
 ANTHROPIC_VERSION = package_version("anthropic")
 
 EXAMPLE_MESSAGE = Message(
@@ -891,9 +894,8 @@ def test_anthropic_message_role_mapping(sentry_init, capture_events):
     events = capture_events()
 
     client = Anthropic(api_key="z")
-
-    def mock_messages_create(*args, **kwargs):
-        return Message(
+    client.messages._post = mock.Mock(
+        return_value=Message(
             id="msg_1",
             content=[TextBlock(text="Hi there!", type="text")],
             model="claude-3-opus",
@@ -903,15 +905,13 @@ def mock_messages_create(*args, **kwargs):
             type="message",
             usage=Usage(input_tokens=10, output_tokens=5),
         )
+    )
 
-    client.messages._post = mock.Mock(return_value=mock_messages_create())
-
-    # Test messages with mixed roles including "ai" that should be mapped to "assistant"
     test_messages = [
         {"role": "system", "content": "You are helpful."},
         {"role": "user", "content": "Hello"},
-        {"role": "ai", "content": "Hi there!"},  # Should be mapped to "assistant"
-        {"role": "assistant", "content": "How can I help?"},  # Should stay "assistant"
+        {"role": "ai", "content": "Hi there!"},
+        {"role": "assistant", "content": "How can I help?"},
     ]
 
     with start_transaction(name="anthropic tx"):
@@ -920,28 +920,72 @@ def mock_messages_create(*args, **kwargs):
         )
 
     (event,) = events
-    span = event["spans"][0]
+    (span,) = event["spans"]
 
-    # Verify that the span was created correctly
     assert span["op"] == "gen_ai.chat"
     assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
 
-    # Parse the stored messages
     stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
-
-    # Verify that "ai" role was mapped to "assistant"
     assert len(stored_messages) == 4
     assert stored_messages[0]["role"] == "system"
    assert stored_messages[1]["role"] == "user"
-    assert (
-        stored_messages[2]["role"] == "assistant"
-    )  # "ai" should be mapped to "assistant"
-    assert stored_messages[3]["role"] == "assistant"  # should stay "assistant"
+    assert stored_messages[2]["role"] == "assistant"
+    assert stored_messages[3]["role"] == "assistant"
 
-    # Verify content is preserved
     assert stored_messages[2]["content"] == "Hi there!"
     assert stored_messages[3]["content"] == "How can I help?"
 
-    # Verify no "ai" roles remain
     roles = [msg["role"] for msg in stored_messages]
     assert "ai" not in roles
+
+
+def test_anthropic_message_truncation(sentry_init, capture_events):
+    """Test that large messages are truncated properly in Anthropic integration."""
+    sentry_init(
+        integrations=[AnthropicIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    client = Anthropic(api_key="test-api-key")
+    client.messages._post = mock.Mock(
+        return_value=Message(
+            id="test",
+            content=[TextBlock(text="Hello", type="text")],
+            model="claude-3",
+            role="assistant",
+            type="message",
+            usage=Usage(input_tokens=10, output_tokens=20),
+        )
+    )
+
+    large_content = (
" * 1000 + ) + large_messages = [ + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + with start_transaction(name="anthropic tx"): + client.messages.create( + model="claude-3-sonnet-20240229", + messages=large_messages, + max_tokens=100, + ) + + (event,) = events + (span,) = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) + + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index b600c32905..c50100feee 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -1,3 +1,4 @@ +import json import pytest from unittest import mock from datetime import datetime @@ -24,6 +25,9 @@ async def __call__(self, *args, **kwargs): _success_callback, _failure_callback, ) +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from sentry_sdk.utils import package_version @@ -545,3 +549,53 @@ def dict(self): # Should have extracted the response message assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + + +def test_litellm_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in LiteLLM integration.""" + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + large_content = ( + "This is a very long message that will exceed our size limits. 
" * 1000 + ) + large_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + mock_response = MockCompletionResponse() + + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": large_messages, + } + + _input_callback(kwargs) + _success_callback( + kwargs, + mock_response, + datetime.now(), + datetime.now(), + ) + + (event,) = events + (span,) = event["spans"] + + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) + + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 06e0a09fcf..b89635fa5e 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,3 +1,4 @@ +import json import pytest from sentry_sdk.utils import package_version @@ -39,6 +40,9 @@ OpenAIIntegration, _calculate_token_usage, ) +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from unittest import mock # python 3.3 and above @@ -1451,6 +1455,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): def test_openai_message_role_mapping(sentry_init, capture_events): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" + sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1460,7 +1465,6 @@ def test_openai_message_role_mapping(sentry_init, capture_events): client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" test_messages = [ {"role": "system", "content": "You are helpful."}, @@ -1471,11 +1475,9 @@ def test_openai_message_role_mapping(sentry_init, capture_events): with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) - + # Verify that the span was created correctly (event,) = events span = event["spans"][0] - - # Verify that the span was created correctly assert span["op"] == "gen_ai.chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] @@ -1500,3 +1502,55 @@ def test_openai_message_role_mapping(sentry_init, capture_events): # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages] assert "ai" not in roles + + +def test_openai_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in OpenAI integration.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + large_content = ( + "This is a very long message that will exceed our size limits. 
" * 1000 + ) + large_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=large_messages, + ) + + (event,) = events + span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) + + if "_meta" in event and len(parsed_messages) < len(large_messages): + meta_path = event["_meta"] + if ( + "spans" in meta_path + and "0" in meta_path["spans"] + and "data" in meta_path["spans"]["0"] + ): + span_meta = meta_path["spans"]["0"]["data"] + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: + messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "len" in messages_meta.get("", {}) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index e647ce9fad..0b5be79d61 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -1,11 +1,16 @@ import asyncio +import json import re import pytest from unittest.mock import MagicMock, patch import os +import sentry_sdk from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration from sentry_sdk.integrations.openai_agents.utils import safe_serialize +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from sentry_sdk.utils import parse_version import agents @@ -138,17 +143,16 @@ async def test_agent_invocation_span( assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" assert invoke_agent_span["description"] == "invoke_agent test_agent" - assert invoke_agent_span["data"]["gen_ai.request.messages"] == safe_serialize( - [ - { - "content": [ - {"text": "You are a helpful test assistant.", "type": "text"} - ], - "role": "system", - }, - {"content": [{"text": "Test input", "type": "text"}], "role": "user"}, - ] - ) + messages_data = invoke_agent_span["data"]["gen_ai.request.messages"] + assert isinstance(messages_data, str) + parsed_messages = json.loads(messages_data) + assert parsed_messages == [ + { + "content": [{"text": "You are a helpful test assistant.", "type": "text"}], + "role": "system", + }, + {"content": [{"text": "Test input", "type": "text"}], "role": "user"}, + ] assert ( invoke_agent_span["data"]["gen_ai.response.text"] == "Hello, how can I help you?" 
@@ -483,22 +487,19 @@ def simple_test_tool(message: str) -> str:
     assert ai_client_span1["data"]["gen_ai.agent.name"] == "test_agent"
     assert ai_client_span1["data"]["gen_ai.request.available_tools"] == available_tools
     assert ai_client_span1["data"]["gen_ai.request.max_tokens"] == 100
-    assert ai_client_span1["data"]["gen_ai.request.messages"] == safe_serialize(
-        [
-            {
-                "role": "system",
-                "content": [
-                    {"type": "text", "text": "You are a helpful test assistant."}
-                ],
-            },
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "Please use the simple test tool"}
-                ],
-            },
-        ]
-    )
+    messages_data = ai_client_span1["data"]["gen_ai.request.messages"]
+    assert isinstance(messages_data, str)
+    parsed_messages = json.loads(messages_data)
+    assert parsed_messages == [
+        {
+            "role": "system",
+            "content": [{"type": "text", "text": "You are a helpful test assistant."}],
+        },
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": "Please use the simple test tool"}],
+        },
+    ]
     assert ai_client_span1["data"]["gen_ai.request.model"] == "gpt-4"
     assert ai_client_span1["data"]["gen_ai.request.temperature"] == 0.7
     assert ai_client_span1["data"]["gen_ai.request.top_p"] == 1.0
@@ -559,49 +560,49 @@ def simple_test_tool(message: str) -> str:
         == available_tools
     )
     assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100
-    assert re.sub(
+
+    # The span data is already a JSON string; scrub unstable repr()s, then parse.
+    messages_json = ai_client_span2["data"]["gen_ai.request.messages"]
+    messages_json_cleaned = re.sub(
         r"SerializationIterator\(.*\)",
         "NOT_CHECKED",
-        ai_client_span2["data"]["gen_ai.request.messages"],
-    ) == safe_serialize(
-        [
-            {
-                "role": "system",
-                "content": [
-                    {"type": "text", "text": "You are a helpful test assistant."}
-                ],
-            },
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "Please use the simple test tool"}
-                ],
-            },
-            {
-                "role": "assistant",
-                "content": [
-                    {
-                        "arguments": '{"message": "hello"}',
-                        "call_id": "call_123",
-                        "name": "simple_test_tool",
-                        "type": "function_call",
-                        "id": "call_123",
-                        "function": "NOT_CHECKED",
-                    }
-                ],
-            },
-            {
-                "role": "tool",
-                "content": [
-                    {
-                        "call_id": "call_123",
-                        "output": "Tool executed with: hello",
-                        "type": "function_call_output",
-                    }
-                ],
-            },
-        ]
+        messages_json,
     )
+    messages_cleaned = json.loads(messages_json_cleaned)
+
+    assert messages_cleaned == [
+        {
+            "role": "system",
+            "content": [{"type": "text", "text": "You are a helpful test assistant."}],
+        },
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": "Please use the simple test tool"}],
+        },
+        {
+            "role": "assistant",
+            "content": [
+                {
+                    "arguments": '{"message": "hello"}',
+                    "call_id": "call_123",
+                    "name": "simple_test_tool",
+                    "type": "function_call",
+                    "id": "call_123",
+                    "function": "NOT_CHECKED",
+                }
+            ],
+        },
+        {
+            "role": "tool",
+            "content": [
+                {
+                    "call_id": "call_123",
+                    "output": "Tool executed with: hello",
+                    "type": "function_call_output",
+                }
+            ],
+        },
+    ]
     assert ai_client_span2["data"]["gen_ai.request.model"] == "gpt-4"
     assert ai_client_span2["data"]["gen_ai.request.temperature"] == 0.7
     assert ai_client_span2["data"]["gen_ai.request.top_p"] == 1.0
@@ -1077,3 +1078,61 @@ def test_openai_agents_message_role_mapping(sentry_init, capture_events):
     # Verify no "ai" roles remain in any message
     for message in stored_messages:
         assert message["role"] != "ai"
+
+
+def test_openai_agents_message_truncation(
+    sentry_init, capture_events, mock_model_response
+):
Agents integration.""" + # Create messages that will definitely exceed size limits + large_system_prompt = ( + "This is a very long system prompt that will exceed our size limits. " * 1000 + ) # ~64KB + large_user_message = ( + "This is a very long user message that will exceed our size limits. " * 1000 + ) # ~64KB + + agent = Agent( + name="test_agent", + model="gpt-4", + instructions=large_system_prompt, + ) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + result = agents.Runner.run_sync( + agent, large_user_message, run_config=test_run_config + ) + + assert result is not None + + (event,) = events + spans = event["spans"] + invoke_agent_span, ai_client_span = spans + assert "gen_ai.request.messages" in invoke_agent_span["data"] + + messages_data = invoke_agent_span["data"]["gen_ai.request.messages"] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) >= 1 + + result_size = len(messages_data.encode("utf-8")) + assert result_size <= MAX_GEN_AI_MESSAGE_BYTES + + total_original_size = len(large_system_prompt) + len(large_user_message) + total_parsed_size = sum(len(str(msg)) for msg in parsed_messages) + assert total_parsed_size < total_original_size diff --git a/tests/test_ai_message_utils.py b/tests/test_ai_message_utils.py new file mode 100644 index 0000000000..720e8cb4e9 --- /dev/null +++ b/tests/test_ai_message_utils.py @@ -0,0 +1,283 @@ +import json +import pytest + +from sentry_sdk.ai.utils import ( + MAX_GEN_AI_MESSAGE_BYTES, + set_data_normalized, + truncate_messages_by_size, + truncate_and_annotate_messages, +) +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize +from sentry_sdk.utils import safe_serialize + + +@pytest.fixture +def sample_messages(): + """Sample messages similar to what gen_ai integrations would use""" + return [ + {"role": "system", "content": "You are a helpful assistant."}, + { + "role": "user", + "content": "What is the difference between a list and a tuple in Python?", + }, + { + "role": "assistant", + "content": "Lists are mutable and use [], tuples are immutable and use ().", + }, + {"role": "user", "content": "Can you give me some examples?"}, + { + "role": "assistant", + "content": "Sure! Here are examples:\n\n```python\n# List\nmy_list = [1, 2, 3]\nmy_list.append(4)\n\n# Tuple\nmy_tuple = (1, 2, 3)\n# my_tuple.append(4) would error\n```", + }, + ] + + +@pytest.fixture +def large_messages(): + """Messages that will definitely exceed size limits""" + large_content = "This is a very long message. 
" * 100 + return [ + {"role": "system", "content": large_content}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + +class TestTruncateMessagesBySize: + def test_no_truncation_needed(self, sample_messages): + """Test that messages under the limit are not truncated""" + result = truncate_messages_by_size( + sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES + ) + assert len(result) == len(sample_messages) + assert result == sample_messages + + def test_truncation_removes_oldest_first(self, large_messages): + """Test that oldest messages are removed first during truncation""" + small_limit = 3000 + result = truncate_messages_by_size(large_messages, max_bytes=small_limit) + assert len(result) < len(large_messages) + + if result: + assert result[-1] == large_messages[-1] + + def test_empty_messages_list(self): + """Test handling of empty messages list""" + result = truncate_messages_by_size( + [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 + ) + assert result == [] + + def test_progressive_truncation(self, large_messages): + """Test that truncation works progressively with different limits""" + limits = [ + MAX_GEN_AI_MESSAGE_BYTES // 5, + MAX_GEN_AI_MESSAGE_BYTES // 10, + MAX_GEN_AI_MESSAGE_BYTES // 25, + MAX_GEN_AI_MESSAGE_BYTES // 100, + MAX_GEN_AI_MESSAGE_BYTES // 500, + ] + prev_count = len(large_messages) + + for limit in limits: + result = truncate_messages_by_size(large_messages, max_bytes=limit) + current_count = len(result) + + assert current_count <= prev_count + assert current_count >= 1 + prev_count = current_count + + def test_exact_size_boundary(self): + """Test behavior at exact size boundaries""" + messages = [{"role": "user", "content": "test"}] + + serialized = serialize(messages, is_vars=False) + json_str = json.dumps(serialized, separators=(",", ":")) + exact_size = len(json_str.encode("utf-8")) + + result = truncate_messages_by_size(messages, max_bytes=exact_size) + assert len(result) == 1 + + result = truncate_messages_by_size(messages, max_bytes=exact_size - 1) + assert len(result) == 1 + + +class TestTruncateAndAnnotateMessages: + def test_no_truncation_returns_list(self, sample_messages): + class MockSpan: + def __init__(self): + self.span_id = "test_span_id" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_messages_truncated = {} + + span = MockSpan() + scope = MockScope() + result = truncate_and_annotate_messages(sample_messages, span, scope) + + assert isinstance(result, list) + assert not isinstance(result, AnnotatedValue) + assert len(result) == len(sample_messages) + assert result == sample_messages + assert span.span_id not in scope._gen_ai_messages_truncated + assert "_gen_ai_messages_original_count" not in span.data + + def test_truncation_sets_metadata_on_scope(self, large_messages): + class MockSpan: + def __init__(self): + self.span_id = "test_span_id" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_messages_truncated = {} + + small_limit = 1000 + span = MockSpan() + scope = MockScope() + original_count = len(large_messages) + result = truncate_and_annotate_messages( + large_messages, span, scope, max_bytes=small_limit + ) + + assert isinstance(result, list) + assert not isinstance(result, AnnotatedValue) + assert len(result) < len(large_messages) + n_removed = original_count - 
+
+
+class TestTruncateAndAnnotateMessages:
+    def test_no_truncation_returns_list(self, sample_messages):
+        class MockSpan:
+            def __init__(self):
+                self.span_id = "test_span_id"
+                self.data = {}
+
+            def set_data(self, key, value):
+                self.data[key] = value
+
+        class MockScope:
+            def __init__(self):
+                self._gen_ai_messages_truncated = {}
+
+        span = MockSpan()
+        scope = MockScope()
+        result = truncate_and_annotate_messages(sample_messages, span, scope)
+
+        assert isinstance(result, list)
+        assert not isinstance(result, AnnotatedValue)
+        assert len(result) == len(sample_messages)
+        assert result == sample_messages
+        assert span.span_id not in scope._gen_ai_messages_truncated
+        assert "_gen_ai_messages_original_count" not in span.data
+
+    def test_truncation_sets_metadata_on_scope(self, large_messages):
+        class MockSpan:
+            def __init__(self):
+                self.span_id = "test_span_id"
+                self.data = {}
+
+            def set_data(self, key, value):
+                self.data[key] = value
+
+        class MockScope:
+            def __init__(self):
+                self._gen_ai_messages_truncated = {}
+
+        small_limit = 1000
+        span = MockSpan()
+        scope = MockScope()
+        original_count = len(large_messages)
+        result = truncate_and_annotate_messages(
+            large_messages, span, scope, max_bytes=small_limit
+        )
+
+        assert isinstance(result, list)
+        assert not isinstance(result, AnnotatedValue)
+        assert len(result) < len(large_messages)
+        n_removed = original_count - len(result)
+        assert scope._gen_ai_messages_truncated[span.span_id] == n_removed
+        assert span.data["_gen_ai_messages_original_count"] == original_count
+
+    def test_metadata_contains_original_count(self, large_messages):
+        class MockSpan:
+            def __init__(self):
+                self.span_id = "test_span_id"
+                self.data = {}
+
+            def set_data(self, key, value):
+                self.data[key] = value
+
+        class MockScope:
+            def __init__(self):
+                self._gen_ai_messages_truncated = {}
+
+        small_limit = 1000
+        original_count = len(large_messages)
+        span = MockSpan()
+        scope = MockScope()
+
+        result = truncate_and_annotate_messages(
+            large_messages, span, scope, max_bytes=small_limit
+        )
+
+        assert span.data["_gen_ai_messages_original_count"] == original_count
+        n_removed = original_count - len(result)
+        assert scope._gen_ai_messages_truncated[span.span_id] == n_removed
+
+    def test_empty_messages_returns_none(self):
+        class MockSpan:
+            def __init__(self):
+                self.span_id = "test_span_id"
+                self.data = {}
+
+            def set_data(self, key, value):
+                self.data[key] = value
+
+        class MockScope:
+            def __init__(self):
+                self._gen_ai_messages_truncated = {}
+
+        span = MockSpan()
+        scope = MockScope()
+        result = truncate_and_annotate_messages([], span, scope)
+        assert result is None
+
+        result = truncate_and_annotate_messages(None, span, scope)
+        assert result is None
+
+    def test_truncated_messages_newest_first(self, large_messages):
+        class MockSpan:
+            def __init__(self):
+                self.span_id = "test_span_id"
+                self.data = {}
+
+            def set_data(self, key, value):
+                self.data[key] = value
+
+        class MockScope:
+            def __init__(self):
+                self._gen_ai_messages_truncated = {}
+
+        small_limit = 3000
+        span = MockSpan()
+        scope = MockScope()
+        result = truncate_and_annotate_messages(
+            large_messages, span, scope, max_bytes=small_limit
+        )
+
+        assert isinstance(result, list)
+        assert result[0] == large_messages[-len(result)]
event["spans"][0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_value, AnnotatedValue) + assert messages_value.metadata["len"] == original_count + assert isinstance(messages_value.value, str) + assert "_gen_ai_messages_original_count" not in event["spans"][0]["data"]