Skip to content
Open
33 changes: 33 additions & 0 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,39 @@ class GEN_AI_ALLOWED_MESSAGE_ROLES:
GEN_AI_MESSAGE_ROLE_MAPPING[source_role] = target_role


def parse_data_uri(url):
    # type: (str) -> Tuple[str, str]
    """
    Split a data URI into its media type and its payload.

    Data URI layout (RFC 2397): data:[<mediatype>][;base64],<data>

    Only the first comma separates the header from the payload, so commas
    inside the payload are preserved. The media type is whatever precedes the
    first ";" of the header once a leading "data:" has been dropped; header
    parameters such as ";base64" are discarded. The payload is returned
    verbatim (it is not decoded).

    Examples:
        data:image/jpeg;base64,/9j/4AAQ... → ("image/jpeg", "/9j/4AAQ...")
        data:text/plain,Hello → ("text/plain", "Hello")
        data:;base64,SGVsbG8= → ("", "SGVsbG8=")

    Raises:
        ValueError: If the URL contains no comma separator.
    """
    if "," not in url:
        raise ValueError("Invalid data URI: missing comma separator")

    header, _, content = url.partition(",")

    # A header without the "data:" scheme is tolerated and used as-is.
    scheme = "data:"
    mime_part = header[len(scheme) :] if header.startswith(scheme) else header

    # Keep only the media type; drop ";base64", ";charset=..." and friends.
    mime_type, _, _ = mime_part.partition(";")

    return mime_type, content


def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
if hasattr(data, "model_dump"):
Expand Down
83 changes: 82 additions & 1 deletion sentry_sdk/integrations/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sentry_sdk.ai.utils import (
set_data_normalized,
normalize_message_roles,
parse_data_uri,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import SPANDATA
Expand All @@ -18,7 +19,7 @@
safe_serialize,
)

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Dict

if TYPE_CHECKING:
from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator
Expand Down Expand Up @@ -180,6 +181,84 @@ def _calculate_token_usage(
)


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
"""
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format.
e.g:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,...",
"detail": "high"
}
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
"""

def _map_item(item: "Dict[str, Any]") -> "Dict[str, Any]":
if not isinstance(item, dict):
return item

if item.get("type") == "image_url":
image_url = item.get("image_url") or {}
url = image_url.get("url", "")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing type check for image_url causes crash on string input

Medium Severity

The _convert_message_parts function assumes image_url is always a dict, but if it's a non-empty string, calling .get("url", "") on it raises an AttributeError. The equivalent code in openai_agents/utils.py handles this case with isinstance(image_url, dict) check and falls back to str(image_url). Since _set_input_data runs before the actual API call, this crash would prevent the user's OpenAI call from executing entirely.

Fix in Cursor Fix in Web

if url.startswith("data:"):
try:
mime_type, content = parse_data_uri(url)
return {
"type": "blob",
"modality": "image",
"mime_type": mime_type,
"content": content,
}
except ValueError:
# If parsing fails, return as URI
return {
"type": "uri",
"modality": "image",
"uri": url,
}
else:
return {
"type": "uri",
"modality": "image",
"uri": url,
}
return item

for message in messages:
if not isinstance(message, dict):
continue
content = message.get("content")
if isinstance(content, list):
message["content"] = [_map_item(item) for item in content]
return messages


def _set_input_data(
span: "Span",
kwargs: "dict[str, Any]",
Expand All @@ -201,6 +280,8 @@ def _set_input_data(
and integration.include_prompts
):
normalized_messages = normalize_message_roles(messages)
normalized_messages = _convert_message_parts(normalized_messages)

scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
if messages_data is not None:
Expand Down
52 changes: 40 additions & 12 deletions sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,19 @@
get_start_span_function,
set_data_normalized,
normalize_message_roles,
normalize_message_role,
truncate_and_annotate_messages,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import safe_serialize

from ..consts import SPAN_ORIGIN
from ..utils import _set_agent_data, _set_usage_data
from ..utils import (
_set_agent_data,
_set_usage_data,
_transform_openai_agents_message_content,
)

from typing import TYPE_CHECKING

Expand Down Expand Up @@ -49,17 +54,40 @@ def invoke_agent_span(

original_input = kwargs.get("original_input")
if original_input is not None:
message = (
original_input
if isinstance(original_input, str)
else safe_serialize(original_input)
)
messages.append(
{
"content": [{"text": message, "type": "text"}],
"role": "user",
}
)
if isinstance(original_input, str):
# String input: wrap in text block
messages.append(
{
"content": [{"text": original_input, "type": "text"}],
"role": "user",
}
)
elif isinstance(original_input, list) and len(original_input) > 0:
# Check if list contains message objects (with type="message")
# or content parts (input_text, input_image, etc.)
first_item = original_input[0]
if isinstance(first_item, dict) and first_item.get("type") == "message":
# List of message objects - process each individually
for msg in original_input:
if isinstance(msg, dict) and msg.get("type") == "message":
role = normalize_message_role(msg.get("role", "user"))
content = msg.get("content")
transformed = _transform_openai_agents_message_content(
content
)
if isinstance(transformed, str):
transformed = [{"text": transformed, "type": "text"}]
elif not isinstance(transformed, list):
transformed = [
{"text": str(transformed), "type": "text"}
]
messages.append({"content": transformed, "role": role})
else:
# List of content parts - transform and wrap as user message
content = _transform_openai_agents_message_content(original_input)
if not isinstance(content, list):
content = [{"text": str(content), "type": "text"}]
messages.append({"content": content, "role": "user"})

if len(messages) > 0:
normalized_messages = normalize_message_roles(messages)
Expand Down
127 changes: 124 additions & 3 deletions sentry_sdk/integrations/openai_agents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from sentry_sdk.ai.utils import (
GEN_AI_ALLOWED_MESSAGE_ROLES,
normalize_message_roles,
parse_data_uri,
set_data_normalized,
normalize_message_role,
truncate_and_annotate_messages,
Expand All @@ -27,6 +28,124 @@
raise DidNotEnable("OpenAI Agents not installed")


def _transform_openai_agents_content_part(
content_part: "dict[str, Any]",
) -> "dict[str, Any]":
"""
Transform an OpenAI Agents content part to Sentry-compatible format.
Handles multimodal content (images, audio, files) by converting them
to the standardized format:
- base64 encoded data -> type: "blob"
- URL references -> type: "uri"
- file_id references -> type: "file"
"""
if not isinstance(content_part, dict):
return content_part

part_type = content_part.get("type")

# Handle input_text (OpenAI Agents SDK text format) -> normalize to standard text format
if part_type == "input_text":
return {
"type": "text",
"text": content_part.get("text", ""),
}

# Handle image_url (OpenAI vision format) and input_image (OpenAI Agents SDK format)
if part_type in ("image_url", "input_image"):
# Get URL from either format
if part_type == "image_url":
image_url = content_part.get("image_url", {})
url = (
image_url.get("url", "")
if isinstance(image_url, dict)
else str(image_url)
)
else:
# input_image format has image_url directly
url = content_part.get("image_url", "")

if url.startswith("data:"):
try:
mime_type, content = parse_data_uri(url)
return {
"type": "blob",
"modality": "image",
"mime_type": mime_type,
"content": content,
}
except ValueError:
# If parsing fails, return as URI
return {
"type": "uri",
"modality": "image",
"mime_type": "",
"uri": url,
}
else:
return {
"type": "uri",
"modality": "image",
"mime_type": "",
"uri": url,
}

# Handle input_audio (OpenAI audio input format)
if part_type == "input_audio":
input_audio = content_part.get("input_audio", {})
audio_format = input_audio.get("format", "")
Comment on lines +96 to +97
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The code assumes input_audio, image_file, and file content parts are dictionaries. If they are not, calling .get() will raise an AttributeError.
Severity: CRITICAL

🔍 Detailed Analysis

The function _transform_openai_agents_content_part lacks defensive type checks for input_audio, image_file, and file content parts. While the image_url part is safely handled with an isinstance(..., dict) check, the other parts directly call .get() on the value returned from content_part.get(...). If the OpenAI API returns a non-dictionary value (e.g., a string) for these keys, the subsequent .get() call will raise an AttributeError, crashing the message processing pipeline. This inconsistency suggests an oversight, as the need for such checks was recognized elsewhere in the same function.

💡 Suggested Fix

Add isinstance checks for input_audio, image_file, and file_data to ensure they are dictionaries before attempting to call .get() on them. This will make their handling consistent with the safe handling of image_url. For example: audio_format = input_audio.get("format", "") if isinstance(input_audio, dict) else "".

🤖 Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: sentry_sdk/integrations/openai_agents/utils.py#L96-L97

Potential issue: The function `_transform_openai_agents_content_part` lacks defensive
type checks for `input_audio`, `image_file`, and `file` content parts. While the
`image_url` part is safely handled with an `isinstance(..., dict)` check, the other
parts directly call `.get()` on the value returned from `content_part.get(...)`. If the
OpenAI API returns a non-dictionary value (e.g., a string) for these keys, the
subsequent `.get()` call will raise an `AttributeError`, crashing the message processing
pipeline. This inconsistency suggests an oversight, as the need for such checks was
recognized elsewhere in the same function.

Did we get this right? 👍 / 👎 to inform future reviews.
Reference ID: 8537545

mime_type = f"audio/{audio_format}" if audio_format else ""
return {
"type": "blob",
"modality": "audio",
"mime_type": mime_type,
"content": input_audio.get("data", ""),
}

# Handle image_file (Assistants API file-based images)
if part_type == "image_file":
image_file = content_part.get("image_file", {})
return {
"type": "file",
"modality": "image",
"mime_type": "",
"file_id": image_file.get("file_id", ""),
}

# Handle file (document attachments)
if part_type == "file":
file_data = content_part.get("file", {})
return {
"type": "file",
"modality": "document",
"mime_type": "",
"file_id": file_data.get("file_id", ""),
}

return content_part


def _transform_openai_agents_message_content(content: "Any") -> "Any":
"""
Transform OpenAI Agents message content, handling both string content and
list of content parts.
"""
if isinstance(content, str):
return content

if isinstance(content, (list, tuple)):
transformed = []
for item in content:
if isinstance(item, dict):
transformed.append(_transform_openai_agents_content_part(item))
else:
transformed.append(item)
return transformed

return content


def _capture_exception(exc: "Any") -> None:
set_span_errored()

Expand Down Expand Up @@ -128,13 +247,15 @@ def _set_input_data(
if "role" in message:
normalized_role = normalize_message_role(message.get("role"))
content = message.get("content")
# Transform content to handle multimodal data (images, audio, files)
transformed_content = _transform_openai_agents_message_content(content)
request_messages.append(
{
"role": normalized_role,
"content": (
[{"type": "text", "text": content}]
if isinstance(content, str)
else content
[{"type": "text", "text": transformed_content}]
if isinstance(transformed_content, str)
else transformed_content
),
}
)
Expand Down
Loading
Loading