fix: handle whitespace-only content in structured output parsing #2216

Open · wants to merge 2 commits into base: main
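For context, a rough reproduction of the scenario this PR targets, using the beta structured-output helper; the schema, prompt, and model name below are illustrative and not taken from the change itself. Before the patch, whitespace-only `message.content` surfaced as a parse error from inside the helper; with the patch, `message.parsed` is left as `None` and a warning is logged instead.

```python
# Hypothetical reproduction; schema, prompt, and model name are illustrative only.
from pydantic import BaseModel

from openai import OpenAI


class MathReply(BaseModel):
    answer: int


client = OpenAI()

completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
    response_format=MathReply,
)

message = completion.choices[0].message
# If the model returns whitespace-only content, the patched helper logs a warning
# and leaves `parsed` as None rather than raising while parsing the JSON.
print(message.parsed)
```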
15 changes: 14 additions & 1 deletion src/openai/lib/_parsing/_completions.py
@@ -158,7 +158,13 @@ def maybe_parse_content(
     message: ChatCompletionMessage | ParsedChatCompletionMessage[object],
 ) -> ResponseFormatT | None:
     if has_rich_response_format(response_format) and message.content and not message.refusal:
-        return _parse_content(response_format, message.content)
+        try:
+            return _parse_content(response_format, message.content)
+        except ValueError as e:
+            # if parsing fails due to whitespace content, log a warning and return None
+            import logging
+            logging.warning(f"Failed to parse content: {e}")
+            return None
 
     return None

@@ -217,6 +223,13 @@ def is_parseable_tool(input_tool: ChatCompletionToolParam) -> bool:
 
 
 def _parse_content(response_format: type[ResponseFormatT], content: str) -> ResponseFormatT:
+    # checking here if the content is empty or contains only whitespace
+    if not content or content.isspace():
+        raise ValueError(
+            f"Cannot parse empty or whitespace-only content as {response_format.__name__}. "
+            "The model returned content with no valid JSON."
+        )
+
     if is_basemodel_type(response_format):
         return cast(ResponseFormatT, model_parse_json(response_format, content))

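A minimal sketch of how the new guard in `_parse_content` could be exercised, assuming a Pydantic response format; the test names and the `pytest` usage are suggestions, not part of this PR.

```python
# Sketch of tests for the new whitespace guard; illustrative, not included in the diff.
import pytest
from pydantic import BaseModel

from openai.lib._parsing._completions import _parse_content


class MathReply(BaseModel):
    answer: int


def test_whitespace_only_content_raises_value_error() -> None:
    # The guard raises before any JSON parsing is attempted.
    with pytest.raises(ValueError, match="whitespace-only"):
        _parse_content(MathReply, "   \n\t")


def test_valid_json_still_parses() -> None:
    # Non-empty JSON still goes through model_parse_json as before.
    assert _parse_content(MathReply, '{"answer": 4}').answer == 4
```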
22 changes: 14 additions & 8 deletions src/openai/lib/streaming/chat/_completions.py
@@ -435,10 +435,13 @@ def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionS
                 and not choice_snapshot.message.refusal
                 and is_given(self._rich_response_format)
             ):
-                choice_snapshot.message.parsed = from_json(
-                    bytes(choice_snapshot.message.content, "utf-8"),
-                    partial_mode=True,
-                )
+                # skipping parsing if content is just whitespace
+                content = choice_snapshot.message.content
+                if content.strip():
+                    choice_snapshot.message.parsed = from_json(
+                        bytes(content, "utf-8"),
+                        partial_mode=True,
+                    )
 
             for tool_call_chunk in choice.delta.tool_calls or []:
                 tool_call_snapshot = (choice_snapshot.message.tool_calls or [])[tool_call_chunk.index]
@@ -453,10 +456,13 @@
                         and input_tool.get("function", {}).get("strict")
                         and tool_call_snapshot.function.arguments
                     ):
-                        tool_call_snapshot.function.parsed_arguments = from_json(
-                            bytes(tool_call_snapshot.function.arguments, "utf-8"),
-                            partial_mode=True,
-                        )
+                        arguments = tool_call_snapshot.function.arguments
+                        # skipping parsing if arguments is just whitespace
+                        if arguments.strip():
+                            tool_call_snapshot.function.parsed_arguments = from_json(
+                                bytes(arguments, "utf-8"),
+                                partial_mode=True,
+                            )
                 elif TYPE_CHECKING:  # type: ignore[unreachable]
                     assert_never(tool_call_snapshot)

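As a standalone illustration of the streaming-side change, the sketch below mirrors the guard added in `_accumulate_chunk`: a whitespace-only snapshot is skipped instead of being handed to `jiter.from_json` in partial mode. The helper function and sample snapshots are made up for illustration and are not part of the SDK.

```python
# Standalone sketch of the streaming guard; `jiter` is the JSON parser the SDK
# already uses for partial snapshots. The helper below is illustrative only.
from typing import Any

from jiter import from_json


def parse_partial_snapshot(content: str) -> Any:
    # Mirrors the added check: skip parsing while the accumulated content is
    # empty or whitespace-only, e.g. early chunks that only carry "\n".
    if not content.strip():
        return None
    return from_json(bytes(content, "utf-8"), partial_mode=True)


print(parse_partial_snapshot("   \n"))  # None: nothing to parse yet
print(parse_partial_snapshot('{"answer": 4, "note": "unfini'))  # partial_mode keeps the complete prefix
```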