Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 77 additions & 3 deletions openhands-sdk/openhands/sdk/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,12 @@ def format_messages_for_responses(
- Uses Message.to_responses_value to get either instructions (system)
or input items (others)
- Concatenates system instructions into a single instructions string

Codex subscription endpoints can reject complex/long `instructions`
("Instructions are not valid"). When using the ChatGPT subscription
transport (chatgpt.com/backend-api/codex), avoid sending system prompts
as top-level instructions and instead prepend them to the first user
message.
"""
msgs = copy.deepcopy(messages)

Expand All @@ -1019,18 +1025,86 @@ def format_messages_for_responses(
# Assign system instructions as a string, collect input items
instructions: str | None = None
input_items: list[dict[str, Any]] = []
system_chunks: list[str] = []

# Subscription transport gate: only apply this workaround when calling
# ChatGPT subscription Codex backend (not the standard OpenAI API).
base = (self.base_url or "").lower()
is_subscription_codex_transport = (
"chatgpt.com" in base and "backend-api" in base and "codex" in base
)
Comment on lines +1030 to +1035
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Critical: The detection logic only checks base_url but ignores the model name. This creates inconsistency with the PR description which states "Expanded Responses-model detection from only codex-mini-latest to any model containing codex".

Issues:

  1. A non-Codex model (e.g., "gpt-4") with base_url="https://chatgpt.com/backend-api/codex" will get Codex handling (likely incorrect)
  2. A Codex model (e.g., "gpt-5-codex") with standard OpenAI base_url won't get Codex handling (may be intended, but contradicts PR description)

Suggestion: Include model name in the detection logic for consistency:

Suggested change
# Subscription transport gate: only apply this workaround when calling
# ChatGPT subscription Codex backend (not the standard OpenAI API).
base = (self.base_url or "").lower()
is_subscription_codex_transport = (
"chatgpt.com" in base and "backend-api" in base and "codex" in base
)
# Subscription transport gate: only apply this workaround when calling
# ChatGPT subscription Codex backend (not the standard OpenAI API).
base = (self.base_url or "").lower()
model_name = (self.model or "").lower()
is_subscription_codex_transport = (
"chatgpt.com" in base and "backend-api" in base and "codex" in base
and "codex" in model_name
)

Or clarify in documentation that only the base_url matters, not the model name.


DEFAULT_CODEX_INSTRUCTIONS = (
"You are OpenHands agent, a helpful AI assistant that can interact "
"with a computer to solve tasks."
)
Comment on lines +1037 to +1040
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Suggestion: The hardcoded default instructions should be documented. Add a comment explaining:

  1. Why this specific text is used
  2. Why it's required by the endpoint
  3. Whether it could/should be configurable

Alternatively, consider making it a class constant with better visibility:

Suggested change
DEFAULT_CODEX_INSTRUCTIONS = (
"You are OpenHands agent, a helpful AI assistant that can interact "
"with a computer to solve tasks."
)
# Codex subscription endpoints require a simple, stable instructions string.
# Complex system prompts are moved to user content to avoid validation errors.
DEFAULT_CODEX_INSTRUCTIONS = (
"You are OpenHands agent, a helpful AI assistant that can interact "
"with a computer to solve tasks."
)


for m in msgs:
val = m.to_responses_value(vision_enabled=vision_active)
if isinstance(val, str):
s = val.strip()
if not s:
continue
instructions = (
s if instructions is None else f"{instructions}\n\n---\n\n{s}"
)
if is_subscription_codex_transport:
system_chunks.append(s)
else:
instructions = (
s
if instructions is None
else f"{instructions}\n\n---\n\n{s}"
)
else:
if val:
input_items.extend(val)

if is_subscription_codex_transport and system_chunks:
merged_system = "\n\n---\n\n".join(system_chunks).strip()
if merged_system:
prefix = f"Context (system prompt):\n{merged_system}\n\n"
injected = False
for item in input_items:
if item.get("type") == "message" and item.get("role") == "user":
content = item.get("content")
if not isinstance(content, list):
content = [content] if content else []
item["content"] = (
[{"type": "input_text", "text": prefix}] + content
)
Comment on lines +1067 to +1072
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: Unsafe content manipulation. The code assumes that if content is not a list, it can be safely wrapped as [content]. However, this doesn't validate what type content actually is.

Issues:

  1. If content is a dict (which is possible from to_responses_dict), wrapping it as [dict] creates [{...}] instead of the expected list of content items
  2. If content is already a string or other type, this could create incorrect structure

Suggestion: Add type validation and proper handling:

Suggested change
content = item.get("content")
if not isinstance(content, list):
content = [content] if content else []
item["content"] = (
[{"type": "input_text", "text": prefix}] + content
)
if item.get("type") == "message" and item.get("role") == "user":
content = item.get("content")
if not isinstance(content, list):
# Content should already be a list from to_responses_dict
# If not, this indicates an unexpected state
content = [] if not content else [{"type": "input_text", "text": str(content)}]
item["content"] = (
[{"type": "input_text", "text": prefix}] + content
)

injected = True
break

if not injected:
input_items.insert(
0,
{
"role": "user",
"content": [{"type": "input_text", "text": prefix}],
},
)
Comment on lines +1060 to +1083
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Suggestion: These two operations (system prompt injection + normalization) iterate through input_items twice. For better performance, consider combining them into a single loop:

if is_subscription_codex_transport and system_chunks:
    merged_system = "\n\n---\n\n".join(system_chunks).strip()
    prefix = f"Context (system prompt):\n{merged_system}\n\n" if merged_system else None
    
    normalized: list[dict[str, Any]] = []
    injected = False
    
    for item in input_items:
        # Normalize first
        if item.get("type") == "message":
            normalized_item = {
                "role": item.get("role"),
                "content": item.get("content") or [],
            }
        else:
            normalized_item = item
        
        # Then inject prefix if applicable
        if prefix and not injected and normalized_item.get("role") == "user":
            content = normalized_item["content"] if isinstance(normalized_item["content"], list) else []
            normalized_item["content"] = [{"type": "input_text", "text": prefix}] + content
            injected = True
        
        normalized.append(normalized_item)
    
    # If we never found a user message, add synthetic one
    if prefix and not injected:
        normalized.insert(0, {
            "role": "user",
            "content": [{"type": "input_text", "text": prefix}],
        })
    
    input_items = normalized


# For subscription Codex transport, normalize message items to match
# the shape used by OpenCode's Codex client:
# {"role": "user", "content": [{"type": "input_text", ...}]}
# instead of our generic {"type": "message", ...} wrapper.
if is_subscription_codex_transport and input_items:
normalized: list[dict[str, Any]] = []
for item in input_items:
if item.get("type") == "message":
normalized.append(
{
"role": item.get("role"),
"content": item.get("content") or [],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Important: Inconsistent empty content handling. Compare this line with line 1069:

  • Line 1069: content = [content] if content else [] - preserves truthy content, uses empty list for falsy
  • Line 1096: item.get("content") or [] - replaces ANY falsy value (including empty string "") with empty list

This inconsistency could cause bugs if content is legitimately an empty string or 0.

Suggestion: Use consistent handling:

Suggested change
"content": item.get("content") or [],
"content": item.get("content") if item.get("content") is not None else [],

}
)
else:
normalized.append(item)
input_items = normalized

# For subscription Codex transport, use a small, stable instructions string
# (required by the endpoint) and move the full system prompt into user content.
if is_subscription_codex_transport:
return DEFAULT_CODEX_INSTRUCTIONS, input_items

return instructions, input_items

def get_token_count(self, messages: list[Message]) -> int:
Expand Down
41 changes: 41 additions & 0 deletions tests/sdk/llm/test_responses_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,47 @@ def test_system_to_responses_value_instructions_concat():
assert inputs == []


def test_subscription_codex_transport_does_not_use_top_level_instructions_and_prepend_system_to_user(): # noqa: E501
m_sys = Message(role="system", content=[TextContent(text="SYS")])
m_user = Message(role="user", content=[TextContent(text="USER")])

llm = LLM(model="gpt-5.1-codex", base_url="https://chatgpt.com/backend-api/codex")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 Nit: The test uses model="gpt-5.1-codex" but the actual implementation doesn't check the model name at all - only the base_url. This makes the test somewhat misleading.

Either:

  1. Update the implementation to check model name (see comment on llm.py:1033)
  2. Or update this test to make it clear that only the base_url matters:
Suggested change
llm = LLM(model="gpt-5.1-codex", base_url="https://chatgpt.com/backend-api/codex")
llm = LLM(model="any-model", base_url="https://chatgpt.com/backend-api/codex")

instr, inputs = llm.format_messages_for_responses([m_sys, m_user])

assert instr is not None
assert "OpenHands agent" in instr
Comment on lines +58 to +59
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 Nit: The assertion is too weak. It should explicitly verify that the default instructions are used, not the system prompt content:

Suggested change
assert instr is not None
assert "OpenHands agent" in instr
# Verify default instructions are used, not the system prompt
assert instr == "You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks."
assert "SYS" not in instr # System prompt should NOT be in instructions

assert len(inputs) >= 1
first_user = next(it for it in inputs if it.get("role") == "user")
content = first_user.get("content")
assert isinstance(content, list)
assert content[0]["type"] == "input_text"
assert "SYS" in content[0]["text"]


def test_subscription_codex_transport_injects_synthetic_user_message_when_none_exists():
    """When no user message exists, the Codex subscription transport must
    inject a synthetic user message (at position 0) carrying the system prompt.
    """
    m_sys = Message(role="system", content=[TextContent(text="SYS")])
    m_asst = Message(role="assistant", content=[TextContent(text="ASST")])

    llm = LLM(model="gpt-5.1-codex", base_url="https://chatgpt.com/backend-api/codex")
    instr, inputs = llm.format_messages_for_responses([m_sys, m_asst])

    # The stable default instructions must be used verbatim; the real system
    # prompt must NOT leak into the top-level instructions string.
    assert instr == (
        "You are OpenHands agent, a helpful AI assistant that can interact "
        "with a computer to solve tasks."
    )
    assert "SYS" not in instr

    # The synthetic user message is inserted first and carries the system text
    # as an `input_text` content item.
    assert len(inputs) >= 1
    first = inputs[0]
    assert first.get("role") == "user"
    assert first["content"][0]["type"] == "input_text"
    assert "SYS" in first["content"][0]["text"]


def test_api_codex_models_keep_system_as_instructions():
    """A Codex-named model on the standard OpenAI API (no subscription
    base_url) keeps the system prompt as top-level instructions.
    """
    system_msg = Message(role="system", content=[TextContent(text="SYS")])
    codex_llm = LLM(model="gpt-5.1-codex")

    instructions, input_items = codex_llm.format_messages_for_responses([system_msg])

    assert instructions == "SYS"
    assert input_items == []


def test_user_to_responses_dict_with_and_without_vision():
m = Message(
role="user",
Expand Down
Loading