diff --git a/codex-rs/app-server-protocol/schema/json/ClientRequest.json b/codex-rs/app-server-protocol/schema/json/ClientRequest.json index 34615727799..3d56626da7f 100644 --- a/codex-rs/app-server-protocol/schema/json/ClientRequest.json +++ b/codex-rs/app-server-protocol/schema/json/ClientRequest.json @@ -480,6 +480,19 @@ }, "type": "object" }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -526,19 +539,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. 
`success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -548,7 +552,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/DynamicToolCallResponse.json b/codex-rs/app-server-protocol/schema/json/DynamicToolCallResponse.json index 662d3bda4f8..86c305237c9 100644 --- a/codex-rs/app-server-protocol/schema/json/DynamicToolCallResponse.json +++ b/codex-rs/app-server-protocol/schema/json/DynamicToolCallResponse.json @@ -1,15 +1,110 @@ { "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "DynamicToolCallOutputContentItem": { + "oneOf": [ + { + "properties": { + "text": { + "type": "string" + }, + "type": { + "enum": [ + "inputText" + ], + "title": "InputTextDynamicToolCallOutputContentItemType", + "type": "string" + } + }, + "required": [ + "text", + "type" + ], + "title": "InputTextDynamicToolCallOutputContentItem", + "type": "object" + }, + { + "properties": { + "imageUrl": { + "type": "string" + }, + "type": { + "enum": [ + "inputImage" + ], + "title": "InputImageDynamicToolCallOutputContentItemType", + "type": "string" + } + }, + "required": [ + "imageUrl", + "type" + ], + "title": "InputImageDynamicToolCallOutputContentItem", + "type": "object" + } + ] + }, + "DynamicToolCallResult": { + "oneOf": [ + { + "description": "Preferred structured tool output (for example text + images) that is forwarded directly to the model as content items.", + "properties": { + "contentItems": { + "items": { + "$ref": "#/definitions/DynamicToolCallOutputContentItem" + }, + "type": "array" + }, + "type": { + "enum": [ + "contentItems" + ], + "title": "ContentItemsDynamicToolCallResultType", + "type": "string" + } + }, + 
"required": [ + "contentItems", + "type" + ], + "title": "ContentItemsDynamicToolCallResult", + "type": "object" + }, + { + "description": "Plain-text tool output.", + "properties": { + "output": { + "type": "string" + }, + "type": { + "enum": [ + "output" + ], + "title": "OutputDynamicToolCallResultType", + "type": "string" + } + }, + "required": [ + "output", + "type" + ], + "title": "OutputDynamicToolCallResult", + "type": "object" + } + ] + } + }, "properties": { - "output": { - "type": "string" + "result": { + "$ref": "#/definitions/DynamicToolCallResult" }, "success": { "type": "boolean" } }, "required": [ - "output", + "result", "success" ], "title": "DynamicToolCallResponse", diff --git a/codex-rs/app-server-protocol/schema/json/EventMsg.json b/codex-rs/app-server-protocol/schema/json/EventMsg.json index 1274e1cd7f2..a3a570f57ef 100644 --- a/codex-rs/app-server-protocol/schema/json/EventMsg.json +++ b/codex-rs/app-server-protocol/schema/json/EventMsg.json @@ -2811,6 +2811,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -2857,19 +2870,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. 
When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -2879,7 +2883,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/ServerNotification.json b/codex-rs/app-server-protocol/schema/json/ServerNotification.json index f89bcf24e4f..fc4972acf59 100644 --- a/codex-rs/app-server-protocol/schema/json/ServerNotification.json +++ b/codex-rs/app-server-protocol/schema/json/ServerNotification.json @@ -3431,6 +3431,19 @@ ], "type": "object" }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -3477,19 +3490,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. 
When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -3499,7 +3503,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index bc72ceb21ff..8a9fe6aae9d 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -2240,6 +2240,50 @@ ], "type": "object" }, + "DynamicToolCallOutputContentItem": { + "oneOf": [ + { + "properties": { + "text": { + "type": "string" + }, + "type": { + "enum": [ + "inputText" + ], + "title": "InputTextDynamicToolCallOutputContentItemType", + "type": "string" + } + }, + "required": [ + "text", + "type" + ], + "title": "InputTextDynamicToolCallOutputContentItem", + "type": "object" + }, + { + "properties": { + "imageUrl": { + "type": "string" + }, + "type": { + "enum": [ + "inputImage" + ], + "title": "InputImageDynamicToolCallOutputContentItemType", + "type": "string" + } + }, + "required": [ + "imageUrl", + "type" + ], + "title": "InputImageDynamicToolCallOutputContentItem", + "type": "object" + } + ] + }, "DynamicToolCallParams": { "$schema": "http://json-schema.org/draft-07/schema#", "properties": { @@ -2270,20 +2314,69 @@ 
"DynamicToolCallResponse": { "$schema": "http://json-schema.org/draft-07/schema#", "properties": { - "output": { - "type": "string" + "result": { + "$ref": "#/definitions/DynamicToolCallResult" }, "success": { "type": "boolean" } }, "required": [ - "output", + "result", "success" ], "title": "DynamicToolCallResponse", "type": "object" }, + "DynamicToolCallResult": { + "oneOf": [ + { + "description": "Preferred structured tool output (for example text + images) that is forwarded directly to the model as content items.", + "properties": { + "contentItems": { + "items": { + "$ref": "#/definitions/DynamicToolCallOutputContentItem" + }, + "type": "array" + }, + "type": { + "enum": [ + "contentItems" + ], + "title": "ContentItemsDynamicToolCallResultType", + "type": "string" + } + }, + "required": [ + "contentItems", + "type" + ], + "title": "ContentItemsDynamicToolCallResult", + "type": "object" + }, + { + "description": "Plain-text tool output.", + "properties": { + "output": { + "type": "string" + }, + "type": { + "enum": [ + "output" + ], + "title": "OutputDynamicToolCallResultType", + "type": "string" + } + }, + "required": [ + "output", + "type" + ], + "title": "OutputDynamicToolCallResult", + "type": "object" + } + ] + }, "EventMsg": { "$schema": "http://json-schema.org/draft-07/schema#", "description": "Response event from the agent NOTE: Make sure none of these values have optional types, as it will mess up the extension code-gen.", @@ -4919,6 +5012,19 @@ "title": "ForkConversationResponse", "type": "object" }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. 
This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -4965,19 +5071,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -4987,7 +5084,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, @@ -11102,6 +11199,19 @@ ], "type": "string" }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/v2/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -11148,19 +11258,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. 
When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/v2/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/v2/FunctionCallOutputBody" }, "success": { "type": [ @@ -11170,7 +11271,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v1/ForkConversationResponse.json b/codex-rs/app-server-protocol/schema/json/v1/ForkConversationResponse.json index 9894c47c1e0..ca4c6699d1e 100644 --- a/codex-rs/app-server-protocol/schema/json/v1/ForkConversationResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v1/ForkConversationResponse.json @@ -2811,6 +2811,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -2857,19 +2870,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. 
When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -2879,7 +2883,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationParams.json b/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationParams.json index 1261306e619..9ce52963d92 100644 --- a/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationParams.json +++ b/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationParams.json @@ -98,6 +98,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -144,19 +157,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. 
When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -166,7 +170,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationResponse.json b/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationResponse.json index fe4037a40fc..0a48d8683f3 100644 --- a/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationResponse.json @@ -2811,6 +2811,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -2857,19 +2870,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. 
When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -2879,7 +2883,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v1/SessionConfiguredNotification.json b/codex-rs/app-server-protocol/schema/json/v1/SessionConfiguredNotification.json index 8e4c361979c..cc99970435f 100644 --- a/codex-rs/app-server-protocol/schema/json/v1/SessionConfiguredNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v1/SessionConfiguredNotification.json @@ -2811,6 +2811,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -2857,19 +2870,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. 
When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -2879,7 +2883,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json index 1b307c9b898..c1e36ad8e8c 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json @@ -65,6 +65,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -111,19 +124,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. 
When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -133,7 +137,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json index cc05de490a3..63aba5345af 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json @@ -74,6 +74,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -120,19 +133,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. 
When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -142,7 +146,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputBody.ts b/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputBody.ts new file mode 100644 index 00000000000..6bcb7e25d63 --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputBody.ts @@ -0,0 +1,6 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { FunctionCallOutputContentItem } from "./FunctionCallOutputContentItem"; + +export type FunctionCallOutputBody = string | Array<FunctionCallOutputContentItem>; diff --git a/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputPayload.ts b/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputPayload.ts index 94370f582de..6376c5b8eb0 100644 --- a/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputPayload.ts +++ b/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputPayload.ts @@ -1,14 +1,12 @@ // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. 
-import type { FunctionCallOutputContentItem } from "./FunctionCallOutputContentItem"; +import type { FunctionCallOutputBody } from "./FunctionCallOutputBody"; /** * The payload we send back to OpenAI when reporting a tool call result. * - * `content` preserves the historical plain-string payload so downstream - * integrations (tests, logging, etc.) can keep treating tool output as - * `String`. When an MCP server returns richer data we additionally populate - * `content_items` with the structured form that the Responses API understands. + * `body` serializes directly as the wire value for `function_call_output.output`. + * `success` remains internal metadata for downstream handling. */ -export type FunctionCallOutputPayload = { content: string, content_items: Array<FunctionCallOutputContentItem> | null, success: boolean | null, }; +export type FunctionCallOutputPayload = { body: FunctionCallOutputBody, success: boolean | null, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/index.ts b/codex-rs/app-server-protocol/schema/typescript/index.ts index 91da0708e10..0ea9d28e065 100644 --- a/codex-rs/app-server-protocol/schema/typescript/index.ts +++ b/codex-rs/app-server-protocol/schema/typescript/index.ts @@ -69,6 +69,7 @@ export type { FileChange } from "./FileChange"; export type { ForcedLoginMethod } from "./ForcedLoginMethod"; export type { ForkConversationParams } from "./ForkConversationParams"; export type { ForkConversationResponse } from "./ForkConversationResponse"; +export type { FunctionCallOutputBody } from "./FunctionCallOutputBody"; export type { FunctionCallOutputContentItem } from "./FunctionCallOutputContentItem"; export type { FunctionCallOutputPayload } from "./FunctionCallOutputPayload"; export type { FuzzyFileSearchParams } from "./FuzzyFileSearchParams"; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallOutputContentItem.ts b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallOutputContentItem.ts new file mode 100644 index 
00000000000..8f432109d1b --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallOutputContentItem.ts @@ -0,0 +1,5 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +export type DynamicToolCallOutputContentItem = { "type": "inputText", text: string, } | { "type": "inputImage", imageUrl: string, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResponse.ts b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResponse.ts index a35b9b394a8..3b6b127a5f1 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResponse.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResponse.ts @@ -1,5 +1,6 @@ // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { DynamicToolCallResult } from "./DynamicToolCallResult"; -export type DynamicToolCallResponse = { output: string, success: boolean, }; +export type DynamicToolCallResponse = { result: DynamicToolCallResult, success: boolean, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResult.ts b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResult.ts new file mode 100644 index 00000000000..6753610eb28 --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResult.ts @@ -0,0 +1,6 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. 
+import type { DynamicToolCallOutputContentItem } from "./DynamicToolCallOutputContentItem"; + +export type DynamicToolCallResult = { "type": "contentItems", contentItems: Array<DynamicToolCallOutputContentItem>, } | { "type": "output", output: string, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/index.ts b/codex-rs/app-server-protocol/schema/typescript/v2/index.ts index 0c50231a809..1f52ea4bd6d 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/index.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/index.ts @@ -46,8 +46,10 @@ export type { ConfigWriteResponse } from "./ConfigWriteResponse"; export type { ContextCompactedNotification } from "./ContextCompactedNotification"; export type { CreditsSnapshot } from "./CreditsSnapshot"; export type { DeprecationNoticeNotification } from "./DeprecationNoticeNotification"; +export type { DynamicToolCallOutputContentItem } from "./DynamicToolCallOutputContentItem"; export type { DynamicToolCallParams } from "./DynamicToolCallParams"; export type { DynamicToolCallResponse } from "./DynamicToolCallResponse"; +export type { DynamicToolCallResult } from "./DynamicToolCallResult"; export type { DynamicToolSpec } from "./DynamicToolSpec"; export type { ErrorNotification } from "./ErrorNotification"; export type { ExecPolicyAmendment } from "./ExecPolicyAmendment"; diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 7ce8cc2ed3d..2ac06a25913 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -2775,10 +2775,37 @@ pub struct DynamicToolCallParams { #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct DynamicToolCallResponse { - pub output: String, + pub result: DynamicToolCallResult, pub success: bool, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(tag = "type", rename_all = "camelCase")] +#[ts(tag = "type")] +#[ts(export_to = "v2/")] 
+pub enum DynamicToolCallResult { + /// Preferred structured tool output (for example text + images) that is + /// forwarded directly to the model as content items. + #[serde(rename_all = "camelCase")] + ContentItems { + content_items: Vec<DynamicToolCallOutputContentItem>, + }, + /// Plain-text tool output. + #[serde(rename_all = "camelCase")] + Output { output: String }, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(tag = "type", rename_all = "camelCase")] +#[ts(tag = "type")] +#[ts(export_to = "v2/")] +pub enum DynamicToolCallOutputContentItem { + #[serde(rename_all = "camelCase")] + InputText { text: String }, + #[serde(rename_all = "camelCase")] + InputImage { image_url: String }, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] @@ -3138,4 +3165,55 @@ mod tests { }) ); } + + #[test] + fn dynamic_tool_response_serializes_content_items() { + let value = serde_json::to_value(DynamicToolCallResponse { + result: DynamicToolCallResult::ContentItems { + content_items: vec![DynamicToolCallOutputContentItem::InputText { + text: "dynamic-ok".to_string(), + }], + }, + success: true, + }) + .unwrap(); + + assert_eq!( + value, + json!({ + "result": { + "type": "contentItems", + "contentItems": [ + { + "type": "inputText", + "text": "dynamic-ok" + } + ] + }, + "success": true, + }) + ); + } + + #[test] + fn dynamic_tool_response_serializes_plain_text_output() { + let value = serde_json::to_value(DynamicToolCallResponse { + result: DynamicToolCallResult::Output { + output: "dynamic-ok".to_string(), + }, + success: true, + }) + .unwrap(); + + assert_eq!( + value, + json!({ + "result": { + "type": "output", + "output": "dynamic-ok" + }, + "success": true, + }) + ); + } } diff --git a/codex-rs/app-server/src/bespoke_event_handling.rs b/codex-rs/app-server/src/bespoke_event_handling.rs index 55b185e5a86..153549ff5b8 100644 --- a/codex-rs/app-server/src/bespoke_event_handling.rs 
+++ b/codex-rs/app-server/src/bespoke_event_handling.rs @@ -89,6 +89,7 @@ use codex_core::review_format::format_review_findings_block; use codex_core::review_prompts; use codex_protocol::ThreadId; use codex_protocol::dynamic_tools::DynamicToolResponse as CoreDynamicToolResponse; +use codex_protocol::dynamic_tools::DynamicToolResult as CoreDynamicToolResult; use codex_protocol::plan_tool::UpdatePlanArgs; use codex_protocol::protocol::ReviewOutputEvent; use codex_protocol::request_user_input::RequestUserInputAnswer as CoreRequestUserInputAnswer; @@ -352,7 +353,9 @@ pub(crate) async fn apply_bespoke_event_handling( id: call_id.clone(), response: CoreDynamicToolResponse { call_id, - output: "dynamic tool calls require api v2".to_string(), + result: CoreDynamicToolResult::Output { + output: "dynamic tool calls require api v2".to_string(), + }, success: false, }, }) diff --git a/codex-rs/app-server/src/dynamic_tools.rs b/codex-rs/app-server/src/dynamic_tools.rs index a1b424d0ee7..542dab8b498 100644 --- a/codex-rs/app-server/src/dynamic_tools.rs +++ b/codex-rs/app-server/src/dynamic_tools.rs @@ -1,6 +1,9 @@ use codex_app_server_protocol::DynamicToolCallResponse; +use codex_app_server_protocol::DynamicToolCallResult; use codex_core::CodexThread; +use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem as CoreDynamicToolCallOutputContentItem; use codex_protocol::dynamic_tools::DynamicToolResponse as CoreDynamicToolResponse; +use codex_protocol::dynamic_tools::DynamicToolResult as CoreDynamicToolResult; use codex_protocol::protocol::Op; use std::sync::Arc; use tokio::sync::oneshot; @@ -18,7 +21,9 @@ pub(crate) async fn on_call_response( error!("request failed: {err:?}"); let fallback = CoreDynamicToolResponse { call_id: call_id.clone(), - output: "dynamic tool request failed".to_string(), + result: CoreDynamicToolResult::Output { + output: "dynamic tool request failed".to_string(), + }, success: false, }; if let Err(err) = conversation @@ -37,13 +42,24 @@ 
pub(crate) async fn on_call_response( let response = serde_json::from_value::(value).unwrap_or_else(|err| { error!("failed to deserialize DynamicToolCallResponse: {err}"); DynamicToolCallResponse { - output: "dynamic tool response was invalid".to_string(), + result: DynamicToolCallResult::Output { + output: "dynamic tool response was invalid".to_string(), + }, success: false, } }); + + let result = match response.result { + DynamicToolCallResult::ContentItems { content_items } => { + CoreDynamicToolResult::ContentItems { + content_items: content_items.into_iter().map(map_content_item).collect(), + } + } + DynamicToolCallResult::Output { output } => CoreDynamicToolResult::Output { output }, + }; let response = CoreDynamicToolResponse { call_id: call_id.clone(), - output: response.output, + result, success: response.success, }; if let Err(err) = conversation @@ -56,3 +72,16 @@ pub(crate) async fn on_call_response( error!("failed to submit DynamicToolResponse: {err}"); } } + +fn map_content_item( + item: codex_app_server_protocol::DynamicToolCallOutputContentItem, +) -> CoreDynamicToolCallOutputContentItem { + match item { + codex_app_server_protocol::DynamicToolCallOutputContentItem::InputText { text } => { + CoreDynamicToolCallOutputContentItem::InputText { text } + } + codex_app_server_protocol::DynamicToolCallOutputContentItem::InputImage { image_url } => { + CoreDynamicToolCallOutputContentItem::InputImage { image_url } + } + } +} diff --git a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs index dc985ac49f1..03ec181fd25 100644 --- a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs +++ b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs @@ -4,8 +4,10 @@ use app_test_support::McpProcess; use app_test_support::create_final_assistant_message_sse_response; use app_test_support::create_mock_responses_server_sequence_unchecked; use app_test_support::to_response; +use 
codex_app_server_protocol::DynamicToolCallOutputContentItem; use codex_app_server_protocol::DynamicToolCallParams; use codex_app_server_protocol::DynamicToolCallResponse; +use codex_app_server_protocol::DynamicToolCallResult; use codex_app_server_protocol::DynamicToolSpec; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::RequestId; @@ -15,6 +17,9 @@ use codex_app_server_protocol::ThreadStartResponse; use codex_app_server_protocol::TurnStartParams; use codex_app_server_protocol::TurnStartResponse; use codex_app_server_protocol::UserInput as V2UserInput; +use codex_protocol::models::FunctionCallOutputBody; +use codex_protocol::models::FunctionCallOutputContentItem; +use codex_protocol::models::FunctionCallOutputPayload; use core_test_support::responses; use pretty_assertions::assert_eq; use serde_json::Value; @@ -200,7 +205,9 @@ async fn dynamic_tool_call_round_trip_sends_output_to_model() -> Result<()> { // Respond to the tool call so the model receives a function_call_output. let response = DynamicToolCallResponse { - output: "dynamic-ok".to_string(), + result: DynamicToolCallResult::Output { + output: "dynamic-ok".to_string(), + }, success: true, }; mcp.send_response(request_id, serde_json::to_value(response)?) @@ -213,11 +220,169 @@ async fn dynamic_tool_call_round_trip_sends_output_to_model() -> Result<()> { .await??; let bodies = responses_bodies(&server).await?; - let output = bodies + let payload = bodies .iter() - .find_map(|body| function_call_output_text(body, call_id)) + .find_map(|body| function_call_output_payload(body, call_id)) .context("expected function_call_output in follow-up request")?; - assert_eq!(output, "dynamic-ok"); + let expected_payload = FunctionCallOutputPayload::from_text("dynamic-ok".to_string()); + assert_eq!(payload, expected_payload); + + Ok(()) +} + +/// Ensures dynamic tool call responses can include structured content items. 
+#[tokio::test] +async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<()> { + let call_id = "dyn-call-items-1"; + let tool_name = "demo_tool"; + let tool_args = json!({ "city": "Paris" }); + let tool_call_arguments = serde_json::to_string(&tool_args)?; + + let responses = vec![ + responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call(call_id, tool_name, &tool_call_arguments), + responses::ev_completed("resp-1"), + ]), + create_final_assistant_message_sse_response("Done")?, + ]; + let server = create_mock_responses_server_sequence_unchecked(responses).await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let dynamic_tool = DynamicToolSpec { + name: tool_name.to_string(), + description: "Demo dynamic tool".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "city": { "type": "string" } + }, + "required": ["city"], + "additionalProperties": false, + }), + }; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams { + dynamic_tools: Some(vec![dynamic_tool]), + ..Default::default() + }) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. 
} = to_response::(thread_resp)?; + + let turn_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "Run the tool".to_string(), + text_elements: Vec::new(), + }], + ..Default::default() + }) + .await?; + let turn_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req)), + ) + .await??; + let TurnStartResponse { turn } = to_response::(turn_resp)?; + + let request = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_request_message(), + ) + .await??; + let (request_id, params) = match request { + ServerRequest::DynamicToolCall { request_id, params } => (request_id, params), + other => panic!("expected DynamicToolCall request, got {other:?}"), + }; + + let expected = DynamicToolCallParams { + thread_id: thread.id, + turn_id: turn.id, + call_id: call_id.to_string(), + tool: tool_name.to_string(), + arguments: tool_args, + }; + assert_eq!(params, expected); + + let response_content_items = vec![ + DynamicToolCallOutputContentItem::InputText { + text: "dynamic-ok".to_string(), + }, + DynamicToolCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + ]; + let content_items = response_content_items + .clone() + .into_iter() + .map(|item| match item { + DynamicToolCallOutputContentItem::InputText { text } => { + FunctionCallOutputContentItem::InputText { text } + } + DynamicToolCallOutputContentItem::InputImage { image_url } => { + FunctionCallOutputContentItem::InputImage { image_url } + } + }) + .collect::>(); + let response = DynamicToolCallResponse { + result: DynamicToolCallResult::ContentItems { + content_items: response_content_items, + }, + success: true, + }; + mcp.send_response(request_id, serde_json::to_value(response)?) 
+ .await?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let bodies = responses_bodies(&server).await?; + let output_value = bodies + .iter() + .find_map(|body| function_call_output_raw_output(body, call_id)) + .context("expected function_call_output output in follow-up request")?; + assert_eq!( + output_value, + json!([ + { + "type": "input_text", + "text": "dynamic-ok" + }, + { + "type": "input_image", + "image_url": "data:image/png;base64,AAA" + } + ]) + ); + + let payload = bodies + .iter() + .find_map(|body| function_call_output_payload(body, call_id)) + .context("expected function_call_output in follow-up request")?; + assert_eq!( + payload.body, + FunctionCallOutputBody::ContentItems(content_items.clone()) + ); + assert_eq!(payload.success, None); + assert_eq!( + serde_json::to_string(&payload)?, + serde_json::to_string(&content_items)? + ); Ok(()) } @@ -248,7 +413,12 @@ fn find_tool<'a>(body: &'a Value, name: &str) -> Option<&'a Value> { }) } -fn function_call_output_text(body: &Value, call_id: &str) -> Option { +fn function_call_output_payload(body: &Value, call_id: &str) -> Option { + function_call_output_raw_output(body, call_id) + .and_then(|output| serde_json::from_value(output).ok()) +} + +fn function_call_output_raw_output(body: &Value, call_id: &str) -> Option { body.get("input") .and_then(Value::as_array) .and_then(|items| { @@ -258,8 +428,7 @@ fn function_call_output_text(body: &Value, call_id: &str) -> Option { }) }) .and_then(|item| item.get("output")) - .and_then(Value::as_str) - .map(str::to_string) + .cloned() } fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> { diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index 2614ce83efa..5a4eea8836c 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -3,6 +3,7 @@ use crate::config::types::Personality; use 
crate::error::Result; pub use codex_api::common::ResponseEvent; use codex_protocol::models::BaseInstructions; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::ResponseItem; use futures::Stream; use serde::Deserialize; @@ -97,9 +98,11 @@ fn reserialize_shell_outputs(items: &mut [ResponseItem]) { } ResponseItem::FunctionCallOutput { call_id, output } => { if shell_call_ids.remove(call_id) - && let Some(structured) = parse_structured_shell_output(&output.content) + && let Some(structured) = output + .text_content() + .and_then(parse_structured_shell_output) { - output.content = structured + output.body = FunctionCallOutputBody::Text(structured); } } _ => {} diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index ba34af5ed8c..fd8c2aef971 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -5114,13 +5114,14 @@ mod tests { let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { - content: serde_json::to_string(&json!({ - "ok": true, - "value": 42 - })) - .unwrap(), + body: codex_protocol::models::FunctionCallOutputBody::Text( + serde_json::to_string(&json!({ + "ok": true, + "value": 42 + })) + .unwrap(), + ), success: Some(true), - ..Default::default() }; assert_eq!(expected, got); @@ -5157,10 +5158,10 @@ mod tests { let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { - content: serde_json::to_string(&vec![text_block("hello"), text_block("world")]) - .unwrap(), + body: codex_protocol::models::FunctionCallOutputBody::Text( + serde_json::to_string(&vec![text_block("hello"), text_block("world")]).unwrap(), + ), success: Some(true), - ..Default::default() }; assert_eq!(expected, got); @@ -5177,9 +5178,10 @@ mod tests { let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { - content: serde_json::to_string(&json!({ "message": "bad" })).unwrap(), + body: 
codex_protocol::models::FunctionCallOutputBody::Text( + serde_json::to_string(&json!({ "message": "bad" })).unwrap(), + ), success: Some(false), - ..Default::default() }; assert_eq!(expected, got); @@ -5196,9 +5198,10 @@ mod tests { let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { - content: serde_json::to_string(&vec![text_block("alpha")]).unwrap(), + body: codex_protocol::models::FunctionCallOutputBody::Text( + serde_json::to_string(&vec![text_block("alpha")]).unwrap(), + ), success: Some(true), - ..Default::default() }; assert_eq!(expected, got); diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index a29f7df7e03..65a7cf99602 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -10,6 +10,7 @@ use crate::truncate::truncate_function_output_items_with_policy; use crate::truncate::truncate_text; use crate::user_shell_command::is_user_shell_command_text; use codex_protocol::models::ContentItem; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; @@ -136,7 +137,7 @@ impl ContextManager { match &mut self.items[index] { ResponseItem::FunctionCallOutput { output, .. 
} => { - let Some(content_items) = output.content_items.as_mut() else { + let Some(content_items) = output.content_items_mut() else { return false; }; let mut replaced = false; @@ -270,19 +271,23 @@ impl ContextManager { let policy_with_serialization_budget = policy * 1.2; match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let truncated = - truncate_text(output.content.as_str(), policy_with_serialization_budget); - let truncated_items = output.content_items.as_ref().map(|items| { - truncate_function_output_items_with_policy( - items, - policy_with_serialization_budget, - ) - }); + let body = match &output.body { + FunctionCallOutputBody::Text(content) => FunctionCallOutputBody::Text( + truncate_text(content, policy_with_serialization_budget), + ), + FunctionCallOutputBody::ContentItems(items) => { + FunctionCallOutputBody::ContentItems( + truncate_function_output_items_with_policy( + items, + policy_with_serialization_budget, + ), + ) + } + }; ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { - content: truncated, - content_items: truncated_items, + body, success: output.success, }, } diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index a6eba62f1ab..c1801e202d0 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -3,6 +3,7 @@ use crate::truncate; use crate::truncate::TruncationPolicy; use codex_git::GhostCommit; use codex_protocol::models::ContentItem; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::LocalShellAction; @@ -63,10 +64,7 @@ fn user_input_text_msg(text: &str) -> ResponseItem { fn function_call_output(call_id: &str, content: &str) -> ResponseItem { ResponseItem::FunctionCallOutput { call_id: 
call_id.to_string(), - output: FunctionCallOutputPayload { - content: content.to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text(content.to_string()), } } @@ -263,10 +261,7 @@ fn remove_first_item_removes_matching_output_for_function_call() { }, ResponseItem::FunctionCallOutput { call_id: "call-1".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ]; let mut h = create_history_with_items(items); @@ -279,10 +274,7 @@ fn remove_first_item_removes_matching_call_for_output() { let items = vec![ ResponseItem::FunctionCallOutput { call_id: "call-2".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ResponseItem::FunctionCall { id: None, @@ -308,10 +300,7 @@ fn remove_last_item_removes_matching_call_for_output() { }, ResponseItem::FunctionCallOutput { call_id: "call-delete-last".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ]; let mut h = create_history_with_items(items); @@ -327,10 +316,11 @@ fn replace_last_turn_images_replaces_tool_output_images() { ResponseItem::FunctionCallOutput { call_id: "call-1".to_string(), output: FunctionCallOutputPayload { - content: "ok".to_string(), - content_items: Some(vec![FunctionCallOutputContentItem::InputImage { - image_url: "data:image/png;base64,AAA".to_string(), - }]), + body: FunctionCallOutputBody::ContentItems(vec![ + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + ]), success: Some(true), }, }, @@ -346,10 +336,11 @@ fn replace_last_turn_images_replaces_tool_output_images() { ResponseItem::FunctionCallOutput { call_id: "call-1".to_string(), output: 
FunctionCallOutputPayload { - content: "ok".to_string(), - content_items: Some(vec![FunctionCallOutputContentItem::InputText { - text: "Invalid image".to_string(), - }]), + body: FunctionCallOutputBody::ContentItems(vec![ + FunctionCallOutputContentItem::InputText { + text: "Invalid image".to_string(), + }, + ]), success: Some(true), }, }, @@ -391,10 +382,7 @@ fn remove_first_item_handles_local_shell_pair() { }, ResponseItem::FunctionCallOutput { call_id: "call-3".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ]; let mut h = create_history_with_items(items); @@ -560,10 +548,7 @@ fn normalization_retains_local_shell_outputs() { }, ResponseItem::FunctionCallOutput { call_id: "shell-1".to_string(), - output: FunctionCallOutputPayload { - content: "Total output lines: 1\n\nok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("Total output lines: 1\n\nok".to_string()), }, ]; @@ -583,9 +568,8 @@ fn record_items_truncates_function_call_output_content() { let item = ResponseItem::FunctionCallOutput { call_id: "call-100".to_string(), output: FunctionCallOutputPayload { - content: long_output.clone(), + body: FunctionCallOutputBody::Text(long_output.clone()), success: Some(true), - ..Default::default() }, }; @@ -594,16 +578,15 @@ fn record_items_truncates_function_call_output_content() { assert_eq!(history.items.len(), 1); match &history.items[0] { ResponseItem::FunctionCallOutput { output, .. 
} => { - assert_ne!(output.content, long_output); + let content = output.text_content().unwrap_or_default(); + assert_ne!(content, long_output); assert!( - output.content.contains("tokens truncated"), - "expected token-based truncation marker, got {}", - output.content + content.contains("tokens truncated"), + "expected token-based truncation marker, got {content}" ); assert!( - output.content.contains("tokens truncated"), - "expected truncation marker, got {}", - output.content + content.contains("tokens truncated"), + "expected truncation marker, got {content}" ); } other => panic!("unexpected history item: {other:?}"), @@ -648,9 +631,8 @@ fn record_items_respects_custom_token_limit() { let item = ResponseItem::FunctionCallOutput { call_id: "call-custom-limit".to_string(), output: FunctionCallOutputPayload { - content: long_output, + body: FunctionCallOutputBody::Text(long_output), success: Some(true), - ..Default::default() }, }; @@ -660,7 +642,11 @@ fn record_items_respects_custom_token_limit() { ResponseItem::FunctionCallOutput { output, .. 
} => output, other => panic!("unexpected history item: {other:?}"), }; - assert!(stored.content.contains("tokens truncated")); + assert!( + stored + .text_content() + .is_some_and(|content| content.contains("tokens truncated")) + ); } fn assert_truncated_message_matches(message: &str, line: &str, expected_removed: usize) { @@ -782,10 +768,7 @@ fn normalize_adds_missing_output_for_function_call() { }, ResponseItem::FunctionCallOutput { call_id: "call-x".to_string(), - output: FunctionCallOutputPayload { - content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ] ); @@ -859,10 +842,7 @@ fn normalize_adds_missing_output_for_local_shell_call_with_id() { }, ResponseItem::FunctionCallOutput { call_id: "shell-1".to_string(), - output: FunctionCallOutputPayload { - content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ] ); @@ -873,10 +853,7 @@ fn normalize_adds_missing_output_for_local_shell_call_with_id() { fn normalize_removes_orphan_function_call_output() { let items = vec![ResponseItem::FunctionCallOutput { call_id: "orphan-1".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }]; let mut h = create_history_with_items(items); @@ -913,10 +890,7 @@ fn normalize_mixed_inserts_and_removals() { // Orphan output that should be removed ResponseItem::FunctionCallOutput { call_id: "c2".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, // Will get an inserted custom tool output ResponseItem::CustomToolCall { @@ -955,10 +929,7 @@ fn normalize_mixed_inserts_and_removals() { }, ResponseItem::FunctionCallOutput { call_id: "c1".to_string(), - output: FunctionCallOutputPayload { - 
content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ResponseItem::CustomToolCall { id: None, @@ -985,10 +956,7 @@ fn normalize_mixed_inserts_and_removals() { }, ResponseItem::FunctionCallOutput { call_id: "s1".to_string(), - output: FunctionCallOutputPayload { - content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ] ); @@ -1015,10 +983,7 @@ fn normalize_adds_missing_output_for_function_call_inserts_output() { }, ResponseItem::FunctionCallOutput { call_id: "call-x".to_string(), - output: FunctionCallOutputPayload { - content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ] ); @@ -1065,10 +1030,7 @@ fn normalize_adds_missing_output_for_local_shell_call_with_id_panics_in_debug() fn normalize_removes_orphan_function_call_output_panics_in_debug() { let items = vec![ResponseItem::FunctionCallOutput { call_id: "orphan-1".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }]; let mut h = create_history_with_items(items); h.normalize_history(); @@ -1099,10 +1061,7 @@ fn normalize_mixed_inserts_and_removals_panics_in_debug() { }, ResponseItem::FunctionCallOutput { call_id: "c2".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ResponseItem::CustomToolCall { id: None, diff --git a/codex-rs/core/src/context_manager/normalize.rs b/codex-rs/core/src/context_manager/normalize.rs index 85e25e32aa8..37e177900fc 100644 --- a/codex-rs/core/src/context_manager/normalize.rs +++ b/codex-rs/core/src/context_manager/normalize.rs @@ -1,5 +1,6 @@ use std::collections::HashSet; +use 
codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; @@ -29,7 +30,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec) { ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { - content: "aborted".to_string(), + body: FunctionCallOutputBody::Text("aborted".to_string()), ..Default::default() }, }, @@ -76,7 +77,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec) { ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { - content: "aborted".to_string(), + body: FunctionCallOutputBody::Text("aborted".to_string()), ..Default::default() }, }, diff --git a/codex-rs/core/src/mcp_tool_call.rs b/codex-rs/core/src/mcp_tool_call.rs index 75248f34cc1..95a41e1ebf9 100644 --- a/codex-rs/core/src/mcp_tool_call.rs +++ b/codex-rs/core/src/mcp_tool_call.rs @@ -11,6 +11,7 @@ use crate::protocol::McpInvocation; use crate::protocol::McpToolCallBeginEvent; use crate::protocol::McpToolCallEndEvent; use codex_protocol::mcp::CallToolResult; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; use codex_protocol::protocol::AskForApproval; @@ -44,9 +45,8 @@ pub(crate) async fn handle_mcp_tool_call( return ResponseInputItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { - content: format!("err: {e}"), + body: FunctionCallOutputBody::Text(format!("err: {e}")), success: Some(false), - ..Default::default() }, }; } diff --git a/codex-rs/core/src/stream_events_utils.rs b/codex-rs/core/src/stream_events_utils.rs index 02d98225102..a81519daf21 100644 --- a/codex-rs/core/src/stream_events_utils.rs +++ b/codex-rs/core/src/stream_events_utils.rs @@ -14,6 +14,7 @@ use crate::parse_turn_item; use crate::proposed_plan_parser::strip_proposed_plan_blocks; use 
crate::tools::parallel::ToolCallRuntime; use crate::tools::router::ToolRouter; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; @@ -108,7 +109,7 @@ pub(crate) async fn handle_output_item_done( let response = ResponseInputItem::FunctionCallOutput { call_id: String::new(), output: FunctionCallOutputPayload { - content: msg.to_string(), + body: FunctionCallOutputBody::Text(msg.to_string()), ..Default::default() }, }; @@ -131,7 +132,7 @@ pub(crate) async fn handle_output_item_done( let response = ResponseInputItem::FunctionCallOutput { call_id: String::new(), output: FunctionCallOutputPayload { - content: message, + body: FunctionCallOutputBody::Text(message), ..Default::default() }, }; @@ -236,9 +237,8 @@ pub(crate) fn response_input_to_response_item(input: &ResponseInputItem) -> Opti let output = match result { Ok(call_tool_result) => FunctionCallOutputPayload::from(call_tool_result), Err(err) => FunctionCallOutputPayload { - content: err.clone(), + body: FunctionCallOutputBody::Text(err.clone()), success: Some(false), - ..Default::default() }, }; Some(ResponseItem::FunctionCallOutput { diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index f0bbb158f5f..22e2d0b69f7 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -5,6 +5,7 @@ use crate::tools::TELEMETRY_PREVIEW_MAX_LINES; use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE; use crate::turn_diff_tracker::TurnDiffTracker; use codex_protocol::mcp::CallToolResult; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; @@ -97,13 +98,13 @@ impl ToolOutput { output: content, } } else { + let body = match content_items { + Some(content_items) 
=> FunctionCallOutputBody::ContentItems(content_items), + None => FunctionCallOutputBody::Text(content), + }; ResponseInputItem::FunctionCallOutput { call_id: call_id.to_string(), - output: FunctionCallOutputPayload { - content, - content_items, - success, - }, + output: FunctionCallOutputPayload { body, success }, } } } @@ -196,8 +197,8 @@ mod tests { match response { ResponseInputItem::FunctionCallOutput { call_id, output } => { assert_eq!(call_id, "fn-1"); - assert_eq!(output.content, "ok"); - assert!(output.content_items.is_none()); + assert_eq!(output.text_content(), Some("ok")); + assert!(output.content_items().is_none()); assert_eq!(output.success, Some(true)); } other => panic!("expected FunctionCallOutput, got {other:?}"), diff --git a/codex-rs/core/src/tools/handlers/dynamic.rs b/codex-rs/core/src/tools/handlers/dynamic.rs index a68c70b98da..2188ff6ebb2 100644 --- a/codex-rs/core/src/tools/handlers/dynamic.rs +++ b/codex-rs/core/src/tools/handlers/dynamic.rs @@ -8,8 +8,11 @@ use crate::tools::handlers::parse_arguments; use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; use async_trait::async_trait; +use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem; use codex_protocol::dynamic_tools::DynamicToolCallRequest; use codex_protocol::dynamic_tools::DynamicToolResponse; +use codex_protocol::dynamic_tools::DynamicToolResult; +use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::protocol::EventMsg; use serde_json::Value; use tokio::sync::oneshot; @@ -55,10 +58,21 @@ impl ToolHandler for DynamicToolHandler { ) })?; + let DynamicToolResponse { + result, success, .. 
+ } = response; + let (content, content_items) = match result { + DynamicToolResult::Output { output } => (output, None), + DynamicToolResult::ContentItems { content_items } => ( + content_items_to_text(Some(&content_items)).unwrap_or_default(), + Some(content_items.into_iter().map(map_content_item).collect()), + ), + }; + Ok(ToolOutput::Function { - content: response.output, - content_items: None, - success: Some(response.success), + content, + content_items, + success: Some(success), }) } } @@ -96,3 +110,73 @@ async fn request_dynamic_tool( session.send_event(turn_context, event).await; rx_response.await.ok() } + +fn content_items_to_text( + content_items: Option<&[DynamicToolCallOutputContentItem]>, +) -> Option { + let mut text = Vec::new(); + + for item in content_items.unwrap_or_default() { + if let DynamicToolCallOutputContentItem::InputText { text: segment } = item + && !segment.trim().is_empty() + { + text.push(segment.as_str()); + } + } + + if text.is_empty() { + None + } else { + Some(text.join("\n")) + } +} + +fn map_content_item(item: DynamicToolCallOutputContentItem) -> FunctionCallOutputContentItem { + match item { + DynamicToolCallOutputContentItem::InputText { text } => { + FunctionCallOutputContentItem::InputText { text } + } + DynamicToolCallOutputContentItem::InputImage { image_url } => { + FunctionCallOutputContentItem::InputImage { image_url } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn content_items_to_text_uses_text_content_items() { + let content_items = vec![ + DynamicToolCallOutputContentItem::InputText { + text: "line 1".to_string(), + }, + DynamicToolCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + DynamicToolCallOutputContentItem::InputText { + text: "line 2".to_string(), + }, + ]; + + let output = content_items_to_text(Some(&content_items)).unwrap_or_default(); + assert_eq!(output, "line 1\nline 2"); + } + + #[test] + fn 
content_items_to_text_ignores_empty_and_image_only_content_items() { + let content_items = vec![ + DynamicToolCallOutputContentItem::InputText { + text: " ".to_string(), + }, + DynamicToolCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + ]; + + let output = content_items_to_text(Some(&content_items)); + assert_eq!(output, None); + } +} diff --git a/codex-rs/core/src/tools/handlers/mcp.rs b/codex-rs/core/src/tools/handlers/mcp.rs index 5138b3cd0d1..0f9d4c592a7 100644 --- a/codex-rs/core/src/tools/handlers/mcp.rs +++ b/codex-rs/core/src/tools/handlers/mcp.rs @@ -57,11 +57,18 @@ impl ToolHandler for McpHandler { Ok(ToolOutput::Mcp { result }) } codex_protocol::models::ResponseInputItem::FunctionCallOutput { output, .. } => { - let codex_protocol::models::FunctionCallOutputPayload { - content, - content_items, - success, - } = output; + let success = output.success; + let (content, content_items) = match output.body { + codex_protocol::models::FunctionCallOutputBody::Text(content) => { + (content, None) + } + codex_protocol::models::FunctionCallOutputBody::ContentItems(content_items) => { + ( + serde_json::to_string(&content_items).unwrap_or_default(), + Some(content_items), + ) + } + }; Ok(ToolOutput::Function { content, content_items, diff --git a/codex-rs/core/src/tools/parallel.rs b/codex-rs/core/src/tools/parallel.rs index dcd3ae40ad6..ca08048bd8c 100644 --- a/codex-rs/core/src/tools/parallel.rs +++ b/codex-rs/core/src/tools/parallel.rs @@ -17,6 +17,7 @@ use crate::tools::context::SharedTurnDiffTracker; use crate::tools::context::ToolPayload; use crate::tools::router::ToolCall; use crate::tools::router::ToolRouter; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; @@ -119,7 +120,7 @@ impl ToolCallRuntime { _ => ResponseInputItem::FunctionCallOutput { call_id: call.call_id.clone(), output: FunctionCallOutputPayload 
{ - content: Self::abort_message(call, secs), + body: FunctionCallOutputBody::Text(Self::abort_message(call, secs)), ..Default::default() }, }, diff --git a/codex-rs/core/src/tools/router.rs b/codex-rs/core/src/tools/router.rs index 51328ccc9fd..d0390027d04 100644 --- a/codex-rs/core/src/tools/router.rs +++ b/codex-rs/core/src/tools/router.rs @@ -181,9 +181,8 @@ impl ToolRouter { ResponseInputItem::FunctionCallOutput { call_id, output: codex_protocol::models::FunctionCallOutputPayload { - content: message, + body: codex_protocol::models::FunctionCallOutputBody::Text(message), success: Some(false), - ..Default::default() }, } } diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index bd50708a2cc..ed2ae66fa0f 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -1251,10 +1251,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() { }); prompt.input.push(ResponseItem::FunctionCallOutput { call_id: "function-call-id".into(), - output: FunctionCallOutputPayload { - content: "ok".into(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".into()), }); prompt.input.push(ResponseItem::LocalShellCall { id: Some("local-shell-id".into()), diff --git a/codex-rs/protocol/src/dynamic_tools.rs b/codex-rs/protocol/src/dynamic_tools.rs index e55d372d8ec..9eaae8fd3f4 100644 --- a/codex-rs/protocol/src/dynamic_tools.rs +++ b/codex-rs/protocol/src/dynamic_tools.rs @@ -25,6 +25,28 @@ pub struct DynamicToolCallRequest { #[serde(rename_all = "camelCase")] pub struct DynamicToolResponse { pub call_id: String, - pub output: String, + pub result: DynamicToolResult, pub success: bool, } + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] +#[serde(tag = "type", rename_all = "camelCase")] +#[ts(tag = "type")] +pub enum DynamicToolResult { + #[serde(rename_all = "camelCase")] + ContentItems { + content_items: Vec, + }, + #[serde(rename_all = 
"camelCase")] + Output { output: String }, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] +#[serde(tag = "type", rename_all = "camelCase")] +#[ts(tag = "type")] +pub enum DynamicToolCallOutputContentItem { + #[serde(rename_all = "camelCase")] + InputText { text: String }, + #[serde(rename_all = "camelCase")] + InputImage { image_url: String }, +} diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index fe63666e380..4f6f95a3ae5 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -129,11 +129,11 @@ pub enum ResponseItem { arguments: String, call_id: String, }, - // NOTE: The input schema for `function_call_output` objects that clients send to the - // OpenAI /v1/responses endpoint is NOT the same shape as the objects the server returns on the - // SSE stream. When *sending* we must wrap the string output inside an object that includes a - // required `success` boolean. To ensure we serialize exactly the expected shape we introduce - // a dedicated payload struct and flatten it here. + // NOTE: The `output` field for `function_call_output` uses a dedicated payload type with + // custom serialization. On the wire it is either: + // - a plain string (`content`) + // - an array of structured content items (`content_items`) + // We keep this behavior centralized in `FunctionCallOutputPayload`. 
FunctionCallOutput { call_id: String, output: FunctionCallOutputPayload, @@ -617,9 +617,8 @@ impl From for ResponseItem { let output = match result { Ok(result) => FunctionCallOutputPayload::from(&result), Err(tool_call_err) => FunctionCallOutputPayload { - content: format!("err: {tool_call_err:?}"), + body: FunctionCallOutputBody::Text(format!("err: {tool_call_err:?}")), success: Some(false), - ..Default::default() }, }; Self::FunctionCallOutput { call_id, output } @@ -782,37 +781,82 @@ pub enum FunctionCallOutputContentItem { /// The payload we send back to OpenAI when reporting a tool call result. /// -/// `content` preserves the historical plain-string payload so downstream -/// integrations (tests, logging, etc.) can keep treating tool output as -/// `String`. When an MCP server returns richer data we additionally populate -/// `content_items` with the structured form that the Responses API understands. +/// `body` serializes directly as the wire value for `function_call_output.output`. +/// `success` remains internal metadata for downstream handling. 
#[derive(Debug, Default, Clone, PartialEq, JsonSchema, TS)] pub struct FunctionCallOutputPayload { - pub content: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub content_items: Option>, + pub body: FunctionCallOutputBody, pub success: Option, } -#[derive(Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(untagged)] -enum FunctionCallOutputPayloadSerde { +pub enum FunctionCallOutputBody { Text(String), - Items(Vec), + ContentItems(Vec), +} + +impl Default for FunctionCallOutputBody { + fn default() -> Self { + Self::Text(String::new()) + } +} + +impl FunctionCallOutputPayload { + pub fn from_text(content: String) -> Self { + Self { + body: FunctionCallOutputBody::Text(content), + success: None, + } + } + + pub fn from_content_items(content_items: Vec) -> Self { + Self { + body: FunctionCallOutputBody::ContentItems(content_items), + success: None, + } + } + + pub fn text_content(&self) -> Option<&str> { + match &self.body { + FunctionCallOutputBody::Text(content) => Some(content), + FunctionCallOutputBody::ContentItems(_) => None, + } + } + + pub fn text_content_mut(&mut self) -> Option<&mut String> { + match &mut self.body { + FunctionCallOutputBody::Text(content) => Some(content), + FunctionCallOutputBody::ContentItems(_) => None, + } + } + + pub fn content_items(&self) -> Option<&[FunctionCallOutputContentItem]> { + match &self.body { + FunctionCallOutputBody::Text(_) => None, + FunctionCallOutputBody::ContentItems(items) => Some(items), + } + } + + pub fn content_items_mut(&mut self) -> Option<&mut Vec> { + match &mut self.body { + FunctionCallOutputBody::Text(_) => None, + FunctionCallOutputBody::ContentItems(items) => Some(items), + } + } } -// The Responses API expects two *different* shapes depending on success vs failure: -// • success → output is a plain string (no nested object) -// • failure → output is an object { content, success:false } +// `function_call_output.output` is encoded as 
either: +// - an array of structured content items +// - a plain string impl Serialize for FunctionCallOutputPayload { fn serialize(&self, serializer: S) -> Result where S: Serializer, { - if let Some(items) = &self.content_items { - items.serialize(serializer) - } else { - serializer.serialize_str(&self.content) + match &self.body { + FunctionCallOutputBody::Text(content) => serializer.serialize_str(content), + FunctionCallOutputBody::ContentItems(items) => items.serialize(serializer), } } } @@ -822,20 +866,11 @@ impl<'de> Deserialize<'de> for FunctionCallOutputPayload { where D: Deserializer<'de>, { - match FunctionCallOutputPayloadSerde::deserialize(deserializer)? { - FunctionCallOutputPayloadSerde::Text(content) => Ok(FunctionCallOutputPayload { - content, - ..Default::default() - }), - FunctionCallOutputPayloadSerde::Items(items) => { - let content = serde_json::to_string(&items).map_err(serde::de::Error::custom)?; - Ok(FunctionCallOutputPayload { - content, - content_items: Some(items), - success: None, - }) - } - } + let body = FunctionCallOutputBody::deserialize(deserializer)?; + Ok(FunctionCallOutputPayload { + body, + success: None, + }) } } @@ -856,16 +891,14 @@ impl From<&CallToolResult> for FunctionCallOutputPayload { match serde_json::to_string(structured_content) { Ok(serialized_structured_content) => { return FunctionCallOutputPayload { - content: serialized_structured_content, + body: FunctionCallOutputBody::Text(serialized_structured_content), success: Some(is_success), - ..Default::default() }; } Err(err) => { return FunctionCallOutputPayload { - content: err.to_string(), + body: FunctionCallOutputBody::Text(err.to_string()), success: Some(false), - ..Default::default() }; } } @@ -875,18 +908,21 @@ impl From<&CallToolResult> for FunctionCallOutputPayload { Ok(serialized_content) => serialized_content, Err(err) => { return FunctionCallOutputPayload { - content: err.to_string(), + body: FunctionCallOutputBody::Text(err.to_string()), success: 
Some(false), - ..Default::default() }; } }; let content_items = convert_mcp_content_to_items(content); + let body = match content_items { + Some(content_items) => FunctionCallOutputBody::ContentItems(content_items), + None => FunctionCallOutputBody::Text(serialized_content), + }; + FunctionCallOutputPayload { - content: serialized_content, - content_items, + body, success: Some(is_success), } } @@ -937,19 +973,18 @@ fn convert_mcp_content_to_items( } // Implement Display so callers can treat the payload like a plain string when logging or doing -// trivial substring checks in tests (existing tests call `.contains()` on the output). Display -// returns the raw `content` field. +// trivial substring checks in tests (existing tests call `.contains()` on the output). For +// `ContentItems`, Display emits a JSON representation. impl std::fmt::Display for FunctionCallOutputPayload { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(&self.content) - } -} - -impl std::ops::Deref for FunctionCallOutputPayload { - type Target = str; - fn deref(&self) -> &Self::Target { - &self.content + match &self.body { + FunctionCallOutputBody::Text(content) => f.write_str(content), + FunctionCallOutputBody::ContentItems(items) => { + let content = serde_json::to_string(items).unwrap_or_default(); + f.write_str(content.as_str()) + } + } } } @@ -1156,10 +1191,7 @@ mod tests { fn serializes_success_as_plain_string() -> Result<()> { let item = ResponseInputItem::FunctionCallOutput { call_id: "call1".into(), - output: FunctionCallOutputPayload { - content: "ok".into(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".into()), }; let json = serde_json::to_string(&item)?; @@ -1175,9 +1207,8 @@ mod tests { let item = ResponseInputItem::FunctionCallOutput { call_id: "call1".into(), output: FunctionCallOutputPayload { - content: "bad".into(), + body: FunctionCallOutputBody::Text("bad".into()), success: Some(false), - 
..Default::default() }, }; @@ -1202,7 +1233,10 @@ mod tests { let payload = FunctionCallOutputPayload::from(&call_tool_result); assert_eq!(payload.success, Some(true)); - let items = payload.content_items.clone().expect("content items"); + let Some(items) = payload.content_items() else { + panic!("expected content items"); + }; + let items = items.to_vec(); assert_eq!( items, vec![ @@ -1243,9 +1277,10 @@ mod tests { }; let payload = FunctionCallOutputPayload::from(&call_tool_result); - let Some(items) = payload.content_items else { + let Some(items) = payload.content_items() else { panic!("expected content items"); }; + let items = items.to_vec(); assert_eq!( items, vec![FunctionCallOutputContentItem::InputImage { @@ -1274,10 +1309,14 @@ mod tests { image_url: "data:image/png;base64,XYZ".into(), }, ]; - assert_eq!(payload.content_items, Some(expected_items.clone())); - - let expected_content = serde_json::to_string(&expected_items)?; - assert_eq!(payload.content, expected_content); + assert_eq!( + payload.body, + FunctionCallOutputBody::ContentItems(expected_items.clone()) + ); + assert_eq!( + serde_json::to_string(&payload)?, + serde_json::to_string(&expected_items)? + ); Ok(()) }