diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index 374645abb35..f530b7f7f42 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -128,6 +128,9 @@ export function Session() {
   const [userMessageMarkdown, setUserMessageMarkdown] = createSignal(kv.get("user_message_markdown", true))
   const [diffWrapMode, setDiffWrapMode] = createSignal<"word" | "none">("word")
   const [animationsEnabled, setAnimationsEnabled] = createSignal(kv.get("animations_enabled", true))
+  const [compactionMethod, setCompactionMethod] = createSignal<"standard" | "collapse">(
+    kv.get("compaction_method", sync.data.config.compaction?.method ?? "standard"),
+  )

   const wide = createMemo(() => dimensions().width > 120)
   const sidebarVisible = createMemo(() => {
@@ -395,6 +398,19 @@ export function Session() {
           dialog.clear()
         },
       },
+      {
+        title: compactionMethod() === "collapse" ? "Use standard compaction" : "Use collapse compaction",
+        value: "session.toggle.compaction_method",
+        category: "Session",
+        onSelect: (dialog) => {
+          setCompactionMethod((prev) => {
+            const next = prev === "standard" ? "collapse" : "standard"
+            kv.set("compaction_method", next)
+            return next
+          })
+          dialog.clear()
+        },
+      },
       {
         title: "Unshare session",
         value: "session.unshare",
diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
index a9ed042d1bb..3efae65f602 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
@@ -92,6 +92,12 @@ export function Sidebar(props: { sessionID: string }) {
           Context
+
+          compact{" "}
+          {sync.data.config.compaction?.auto === false
+            ? "disabled"
+            : kv.get("compaction_method", sync.data.config.compaction?.method ?? "standard")}
+
           {context()?.tokens ?? 0} tokens
           {context()?.percentage ?? 0}% used
           {cost()} spent
diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts
index 012e3e12f53..60ebb3c624b 100644
--- a/packages/opencode/src/config/config.ts
+++ b/packages/opencode/src/config/config.ts
@@ -807,6 +807,42 @@ export namespace Config {
       .object({
         auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"),
         prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"),
+        method: z
+          .enum(["standard", "collapse"])
+          .optional()
+          .describe(
+            "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: standard)",
+          ),
+        trigger: z
+          .number()
+          .min(0)
+          .max(1)
+          .optional()
+          .describe("Trigger compaction at this fraction of total context (default: 0.85 = 85%)"),
+        extractRatio: z
+          .number()
+          .min(0)
+          .max(1)
+          .optional()
+          .describe("For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)"),
+        recentRatio: z
+          .number()
+          .min(0)
+          .max(1)
+          .optional()
+          .describe("For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)"),
+        summaryMaxTokens: z
+          .number()
+          .min(1000)
+          .max(50000)
+          .optional()
+          .describe("For collapse mode: target token count for the summary output (default: 10000)"),
+        previousSummaries: z
+          .number()
+          .min(0)
+          .max(10)
+          .optional()
+          .describe("For collapse mode: number of previous summaries to include for context merging (default: 3)"),
       })
       .optional(),
     experimental:
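The new `compaction` options above map one-to-one onto user config. A minimal sketch of what the parsed shape might look like, with the field shape restated as a local type (the `CompactionConfig` name is invented here for illustration; the values shown are just the documented defaults):

```ts
// Hypothetical local mirror of the zod schema above, for illustration only.
type CompactionConfig = {
  auto?: boolean
  prune?: boolean
  method?: "standard" | "collapse"
  trigger?: number
  extractRatio?: number
  recentRatio?: number
  summaryMaxTokens?: number
  previousSummaries?: number
}

const example: { compaction: CompactionConfig } = {
  compaction: {
    method: "collapse", // opt in to collapse mode
    trigger: 0.85, // compact at 85% of the context window
    extractRatio: 0.65, // distill the oldest 65% of tokens
    recentRatio: 0.15, // keep the newest 15% as reference context
    summaryMaxTokens: 10000,
    previousSummaries: 3,
  },
}
```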
diff --git a/packages/opencode/src/id/id.ts b/packages/opencode/src/id/id.ts
index ad6e22e1bee..6bc6356ced8 100644
--- a/packages/opencode/src/id/id.ts
+++ b/packages/opencode/src/id/id.ts
@@ -15,7 +15,11 @@ export namespace Identifier {
     return z.string().startsWith(prefixes[prefix])
   }

+  // Total ID length after prefix: 6 bytes hex (12 chars) + 14 random chars = 26 chars
+  // Note: the 6-byte format truncates the high byte but maintains backwards compatibility.
+  // Use createLike() with a 7-byte reference ID when inserting at past timestamps.
   const LENGTH = 26
+  const TIME_BYTES = 6

   // State for monotonic ID generation
   let lastTimestamp = 0
@@ -59,15 +63,140 @@ export namespace Identifier {
     }
     counter++

+    // Encode timestamp * 0x1000 + counter into 6 bytes (48 bits).
+    // Note: this truncates the high byte for modern timestamps, but all IDs
+    // created at "now" will have the same truncation, so they sort correctly.
+    // The truncation only matters when inserting at past timestamps (use createLike for that).
     let now = BigInt(currentTimestamp) * BigInt(0x1000) + BigInt(counter)

     now = descending ? ~now : now

-    const timeBytes = Buffer.alloc(6)
-    for (let i = 0; i < 6; i++) {
-      timeBytes[i] = Number((now >> BigInt(40 - 8 * i)) & BigInt(0xff))
+    const timeBytes = Buffer.alloc(TIME_BYTES)
+    for (let i = 0; i < TIME_BYTES; i++) {
+      timeBytes[i] = Number((now >> BigInt((TIME_BYTES - 1 - i) * 8)) & BigInt(0xff))
     }

-    return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - 12)
+    return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - TIME_BYTES * 2)
+  }
+
+  /**
+   * Detect the byte format (6 or 7) of an existing ID.
+   * 6-byte IDs: 12 hex chars + 14 random = 26 total after prefix
+   * 7-byte IDs: 14 hex chars + 12 random = 26 total after prefix
+   */
+  export function detectFormat(id: string): 6 | 7 {
+    const underscoreIndex = id.indexOf("_")
+    if (underscoreIndex === -1) return TIME_BYTES as 6 | 7
+
+    const afterPrefix = id.slice(underscoreIndex + 1)
+
+    // Check if the first 14 chars are all valid hex (would indicate 7-byte format)
+    const first14 = afterPrefix.slice(0, 14)
+    const isValidHex14 = /^[0-9a-f]{14}$/i.test(first14)
+
+    if (isValidHex14) {
+      // Could be 7-byte format; verify by checking if it decodes to a valid timestamp
+      try {
+        const bigValue = BigInt("0x" + first14)
+        const ts = Number(bigValue / BigInt(0x1000))
+
+        // Check if this looks like a valid modern timestamp (after 2020, before 2100)
+        const year2020 = 1577836800000
+        const year2100 = 4102444800000
+        if (ts >= year2020 && ts < year2100) {
+          return 7
+        }
+      } catch {
+        // Not valid hex, fall through to 6-byte
+      }
+    }
+
+    // Otherwise assume 6-byte (old format)
+    return 6
+  }
+
+  /**
+   * Create an ID that sorts immediately after a reference ID.
+   *
+   * This works by extracting the raw encoded value from the reference ID and
+   * incrementing it, ensuring the new ID sorts correctly regardless of the
+   * byte format (6 or 7 bytes).
+   *
+   * @param referenceId - The ID to sort after
+   * @param prefix - The prefix for the new ID (e.g., "message", "part")
+   * @param descending - Whether to use descending order (usually false)
+   * @param offsetMs - Milliseconds to add to the reference timestamp (default 1)
+   */
+  export function createLike(
+    referenceId: string,
+    prefix: keyof typeof prefixes,
+    descending: boolean,
+    offsetMs: number = 1,
+  ): string {
+    const format = detectFormat(referenceId)
+    const underscoreIndex = referenceId.indexOf("_")
+    if (underscoreIndex === -1) {
+      throw new Error(`Invalid reference ID: ${referenceId}`)
+    }
+
+    // Extract the hex timestamp portion from the reference ID
+    const hexPart = referenceId.slice(underscoreIndex + 1, underscoreIndex + 1 + format * 2)
+    const referenceValue = BigInt("0x" + hexPart)
+
+    // Add the offset (in the encoded space: offsetMs * 0x1000).
+    // This ensures the new ID sorts after the reference regardless of truncation.
+    let newValue = referenceValue + BigInt(offsetMs) * BigInt(0x1000) + BigInt(1) // +1 for counter
+
+    newValue = descending ? ~newValue : newValue
+
+    const timeBytes = Buffer.alloc(format)
+    for (let i = 0; i < format; i++) {
+      timeBytes[i] = Number((newValue >> BigInt((format - 1 - i) * 8)) & BigInt(0xff))
+    }
+
+    const randomLength = LENGTH - format * 2
+    return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(randomLength)
+  }
+
+  /**
+   * Decode the timestamp from an ID.
+   * Handles both old 6-byte IDs and new 7-byte IDs.
+   */
+  export function decodeTimestamp(id: string): { timestamp: number; counter: number } | null {
+    const underscoreIndex = id.indexOf("_")
+    if (underscoreIndex === -1) return null
+
+    const hexPart = id.slice(underscoreIndex + 1)
+
+    // Determine whether this is an old 6-byte ID or a new 7-byte ID.
+    // Old IDs: 12 hex chars for time + 14 random = 26 total after prefix
+    // New IDs: 14 hex chars for time + 12 random = 26 total after prefix
+    // We can detect by checking if the first 14 chars decode to a reasonable timestamp.
+
+    // Try 7-byte (new format) first
+    if (hexPart.length >= 14) {
+      const hex7 = hexPart.slice(0, 14)
+      const bigValue7 = BigInt("0x" + hex7)
+      const ts7 = Number(bigValue7 / BigInt(0x1000))
+      const counter7 = Number(bigValue7 % BigInt(0x1000))
+
+      // Check if this looks like a valid modern timestamp (after 2020, before 2100)
+      const year2020 = 1577836800000
+      const year2100 = 4102444800000
+      if (ts7 >= year2020 && ts7 < year2100) {
+        return { timestamp: ts7, counter: counter7 }
+      }
+    }
+
+    // Try 6-byte (old format)
+    if (hexPart.length >= 12) {
+      const hex6 = hexPart.slice(0, 12)
+      const bigValue6 = BigInt("0x" + hex6)
+      const ts6 = Number(bigValue6 / BigInt(0x1000))
+      const counter6 = Number(bigValue6 % BigInt(0x1000))
+      return { timestamp: ts6, counter: counter6 }
+    }

+    return null
   }
 }
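The encoding above is subtle: `create()` packs `timestamp * 0x1000 + counter` into 6 bytes, deliberately dropping the high byte. A standalone sketch of why IDs minted at the same moment still sort, and how a `createLike`-style increment lands just after its reference. This re-implements the packing locally rather than importing the real `Identifier` module:

```ts
// Pack the low `bytes` bytes of a bigint into a hex string (same loop shape as above).
function encode(value: bigint, bytes: number): string {
  const buf = Buffer.alloc(bytes)
  for (let i = 0; i < bytes; i++) {
    buf[i] = Number((value >> BigInt((bytes - 1 - i) * 8)) & BigInt(0xff))
  }
  return buf.toString("hex")
}

const now = BigInt(Date.now()) * BigInt(0x1000) + BigInt(1)
const a = encode(now, 6) // 12 hex chars, high byte truncated
const b = encode(now + BigInt(0x1000) + BigInt(1), 6) // one "millisecond" plus one counter tick later

// Both sides lose the same high byte, so lexicographic order survives
// (barring an astronomically unlikely wrap at the 48-bit boundary).
console.log(a < b) // true
```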
diff --git a/packages/opencode/src/server/server.ts b/packages/opencode/src/server/server.ts
index f31b8ec44f5..6f32e854447 100644
--- a/packages/opencode/src/server/server.ts
+++ b/packages/opencode/src/server/server.ts
@@ -1121,6 +1121,8 @@ export namespace Server {
               break
             }
           }
+          // Create the compaction trigger, then the loop processes it.
+          // process() will route to the appropriate method (collapse or standard).
           await SessionCompaction.create({
             sessionID,
             agent: currentAgent,
diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 42bab2eb975..886ebbb161a 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -14,6 +14,8 @@ import { fn } from "@/util/fn"
 import { Agent } from "@/agent/agent"
 import { Plugin } from "@/plugin"
 import { Config } from "@/config/config"
+import { Global } from "@/global"
+import path from "path"

 export namespace SessionCompaction {
   const log = Log.create({ service: "session.compaction" })
@@ -27,15 +29,89 @@ export namespace SessionCompaction {
     ),
   }

+  // Default configuration values
+  export const DEFAULTS = {
+    method: "standard" as const,
+    trigger: 0.85, // Trigger at 85% of usable context to leave headroom
+    extractRatio: 0.65,
+    recentRatio: 0.15,
+    summaryMaxTokens: 10000, // Target token count for the collapse summary
+    previousSummaries: 3, // Number of previous summaries to include in collapse
+  }
+
+  // Static portion of the collapse prompt template, used for token estimation
+  const COLLAPSE_PROMPT_TEMPLATE = `You are creating a comprehensive context restoration document from a collapsed conversation segment. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost.
+
+## Output Structure
+
+Create a detailed summary with the following sections:
+
+### 1. Current Task State
+### 2. Resolved Code & Lessons Learned
+### 3. User Directives
+### 4. Custom Utilities & Commands
+### 5. Design Decisions & Derived Requirements
+### 6. Technical Facts
+
+## Critical Rules
+
+- PRESERVE working code verbatim in fenced blocks
+- INCLUDE failed approaches with explanations
+- Be specific: exact paths, line numbers, function names, config values
+- Capture the "why" behind decisions, not just the "what"
+- User directives are sacred - never omit explicit user preferences
+
+## Extracted Context (to distill)
+## Recent Context (for reference)
+
+Generate the context restoration document now:`
+
+  /**
+   * Get the compaction method.
+   * Priority: TUI toggle (kv.json) > config file > default
+   */
+  export async function getMethod(): Promise<"standard" | "collapse"> {
+    const config = await Config.get()
+    const configMethod = config.compaction?.method
+
+    // Check the TUI toggle override
+    try {
+      const file = Bun.file(path.join(Global.Path.state, "kv.json"))
+      if (await file.exists()) {
+        const kv = await file.json()
+        const toggle = kv["compaction_method"]
+        if (toggle === "standard" || toggle === "collapse") {
+          return toggle
+        }
+      }
+    } catch {
+      // Ignore KV read errors
+    }
+
+    return configMethod ?? DEFAULTS.method
+  }
+
   export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
     const config = await Config.get()
     if (config.compaction?.auto === false) return false
     const context = input.model.limit.context
     if (context === 0) return false
-    const count = input.tokens.input + input.tokens.cache.read + input.tokens.output
-    const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
-    const usable = context - output
-    return count > usable
+
+    const count = input.tokens.input + input.tokens.cache.read + input.tokens.cache.write + input.tokens.output
+    const trigger = config.compaction?.trigger ?? DEFAULTS.trigger
+    const threshold = context * trigger
+    const isOver = count > threshold
+
+    log.debug("overflow check", {
+      tokens: input.tokens,
+      count,
+      context,
+      trigger,
+      threshold,
+      isOver,
+    })
+
+    return isOver
   }

   export const PRUNE_MINIMUM = 20_000
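The reworked `isOverflow` swaps the old `context - output` headroom check for a simple fractional threshold, and now counts `cache.write` tokens as well. A worked example with invented numbers, assuming a 200k-token context window and the default `trigger` of 0.85:

```ts
// Illustrative arithmetic only; the model size and token counts are made up.
const context = 200_000
const trigger = 0.85
const threshold = context * trigger // 170,000 tokens

const tokens = { input: 120_000, output: 8_000, cache: { read: 40_000, write: 6_000 } }
const count = tokens.input + tokens.cache.read + tokens.cache.write + tokens.output // 174,000

console.log(count > threshold) // true -> compaction would be triggered
```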
@@ -89,13 +165,37 @@ export namespace SessionCompaction {
     }
   }

+  /**
+   * Process compaction - routes to the appropriate method based on config.
+   * This is called via the create() -> loop() -> process() flow.
+   */
   export async function process(input: {
     parentID: string
     messages: MessageV2.WithParts[]
     sessionID: string
     abort: AbortSignal
     auto: boolean
-  }) {
+  }): Promise<"continue" | "stop"> {
+    const method = await getMethod()
+    log.info("compacting", { method })
+
+    if (method === "collapse") {
+      return processCollapse(input)
+    }
+    return processStandard(input)
+  }
+
+  /**
+   * Standard compaction: summarizes the entire conversation at the end.
+   */
+  async function processStandard(input: {
+    parentID: string
+    messages: MessageV2.WithParts[]
+    sessionID: string
+    abort: AbortSignal
+    auto: boolean
+  }): Promise<"continue" | "stop"> {
+    log.debug("standard", { parentID: input.parentID })
     const userMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User
     const agent = await Agent.get("compaction")
     const model = agent.model
@@ -192,6 +292,333 @@ export namespace SessionCompaction {
     return "continue"
   }

+  /**
+   * Collapse compaction: extract the oldest messages, distill them with AI, and insert
+   * a summary at the breakpoint. Messages before the breakpoint are filtered out by
+   * filterCompacted().
+   */
+  async function processCollapse(input: {
+    parentID: string
+    messages: MessageV2.WithParts[]
+    sessionID: string
+    abort: AbortSignal
+    auto: boolean
+  }): Promise<"continue" | "stop"> {
+    const config = await Config.get()
+    const extractRatio = config.compaction?.extractRatio ?? DEFAULTS.extractRatio
+    const recentRatio = config.compaction?.recentRatio ?? DEFAULTS.recentRatio
+    const summaryMaxTokens = config.compaction?.summaryMaxTokens ?? DEFAULTS.summaryMaxTokens
+    const previousSummariesLimit = config.compaction?.previousSummaries ?? DEFAULTS.previousSummaries
+
+    // Get the user message to determine which model we'll use
+    const originalUserMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User
+    const agent = await Agent.get("compaction")
+    const model = agent.model
+      ? await Provider.getModel(agent.model.providerID, agent.model.modelID)
+      : await Provider.getModel(originalUserMessage.model.providerID, originalUserMessage.model.modelID)
+
+    // Calculate token counts for messages first
+    const messageTokens: number[] = []
+    let totalTokens = 0
+    for (const msg of input.messages) {
+      const estimate = estimateMessageTokens(msg)
+      messageTokens.push(estimate)
+      totalTokens += estimate
+    }
+
+    // Calculate extraction targets
+    const extractTarget = Math.floor(totalTokens * extractRatio)
+    const recentTarget = Math.floor(totalTokens * recentRatio)
+
+    // Find split points
+    let extractedTokens = 0
+    let extractSplitIndex = 0
+    for (let i = 0; i < input.messages.length; i++) {
+      if (extractedTokens >= extractTarget) break
+      extractedTokens += messageTokens[i]
+      extractSplitIndex = i + 1
+    }
+
+    let recentTokens = 0
+    let recentSplitIndex = input.messages.length
+    for (let i = input.messages.length - 1; i >= 0; i--) {
+      if (recentTokens >= recentTarget) break
+      recentTokens += messageTokens[i]
+      recentSplitIndex = i
+    }
+
+    // Ensure the recent split doesn't overlap with the extract
+    if (recentSplitIndex <= extractSplitIndex) {
+      recentSplitIndex = extractSplitIndex
+    }
+
+    const extractedMessages = input.messages.slice(0, extractSplitIndex)
+    const recentReferenceMessages = input.messages.slice(recentSplitIndex)
+
+    log.debug("collapse split", {
+      totalTokens,
+      extractTarget,
+      extractedTokens,
+      extractedMessages: extractedMessages.length,
+      recentTarget,
+      recentTokens,
+      recentMessages: recentReferenceMessages.length,
+    })
+
+    if (extractedMessages.length === 0) {
+      log.info("collapse skipped", { reason: "no messages to extract" })
+      return "continue"
+    }
+
+    // Convert the extracted messages to markdown for distillation
+    const markdownContent = messagesToMarkdown(extractedMessages)
+    const recentContext = messagesToMarkdown(recentReferenceMessages)
+
+    // Build the base prompt (without previous summaries) to calculate the token budget
+    const markdownTokens = Token.estimate(markdownContent)
+    const recentTokensEstimate = Token.estimate(recentContext)
+    const templateTokens = Token.estimate(COLLAPSE_PROMPT_TEMPLATE)
+    const basePromptTokens = markdownTokens + recentTokensEstimate + templateTokens
+    const contextLimit = model.limit.context
+    const outputReserve = SessionPrompt.OUTPUT_TOKEN_MAX
+    const previousSummaryBudget = Math.max(0, contextLimit - outputReserve - basePromptTokens)
+
+    // Fetch previous summaries that fit within the budget
+    const previousSummaries = await getPreviousSummaries(input.sessionID, previousSummariesLimit, previousSummaryBudget)
+
+    // Get the last extracted message to determine the breakpoint position
+    const lastExtractedMessage = extractedMessages[extractedMessages.length - 1]
+    const lastExtractedId = lastExtractedMessage.info.id
+
+    // Extract the timestamp from the last extracted message ID.
+    // Use createLike to handle both 6-byte and 7-byte ID formats.
+    const breakpointTimestamp = lastExtractedMessage.info.time.created + 1
+
+    log.debug("collapse positioning", {
+      lastExtractedId,
+      breakpointTimestamp,
+    })
+
+    // Create the compaction user message at the breakpoint position
+    const compactionUserId = Identifier.createLike(lastExtractedId, "message", false, 1)
+    const compactionUserMsg = await Session.updateMessage({
+      id: compactionUserId,
+      role: "user",
+      model: originalUserMessage.model,
+      sessionID: input.sessionID,
+      agent: originalUserMessage.agent,
+      time: {
+        created: breakpointTimestamp,
+      },
+    })
+    await Session.updatePart({
+      id: Identifier.createLike(lastExtractedId, "part", false, 1),
+      messageID: compactionUserMsg.id,
+      sessionID: input.sessionID,
+      type: "compaction",
+      auto: input.auto,
+    })
+
+    // Create the assistant summary message positioned right after the compaction user message.
+    // Use compactionUserId as the reference (not lastExtractedId) to ensure the assistant sorts
+    // immediately after the user. This prevents other messages from being created with IDs
+    // that sort between the user and the assistant.
+    const compactionAssistantId = Identifier.createLike(compactionUserId, "message", false, 1)
+    const msg = (await Session.updateMessage({
+      id: compactionAssistantId,
+      role: "assistant",
+      parentID: compactionUserMsg.id,
+      sessionID: input.sessionID,
+      mode: "compaction",
+      agent: "compaction",
+      summary: true,
+      path: {
+        cwd: Instance.directory,
+        root: Instance.worktree,
+      },
+      cost: 0,
+      tokens: {
+        output: 0,
+        input: 0,
+        reasoning: 0,
+        cache: { read: 0, write: 0 },
+      },
+      modelID: model.id,
+      providerID: model.providerID,
+      time: {
+        created: breakpointTimestamp + 1,
+      },
+    })) as MessageV2.Assistant
+
+    const processor = SessionProcessor.create({
+      assistantMessage: msg,
+      sessionID: input.sessionID,
+      model,
+      abort: input.abort,
+    })
+
+    // Allow plugins to inject context
+    const compacting = await Plugin.trigger(
+      "experimental.session.compacting",
+      { sessionID: input.sessionID },
+      { context: [], prompt: undefined },
+    )
+
+    // Build the prompt sections - only include what we have
+    const sections: string[] = []
+
+    // Instructions
+    sections.push(`You are creating a comprehensive context restoration document. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost.
+
+Create a detailed summary (target: approximately ${summaryMaxTokens} tokens) with these sections:
+1. Current Task State - what is being worked on, next steps, blockers
+2. Resolved Code & Lessons Learned - working code verbatim, failed approaches, insights
+3. User Directives - explicit preferences, style rules, things to always/never do
+4. Custom Utilities & Commands - scripts, aliases, debugging commands
+5. Design Decisions & Derived Requirements - architecture decisions, API contracts, patterns
+6. Technical Facts - file paths, function names, config values, environment details
+
+Critical rules:
+- PRESERVE working code verbatim in fenced blocks
+- INCLUDE failed approaches with explanations
+- Be specific with paths, line numbers, function names
+- Capture the "why" behind decisions
+- User directives are sacred - never omit them`)
+
+    // Previous summaries
+    if (previousSummaries.length > 0) {
+      sections.push(`
+IMPORTANT: Merge all information from these previous summaries into your new summary. Do not lose any historical context.
+
+${previousSummaries.map((summary, i) => `--- Summary ${i + 1} ---\n${summary}`).join("\n\n")}
+`)
+    }
+
+    // Extracted content
+    sections.push(`
+The following conversation content needs to be distilled into the summary:
+
+${markdownContent}
+`)
+
+    // Recent context
+    sections.push(`
+The following is recent context for reference (shows current state):
+
+${recentContext}
+`)
+
+    // Additional plugin context
+    if (compacting.context.length > 0) {
+      sections.push(`
+${compacting.context.join("\n\n")}
+`)
+    }
+
+    sections.push("Generate the context restoration document now.")
+
+    const collapsePrompt = sections.join("\n\n")
+
+    const result = await processor.process({
+      user: originalUserMessage,
+      agent,
+      abort: input.abort,
+      sessionID: input.sessionID,
+      tools: {},
+      system: [],
+      messages: [
+        {
+          role: "user",
+          content: [{ type: "text", text: collapsePrompt }],
+        },
+      ],
+      model,
+    })
+
+    // NOTE: We intentionally do NOT add a "Continue if you have next steps" message
+    // for collapse mode. The collapse summary is just context restoration - the loop
+    // should exit after the summary is generated so the user can continue naturally.
+
+    if (processor.message.error) return "stop"
+
+    // Update the token count on the chronologically last assistant message
+    // so isOverflow() sees the correct post-collapse state.
+    const allMessages = await Session.messages({ sessionID: input.sessionID })
+    const lastAssistant = allMessages
+      .filter(
+        (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } =>
+          m.info.role === "assistant" && m.info.id !== msg.id,
+      )
+      .sort((a, b) => b.info.time.created - a.info.time.created)[0]
+
+    if (lastAssistant) {
+      const originalTokens = { ...lastAssistant.info.tokens }
+      const collapseSummaryTokens = processor.message.tokens.output
+
+      const currentTotal =
+        lastAssistant.info.tokens.input +
+        lastAssistant.info.tokens.cache.read +
+        lastAssistant.info.tokens.cache.write +
+        lastAssistant.info.tokens.output
+
+      const newTotal = Math.max(0, currentTotal - extractedTokens + collapseSummaryTokens)
+
+      lastAssistant.info.tokens = {
+        input: 0,
+        output: lastAssistant.info.tokens.output,
+        reasoning: lastAssistant.info.tokens.reasoning,
+        cache: {
+          read: Math.max(0, newTotal - lastAssistant.info.tokens.output),
+          write: 0,
+        },
+      }
+      await Session.updateMessage(lastAssistant.info)
+
+      log.debug("tokens adjusted", {
+        extracted: extractedTokens,
+        summary: collapseSummaryTokens,
+        total: newTotal,
+      })
+    }
+
+    log.info("collapsed", {
+      messages: extractedMessages.length,
+      tokens: extractedTokens,
+    })
+
+    // Delete the original trigger message (created by create()) to prevent
+    // the loop from picking it up again as a pending compaction task.
+    // The trigger is the message at input.parentID - we've created a new
+    // compaction user message at the breakpoint position.
+    if (input.parentID !== compactionUserMsg.id) {
+      log.debug("cleanup trigger", { id: input.parentID })
+      // Delete the parts first
+      const triggerMsg = input.messages.find((m) => m.info.id === input.parentID)
+      if (triggerMsg) {
+        for (const part of triggerMsg.parts) {
+          await Session.removePart({
+            sessionID: input.sessionID,
+            messageID: input.parentID,
+            partID: part.id,
+          })
+        }
+      }
+      await Session.removeMessage({
+        sessionID: input.sessionID,
+        messageID: input.parentID,
+      })
+    }
+
+    Bus.publish(Event.Compacted, { sessionID: input.sessionID })
+
+    // For auto-compaction: return "continue" so the loop processes the user's
+    // original message that triggered the overflow. The trigger message is deleted,
+    // so the loop will find the real user message and respond to it.
+    // For manual compaction: return "stop" - the user explicitly requested compaction only.
+    if (input.auto) {
+      return "continue"
+    }
+    return "stop"
+  }
+
   export const create = fn(
     z.object({
       sessionID: Identifier.schema("session"),
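The split logic in `processCollapse` walks forward until `extractRatio` of the total tokens is covered, then backward for `recentRatio`; anything between the two indices is kept verbatim. A self-contained sketch of that arithmetic over invented per-message token counts (the defaults 0.65 and 0.15 are from `DEFAULTS` above):

```ts
// Invented message sizes, purely for illustration.
const messageTokens = [4000, 3000, 5000, 2000, 1000, 1500, 500]
const totalTokens = messageTokens.reduce((a, b) => a + b, 0) // 17,000

const extractTarget = Math.floor(totalTokens * 0.65) // 11,050
const recentTarget = Math.floor(totalTokens * 0.15) // 2,550

// Walk forward until the extract target is covered.
let extracted = 0
let extractSplitIndex = 0
for (let i = 0; i < messageTokens.length; i++) {
  if (extracted >= extractTarget) break
  extracted += messageTokens[i]
  extractSplitIndex = i + 1
}

// Walk backward until the recent target is covered.
let recent = 0
let recentSplitIndex = messageTokens.length
for (let i = messageTokens.length - 1; i >= 0; i--) {
  if (recent >= recentTarget) break
  recent += messageTokens[i]
  recentSplitIndex = i
}
if (recentSplitIndex <= extractSplitIndex) recentSplitIndex = extractSplitIndex

// Messages [0, 3) are distilled, [3, 4) stay verbatim, [4, 7) become reference context.
console.log({ extractSplitIndex, recentSplitIndex }) // { extractSplitIndex: 3, recentSplitIndex: 4 }
```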
@@ -222,4 +649,107 @@ export namespace SessionCompaction {
       })
     },
   )
+
+  /**
+   * Estimate tokens for a message (respects compaction state)
+   */
+  function estimateMessageTokens(msg: MessageV2.WithParts): number {
+    let tokens = 0
+    for (const part of msg.parts) {
+      if (part.type === "text") {
+        tokens += Token.estimate(part.text)
+      } else if (part.type === "tool" && part.state.status === "completed") {
+        // Skip compacted tool outputs
+        if (part.state.time.compacted) continue
+        tokens += Token.estimate(JSON.stringify(part.state.input))
+        tokens += Token.estimate(part.state.output)
+      }
+    }
+    return tokens
+  }
+
+  /**
+   * Convert messages to markdown format for distillation
+   */
+  function messagesToMarkdown(messages: MessageV2.WithParts[]): string {
+    const lines: string[] = []
+
+    for (const msg of messages) {
+      const role = msg.info.role === "user" ? "User" : "Assistant"
+      lines.push(`### ${role}`)
+      lines.push("")
+
+      for (const part of msg.parts) {
+        if (part.type === "text" && part.text) {
+          // Skip synthetic parts like "Continue if you have next steps"
+          if (part.synthetic) continue
+          lines.push(part.text)
+          lines.push("")
+        } else if (part.type === "tool" && part.state.status === "completed") {
+          // Skip compacted tool outputs
+          if (part.state.time.compacted) continue
+          lines.push(`**Tool: ${part.tool}**`)
+          lines.push("```json")
+          lines.push(JSON.stringify(part.state.input, null, 2))
+          lines.push("```")
+          if (part.state.output) {
+            lines.push("Output:")
+            lines.push("```")
+            lines.push(part.state.output.slice(0, 1000))
+            if (part.state.output.length > 1000) lines.push("... (truncated)")
+            lines.push("```")
+          }
+          lines.push("")
+        }
+      }
+    }
+
+    return lines.join("\n")
+  }
+
+  /**
+   * Extract the summary text from a compaction summary message's parts
+   */
+  function extractSummaryText(msg: MessageV2.WithParts): string {
+    return msg.parts
+      .filter((p): p is MessageV2.TextPart => p.type === "text" && !p.synthetic)
+      .map((p) => p.text)
+      .join("\n")
+  }
+
+  /**
+   * Fetch previous compaction summaries from the session (unfiltered).
+   * Respects the token budget to avoid overflowing the context window.
+   */
+  async function getPreviousSummaries(sessionID: string, limit: number, tokenBudget: number): Promise<string[]> {
+    const allMessages = await Session.messages({ sessionID })
+
+    const summaryMessages = allMessages
+      .filter(
+        (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } =>
+          m.info.role === "assistant" &&
+          (m.info as MessageV2.Assistant).summary === true &&
+          (m.info as MessageV2.Assistant).finish !== undefined,
+      )
+      .sort((a, b) => a.info.time.created - b.info.time.created) // oldest first
+      .slice(-limit) // take the N most recent
+
+    // Include summaries only if they fit within the token budget.
+    // Start from the most recent (end of array) since those are most relevant.
+    const result: string[] = []
+    let tokensUsed = 0
+
+    for (let i = summaryMessages.length - 1; i >= 0; i--) {
+      const text = extractSummaryText(summaryMessages[i])
+      if (!text.trim()) continue
+
+      const estimate = Token.estimate(text)
+      if (tokensUsed + estimate > tokenBudget) break
+
+      result.unshift(text) // prepend to maintain chronological order
+      tokensUsed += estimate
+    }
+
+    return result
+  }
 }
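The helpers above size everything with `Token.estimate`, whose implementation isn't part of this diff. Purely as a reference point for the budgeting arithmetic, here is a common ~4-characters-per-token stand-in — an assumption for illustration, not opencode's actual heuristic:

```ts
// Hypothetical stand-in for Token.estimate; the real heuristic may differ.
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4)
}

console.log(estimateTokens("const x = 42")) // 3
```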
diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts
index 4285223bc5c..c64d569235d 100644
--- a/packages/opencode/src/session/index.ts
+++ b/packages/opencode/src/session/index.ts
@@ -147,12 +147,19 @@ export namespace Session {
       directory: Instance.directory,
     })
     const msgs = await messages({ sessionID: input.sessionID })
+    const idMap = new Map<string, string>()
+
     for (const msg of msgs) {
       if (input.messageID && msg.info.id >= input.messageID) break
+      const newID = Identifier.ascending("message")
+      idMap.set(msg.info.id, newID)
+
+      const parentID = msg.info.role === "assistant" && msg.info.parentID ? idMap.get(msg.info.parentID) : undefined
       const cloned = await updateMessage({
         ...msg.info,
         sessionID: session.id,
-        id: Identifier.ascending("message"),
+        id: newID,
+        ...(parentID && { parentID }),
       })
       for (const part of msg.parts) {
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index bb78ae64ce6..47eeb3a649f 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -11,8 +11,11 @@ import { ProviderTransform } from "@/provider/transform"
 import { STATUS_CODES } from "http"
 import { iife } from "@/util/iife"
 import { type SystemError } from "bun"
+import { Log } from "../util/log"

 export namespace MessageV2 {
+  const log = Log.create({ service: "message-v2" })
+
   export const OutputLengthError = NamedError.create("MessageOutputLengthError", z.object({}))
   export const AbortedError = NamedError.create("MessageAbortedError", z.object({ message: z.string() }))
   export const AuthError = NamedError.create(
@@ -577,17 +580,28 @@ export namespace MessageV2 {
   export async function filterCompacted(stream: AsyncIterable<WithParts>) {
     const result = [] as MessageV2.WithParts[]
     const completed = new Set<string>()
+
     for await (const msg of stream) {
+      const hasCompactionPart = msg.parts.some((part) => part.type === "compaction")
+      const isAssistantSummary =
+        msg.info.role === "assistant" && (msg.info as Assistant).summary && (msg.info as Assistant).finish
+
       result.push(msg)
-      if (
-        msg.info.role === "user" &&
-        completed.has(msg.info.id) &&
-        msg.parts.some((part) => part.type === "compaction")
-      )
+
+      // Check if this is a compaction breakpoint
+      if (msg.info.role === "user" && completed.has(msg.info.id) && hasCompactionPart) {
+        log.debug("breakpoint", { id: msg.info.id })
         break
-      if (msg.info.role === "assistant" && msg.info.summary && msg.info.finish) completed.add(msg.info.parentID)
+      }
+
+      // If this is an assistant with summary=true and finish, add its parentID to the completed set
+      if (isAssistantSummary) {
+        completed.add((msg.info as Assistant).parentID)
+      }
     }
+
     result.reverse()
+    log.debug("filtered", { count: result.length })
     return result
   }
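The rewritten `filterCompacted` walks a newest-first stream, records summary assistants by their `parentID`, and stops once it reaches the compaction user message they point back to. A toy walkthrough with plain objects standing in for the `MessageV2` types:

```ts
// Simplified stand-in for MessageV2.WithParts, for illustration only.
type Msg = { id: string; role: "user" | "assistant"; parentID?: string; summary?: boolean; finish?: boolean; compaction?: boolean }

function filter(newestFirst: Msg[]): Msg[] {
  const result: Msg[] = []
  const completed = new Set<string>()
  for (const msg of newestFirst) {
    result.push(msg)
    // Stop at a compaction breakpoint whose summary we have already seen.
    if (msg.role === "user" && completed.has(msg.id) && msg.compaction) break
    if (msg.role === "assistant" && msg.summary && msg.finish && msg.parentID) completed.add(msg.parentID)
  }
  return result.reverse() // back to chronological order
}

const stream: Msg[] = [
  { id: "m4", role: "assistant", parentID: "m3" },
  { id: "m3", role: "user" },
  { id: "m2", role: "assistant", parentID: "m1", summary: true, finish: true },
  { id: "m1", role: "user", compaction: true },
  { id: "m0", role: "user" }, // pre-compaction history, dropped
]
console.log(filter(stream).map((m) => m.id)) // ["m1", "m2", "m3", "m4"]
```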
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index 6bf71ef3653..f5a6c1fac7c 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -244,6 +244,7 @@ export namespace SessionPrompt {
       SessionStatus.set(sessionID, { type: "busy" })
       log.info("loop", { step, sessionID })
       if (abort.aborted) break
+
       let msgs = await MessageV2.filterCompacted(MessageV2.stream(sessionID))

       let lastUser: MessageV2.User | undefined
@@ -263,6 +264,12 @@ export namespace SessionPrompt {
         }
       }

+      log.debug("state", {
+        lastUser: lastUser?.id,
+        lastFinished: lastFinished?.id,
+        tasks: tasks.length,
+      })
+
       if (!lastUser) throw new Error("No user message found in stream. This should never happen.")
       if (
         lastAssistant?.finish &&
@@ -445,6 +452,7 @@ export namespace SessionPrompt {

       // pending compaction
       if (task?.type === "compaction") {
+        log.debug("compaction task", { auto: task.auto })
         const result = await SessionCompaction.process({
           messages: msgs,
           parentID: lastUser.id,
@@ -462,6 +470,7 @@ export namespace SessionPrompt {
         lastFinished.summary !== true &&
         (await SessionCompaction.isOverflow({ tokens: lastFinished.tokens, model }))
       ) {
+        log.info("overflow", { tokens: lastFinished.tokens })
         await SessionCompaction.create({
           sessionID,
           agent: lastUser.agent,
@@ -471,7 +480,6 @@ export namespace SessionPrompt {
         continue
       }

-      // normal processing
       const agent = await Agent.get(lastUser.agent)
       const maxSteps = agent.maxSteps ?? Infinity
       const isLastStep = step >= maxSteps
@@ -528,6 +536,16 @@ export namespace SessionPrompt {

       await Plugin.trigger("experimental.chat.messages.transform", {}, { messages: sessionMessages })

+      // Debug: log the messages being sent to the LLM
+      log.debug("llm messages", {
+        count: sessionMessages.length,
+        messageIds: sessionMessages.map((m) => m.info.id),
+        firstMessageId: sessionMessages[0]?.info.id,
+        hasCompactionSummary: sessionMessages.some(
+          (m) => m.info.role === "assistant" && (m.info as any).summary === true,
+        ),
+      })
+
       const result = await processor.process({
         user: lastUser,
         agent,
@@ -552,12 +570,31 @@ export namespace SessionPrompt {
         continue
       }
       SessionCompaction.prune({ sessionID })
+
+      // Check if there are queued requests - their user messages are already created
+      // and need processing. We need to grab them before defer() runs cancel().
+      const queued = state()[sessionID]?.callbacks ?? []
+      if (queued.length > 0) {
+        // Clear the callbacks so cancel() doesn't reject them
+        state()[sessionID].callbacks = []
+        // Schedule re-entry after this function exits (and defer runs cancel).
+        // Use setImmediate to let defer() clear state first, then re-enter the loop.
+        setImmediate(async () => {
+          const result = await loop(sessionID)
+          for (const q of queued) {
+            q.resolve(result)
+          }
+        })
+        // Return the last assistant for now - queued requests will get their real response
+        for await (const item of MessageV2.stream(sessionID)) {
+          if (item.info.role === "user") continue
+          return item
+        }
+      }
+
+      // No queued requests - return the last assistant as before
       for await (const item of MessageV2.stream(sessionID)) {
         if (item.info.role === "user") continue
-        const queued = state()[sessionID]?.callbacks ?? []
-        for (const q of queued) {
-          q.resolve(item)
-        }
         return item
       }
       throw new Error("Impossible")
diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts
index 8b3bece004f..e2af8443f6a 100644
--- a/packages/sdk/js/src/v2/gen/types.gen.ts
+++ b/packages/sdk/js/src/v2/gen/types.gen.ts
@@ -1616,6 +1616,22 @@ export type Config = {
     /**
      * Enable pruning of old tool outputs (default: true)
      */
    prune?: boolean
+    /**
+     * Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: standard)
+     */
+    method?: "standard" | "collapse"
+    /**
+     * Trigger compaction at this fraction of total context (default: 0.85 = 85%)
+     */
+    trigger?: number
+    /**
+     * For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)
+     */
+    extractRatio?: number
+    /**
+     * For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)
+     */
+    recentRatio?: number
   }
   experimental?: {
     hook?: {
diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json
index 4924a5bfac0..c693a643202 100644
--- a/packages/sdk/openapi.json
+++ b/packages/sdk/openapi.json
@@ -8684,6 +8684,29 @@
           "prune": {
             "description": "Enable pruning of old tool outputs (default: true)",
             "type": "boolean"
+          },
+          "method": {
+            "description": "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: standard)",
+            "type": "string",
+            "enum": ["standard", "collapse"]
+          },
+          "trigger": {
+            "description": "Trigger compaction at this fraction of total context (default: 0.85 = 85%)",
+            "type": "number",
+            "minimum": 0,
+            "maximum": 1
+          },
+          "extractRatio": {
+            "description": "For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)",
+            "type": "number",
+            "minimum": 0,
+            "maximum": 1
+          },
+          "recentRatio": {
+            "description": "For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)",
+            "type": "number",
+            "minimum": 0,
+            "maximum": 1
+          }
         }
       },