diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index 374645abb35..f530b7f7f42 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -128,6 +128,9 @@ export function Session() {
const [userMessageMarkdown, setUserMessageMarkdown] = createSignal(kv.get("user_message_markdown", true))
const [diffWrapMode, setDiffWrapMode] = createSignal<"word" | "none">("word")
const [animationsEnabled, setAnimationsEnabled] = createSignal(kv.get("animations_enabled", true))
+ const [compactionMethod, setCompactionMethod] = createSignal<"standard" | "collapse">(
+ kv.get("compaction_method", sync.data.config.compaction?.method ?? "standard"),
+ )
const wide = createMemo(() => dimensions().width > 120)
const sidebarVisible = createMemo(() => {
@@ -395,6 +398,19 @@ export function Session() {
dialog.clear()
},
},
+ {
+ title: compactionMethod() === "collapse" ? "Use standard compaction" : "Use collapse compaction",
+ value: "session.toggle.compaction_method",
+ category: "Session",
+ onSelect: (dialog) => {
+ setCompactionMethod((prev) => {
+ const next = prev === "standard" ? "collapse" : "standard"
+ kv.set("compaction_method", next)
+ return next
+ })
+ dialog.clear()
+ },
+ },
{
title: "Unshare session",
value: "session.unshare",
diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
index a9ed042d1bb..3efae65f602 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
@@ -92,6 +92,12 @@ export function Sidebar(props: { sessionID: string }) {
Context
+
+ compact{" "}
+ {sync.data.config.compaction?.auto === false
+ ? "disabled"
+ : kv.get("compaction_method", sync.data.config.compaction?.method ?? "standard")}
+
{context()?.tokens ?? 0} tokens
{context()?.percentage ?? 0}% used
{cost()} spent
diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts
index 012e3e12f53..60ebb3c624b 100644
--- a/packages/opencode/src/config/config.ts
+++ b/packages/opencode/src/config/config.ts
@@ -807,6 +807,42 @@ export namespace Config {
.object({
auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"),
prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"),
+ method: z
+ .enum(["standard", "collapse"])
+ .optional()
+ .describe(
+ "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: collapse)",
+ ),
+ trigger: z
+ .number()
+ .min(0)
+ .max(1)
+ .optional()
+ .describe("Trigger compaction at this fraction of total context (default: 0.85 = 85%)"),
+ extractRatio: z
+ .number()
+ .min(0)
+ .max(1)
+ .optional()
+ .describe("For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)"),
+ recentRatio: z
+ .number()
+ .min(0)
+ .max(1)
+ .optional()
+ .describe("For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)"),
+ summaryMaxTokens: z
+ .number()
+ .min(1000)
+ .max(50000)
+ .optional()
+ .describe("For collapse mode: target token count for the summary output (default: 10000)"),
+ previousSummaries: z
+ .number()
+ .min(0)
+ .max(10)
+ .optional()
+ .describe("For collapse mode: number of previous summaries to include for context merging (default: 3)"),
})
.optional(),
experimental: z
diff --git a/packages/opencode/src/id/id.ts b/packages/opencode/src/id/id.ts
index ad6e22e1bee..6bc6356ced8 100644
--- a/packages/opencode/src/id/id.ts
+++ b/packages/opencode/src/id/id.ts
@@ -15,7 +15,11 @@ export namespace Identifier {
return z.string().startsWith(prefixes[prefix])
}
+ // Total ID length after prefix: 6-byte hex timestamp (12 chars) + 14 random chars = 26 chars
+ // Note: the 6-byte format truncates the high byte of the encoded timestamp but remains
+ // backwards compatible. Use createLike() with a 7-byte reference ID when inserting at past timestamps.
const LENGTH = 26
+ const TIME_BYTES = 6
// State for monotonic ID generation
let lastTimestamp = 0
@@ -59,15 +63,140 @@ export namespace Identifier {
}
counter++
+ // Encode timestamp * 0x1000 + counter into 6 bytes (48 bits)
+ // Note: This truncates the high byte for modern timestamps, but all IDs
+ // created at "now" will have the same truncation, so they sort correctly.
+ // The truncation only matters when inserting at past timestamps (use createLike for that).
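+ // Worked example: timestamp 1700000000000 (Nov 2023) * 0x1000 = 0x18BCFE56800000, a 7-byte
+ // value; only the low 6 bytes 0xBCFE56800000 are kept here, dropping the leading 0x18.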
let now = BigInt(currentTimestamp) * BigInt(0x1000) + BigInt(counter)
now = descending ? ~now : now
- const timeBytes = Buffer.alloc(6)
- for (let i = 0; i < 6; i++) {
- timeBytes[i] = Number((now >> BigInt(40 - 8 * i)) & BigInt(0xff))
+ const timeBytes = Buffer.alloc(TIME_BYTES)
+ for (let i = 0; i < TIME_BYTES; i++) {
+ timeBytes[i] = Number((now >> BigInt((TIME_BYTES - 1 - i) * 8)) & BigInt(0xff))
}
- return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - 12)
+ return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - TIME_BYTES * 2)
+ }
+
+ /**
+ * Detect the byte format (6 or 7) of an existing ID.
+ * 6-byte IDs: 12 hex chars + 14 random = 26 total after prefix
+ * 7-byte IDs: 14 hex chars + 12 random = 26 total after prefix
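+ * Example: a first-14-hex-chars value of 0x18BCFE56800000 decodes to timestamp
+ * 1700000000000 (year 2023, inside the 2020-2100 window), so the ID is classified as 7-byte.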
+ */
+ export function detectFormat(id: string): 6 | 7 {
+ const underscoreIndex = id.indexOf("_")
+ if (underscoreIndex === -1) return 6 // no prefix separator; assume the current 6-byte format
+
+ const afterPrefix = id.slice(underscoreIndex + 1)
+
+ // Check if first 14 chars are all valid hex (would indicate 7-byte format)
+ const first14 = afterPrefix.slice(0, 14)
+ const isValidHex14 = /^[0-9a-f]{14}$/i.test(first14)
+
+ if (isValidHex14) {
+ // Could be 7-byte format, verify by checking if it decodes to a valid timestamp
+ try {
+ const bigValue = BigInt("0x" + first14)
+ const ts = Number(bigValue / BigInt(0x1000))
+
+ // Check if this looks like a valid modern timestamp (after 2020, before 2100)
+ const year2020 = 1577836800000
+ const year2100 = 4102444800000
+ if (ts >= year2020 && ts < year2100) {
+ return 7
+ }
+ } catch {
+ // Defensive: the regex above already validated the hex, so BigInt should not throw; fall through to 6-byte
+ }
+ }
+
+ // Otherwise assume 6-byte (old format)
+ return 6
+ }
+
+ /**
+ * Create an ID that sorts immediately after a reference ID.
+ *
+ * This works by extracting the raw encoded value from the reference ID and
+ * incrementing it, ensuring the new ID sorts correctly regardless of the
+ * byte format (6 or 7 bytes).
+ *
+ * @param referenceId - The ID to sort after
+ * @param prefix - The prefix for the new ID (e.g., "message", "part")
+ * @param descending - Whether to use descending order (usually false)
+ * @param offsetMs - Milliseconds to add to the reference timestamp (default 1)
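+ *
+ * @example
+ * // Hypothetical usage: mint a message ID that sorts just after an existing one
+ * // const newId = Identifier.createLike(existingMessageId, "message", false)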
+ */
+ export function createLike(
+ referenceId: string,
+ prefix: keyof typeof prefixes,
+ descending: boolean,
+ offsetMs: number = 1,
+ ): string {
+ const format = detectFormat(referenceId)
+ const underscoreIndex = referenceId.indexOf("_")
+ if (underscoreIndex === -1) {
+ throw new Error(`Invalid reference ID: ${referenceId}`)
+ }
+
+ // Extract the hex timestamp portion from the reference ID
+ const hexPart = referenceId.slice(underscoreIndex + 1, underscoreIndex + 1 + format * 2)
+ const referenceValue = BigInt("0x" + hexPart)
+
+ // Add offset (in the encoded space: offsetMs * 0x1000)
+ // This ensures the new ID sorts after the reference regardless of truncation
+ let newValue = referenceValue + BigInt(offsetMs) * BigInt(0x1000) + BigInt(1) // +1 for counter
+
+ newValue = descending ? ~newValue : newValue
+
+ const timeBytes = Buffer.alloc(format)
+ for (let i = 0; i < format; i++) {
+ timeBytes[i] = Number((newValue >> BigInt((format - 1 - i) * 8)) & BigInt(0xff))
+ }
+
+ const randomLength = LENGTH - format * 2
+ return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(randomLength)
+ }
+
+ /**
+ * Decode the timestamp from an ID.
+ * Handles both old 6-byte IDs and new 7-byte IDs.
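+ * Example: a hex time portion of 18bcfe56800000 decodes to { timestamp: 1700000000000, counter: 0 }.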
+ */
+ export function decodeTimestamp(id: string): { timestamp: number; counter: number } | null {
+ const underscoreIndex = id.indexOf("_")
+ if (underscoreIndex === -1) return null
+
+ const hexPart = id.slice(underscoreIndex + 1)
+
+ // Determine if this is an old 6-byte ID or new 7-byte ID
+ // Old IDs: 12 hex chars for time + 14 random = 26 total after prefix
+ // New IDs: 14 hex chars for time + 12 random = 26 total after prefix
+ // We can detect by checking if the first 14 chars decode to a reasonable timestamp
+
+ // Try 7-byte (new format) first
+ if (hexPart.length >= 14) {
+ const hex7 = hexPart.slice(0, 14)
+ const bigValue7 = BigInt("0x" + hex7)
+ const ts7 = Number(bigValue7 / BigInt(0x1000))
+ const counter7 = Number(bigValue7 % BigInt(0x1000))
+
+ // Check if this looks like a valid modern timestamp (after 2020, before 2100)
+ const year2020 = 1577836800000
+ const year2100 = 4102444800000
+ if (ts7 >= year2020 && ts7 < year2100) {
+ return { timestamp: ts7, counter: counter7 }
+ }
+ }
+
+ // Try 6-byte (old format)
+ if (hexPart.length >= 12) {
+ const hex6 = hexPart.slice(0, 12)
+ const bigValue6 = BigInt("0x" + hex6)
+ const ts6 = Number(bigValue6 / BigInt(0x1000))
+ const counter6 = Number(bigValue6 % BigInt(0x1000))
+ return { timestamp: ts6, counter: counter6 }
+ }
+
+ return null
}
}
diff --git a/packages/opencode/src/server/server.ts b/packages/opencode/src/server/server.ts
index f31b8ec44f5..6f32e854447 100644
--- a/packages/opencode/src/server/server.ts
+++ b/packages/opencode/src/server/server.ts
@@ -1121,6 +1121,8 @@ export namespace Server {
break
}
}
+ // Create a compaction trigger message; the prompt loop picks it up and
+ // SessionCompaction.process() routes it to the configured method (collapse or standard)
await SessionCompaction.create({
sessionID,
agent: currentAgent,
diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 42bab2eb975..886ebbb161a 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -14,6 +14,8 @@ import { fn } from "@/util/fn"
import { Agent } from "@/agent/agent"
import { Plugin } from "@/plugin"
import { Config } from "@/config/config"
+import { Global } from "@/global"
+import path from "path"
export namespace SessionCompaction {
const log = Log.create({ service: "session.compaction" })
@@ -27,15 +29,89 @@ export namespace SessionCompaction {
),
}
+ // Default configuration values
+ export const DEFAULTS = {
+ method: "standard" as const,
+ trigger: 0.85, // Trigger once usage exceeds 85% of the model's total context window
+ extractRatio: 0.65,
+ recentRatio: 0.15,
+ summaryMaxTokens: 10000, // Target token count for collapse summary
+ previousSummaries: 3, // Number of previous summaries to include in collapse
+ }
+
+ // Static skeleton of the collapse prompt, used only to estimate prompt-overhead tokens;
+ // the prompt actually sent to the model is assembled inline in processCollapse() below
+ const COLLAPSE_PROMPT_TEMPLATE = `You are creating a comprehensive context restoration document from a collapsed conversation segment. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost.
+
+## Output Structure
+
+Create a detailed summary with the following sections:
+
+### 1. Current Task State
+### 2. Resolved Code & Lessons Learned
+### 3. User Directives
+### 4. Custom Utilities & Commands
+### 5. Design Decisions & Derived Requirements
+### 6. Technical Facts
+
+## Critical Rules
+
+- PRESERVE working code verbatim in fenced blocks
+- INCLUDE failed approaches with explanations
+- Be specific: exact paths, line numbers, function names, config values
+- Capture the "why" behind decisions, not just the "what"
+- User directives are sacred - never omit explicit user preferences
+
+## Extracted Context (to distill)
+## Recent Context (for reference)
+
+Generate the context restoration document now:`
+
+ /**
+ * Get the compaction method.
+ * Priority: TUI toggle (kv.json) > config file > default
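+ * e.g. a kv.json containing { "compaction_method": "collapse" } overrides config.compaction.method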
+ */
+ export async function getMethod(): Promise<"standard" | "collapse"> {
+ const config = await Config.get()
+ const configMethod = config.compaction?.method
+
+ // Check TUI toggle override
+ try {
+ const file = Bun.file(path.join(Global.Path.state, "kv.json"))
+ if (await file.exists()) {
+ const kv = await file.json()
+ const toggle = kv["compaction_method"]
+ if (toggle === "standard" || toggle === "collapse") {
+ return toggle
+ }
+ }
+ } catch {
+ // Ignore KV read errors
+ }
+
+ return configMethod ?? DEFAULTS.method
+ }
+
export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
const config = await Config.get()
if (config.compaction?.auto === false) return false
const context = input.model.limit.context
if (context === 0) return false
- const count = input.tokens.input + input.tokens.cache.read + input.tokens.output
- const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
- const usable = context - output
- return count > usable
+
+ const count = input.tokens.input + input.tokens.cache.read + input.tokens.cache.write + input.tokens.output
+ const trigger = config.compaction?.trigger ?? DEFAULTS.trigger
+ const threshold = context * trigger
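+ // e.g. a 200_000-token context with trigger 0.85 compacts once usage exceeds 170_000 tokens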
+ const isOver = count > threshold
+
+ log.debug("overflow check", {
+ tokens: input.tokens,
+ count,
+ context,
+ trigger,
+ threshold,
+ isOver,
+ })
+
+ return isOver
}
export const PRUNE_MINIMUM = 20_000
@@ -89,13 +165,37 @@ export namespace SessionCompaction {
}
}
+ /**
+ * Process compaction - routes to appropriate method based on config.
+ * This is called via the create() -> loop() -> process() flow.
+ */
export async function process(input: {
parentID: string
messages: MessageV2.WithParts[]
sessionID: string
abort: AbortSignal
auto: boolean
- }) {
+ }): Promise<"continue" | "stop"> {
+ const method = await getMethod()
+ log.info("compacting", { method })
+
+ if (method === "collapse") {
+ return processCollapse(input)
+ }
+ return processStandard(input)
+ }
+
+ /**
+ * Standard compaction: Summarizes entire conversation at end.
+ */
+ async function processStandard(input: {
+ parentID: string
+ messages: MessageV2.WithParts[]
+ sessionID: string
+ abort: AbortSignal
+ auto: boolean
+ }): Promise<"continue" | "stop"> {
+ log.debug("standard", { parentID: input.parentID })
const userMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User
const agent = await Agent.get("compaction")
const model = agent.model
@@ -192,6 +292,333 @@ export namespace SessionCompaction {
return "continue"
}
+ /**
+ * Collapse compaction: Extract oldest messages, distill with AI, insert summary at breakpoint.
+ * Messages before the breakpoint are filtered out by filterCompacted().
+ */
+ async function processCollapse(input: {
+ parentID: string
+ messages: MessageV2.WithParts[]
+ sessionID: string
+ abort: AbortSignal
+ auto: boolean
+ }): Promise<"continue" | "stop"> {
+ const config = await Config.get()
+ const extractRatio = config.compaction?.extractRatio ?? DEFAULTS.extractRatio
+ const recentRatio = config.compaction?.recentRatio ?? DEFAULTS.recentRatio
+ const summaryMaxTokens = config.compaction?.summaryMaxTokens ?? DEFAULTS.summaryMaxTokens
+ const previousSummariesLimit = config.compaction?.previousSummaries ?? DEFAULTS.previousSummaries
+
+ // Get the user message to determine which model we'll use
+ const originalUserMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User
+ const agent = await Agent.get("compaction")
+ const model = agent.model
+ ? await Provider.getModel(agent.model.providerID, agent.model.modelID)
+ : await Provider.getModel(originalUserMessage.model.providerID, originalUserMessage.model.modelID)
+
+ // Calculate token counts for messages first
+ const messageTokens: number[] = []
+ let totalTokens = 0
+ for (const msg of input.messages) {
+ const estimate = estimateMessageTokens(msg)
+ messageTokens.push(estimate)
+ totalTokens += estimate
+ }
+
+ // Calculate extraction targets
+ const extractTarget = Math.floor(totalTokens * extractRatio)
+ const recentTarget = Math.floor(totalTokens * recentRatio)
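+ // e.g. with 100_000 total tokens and the defaults: extract the oldest ~65_000 tokens for
+ // summarization and quote the newest ~15_000 tokens as reference context in the prompt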
+
+ // Find split points
+ let extractedTokens = 0
+ let extractSplitIndex = 0
+ for (let i = 0; i < input.messages.length; i++) {
+ if (extractedTokens >= extractTarget) break
+ extractedTokens += messageTokens[i]
+ extractSplitIndex = i + 1
+ }
+
+ let recentTokens = 0
+ let recentSplitIndex = input.messages.length
+ for (let i = input.messages.length - 1; i >= 0; i--) {
+ if (recentTokens >= recentTarget) break
+ recentTokens += messageTokens[i]
+ recentSplitIndex = i
+ }
+
+ // Ensure recent split doesn't overlap with extract
+ if (recentSplitIndex <= extractSplitIndex) {
+ recentSplitIndex = extractSplitIndex
+ }
+
+ const extractedMessages = input.messages.slice(0, extractSplitIndex)
+ const recentReferenceMessages = input.messages.slice(recentSplitIndex)
+
+ log.debug("collapse split", {
+ totalTokens,
+ extractTarget,
+ extractedTokens,
+ extractedMessages: extractedMessages.length,
+ recentTarget,
+ recentTokens,
+ recentMessages: recentReferenceMessages.length,
+ })
+
+ if (extractedMessages.length === 0) {
+ log.info("collapse skipped", { reason: "no messages to extract" })
+ return "continue"
+ }
+
+ // Convert extracted messages to markdown for distillation
+ const markdownContent = messagesToMarkdown(extractedMessages)
+ const recentContext = messagesToMarkdown(recentReferenceMessages)
+
+ // Build base prompt (without previous summaries) to calculate token budget
+ const markdownTokens = Token.estimate(markdownContent)
+ const recentTokensEstimate = Token.estimate(recentContext)
+ const templateTokens = Token.estimate(COLLAPSE_PROMPT_TEMPLATE)
+ const basePromptTokens = markdownTokens + recentTokensEstimate + templateTokens
+ const contextLimit = model.limit.context
+ const outputReserve = SessionPrompt.OUTPUT_TOKEN_MAX
+ const previousSummaryBudget = Math.max(0, contextLimit - outputReserve - basePromptTokens)
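+ // Illustrative numbers only: a 200_000 context minus a 32_000 output reserve and a
+ // 120_000-token base prompt leaves a 48_000-token budget for prior summaries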
+
+ // Fetch previous summaries that fit within budget
+ const previousSummaries = await getPreviousSummaries(input.sessionID, previousSummariesLimit, previousSummaryBudget)
+
+ // Get the last extracted message to determine breakpoint position
+ const lastExtractedMessage = extractedMessages[extractedMessages.length - 1]
+ const lastExtractedId = lastExtractedMessage.info.id
+
+ // The breakpoint sits just after the last extracted message; the createLike() calls
+ // below derive new IDs from its ID, which works for both 6-byte and 7-byte formats
+ const breakpointTimestamp = lastExtractedMessage.info.time.created + 1
+
+ log.debug("collapse positioning", {
+ lastExtractedId,
+ breakpointTimestamp,
+ })
+
+ // Create the compaction user message at the breakpoint position
+ const compactionUserId = Identifier.createLike(lastExtractedId, "message", false, 1)
+ const compactionUserMsg = await Session.updateMessage({
+ id: compactionUserId,
+ role: "user",
+ model: originalUserMessage.model,
+ sessionID: input.sessionID,
+ agent: originalUserMessage.agent,
+ time: {
+ created: breakpointTimestamp,
+ },
+ })
+ await Session.updatePart({
+ id: Identifier.createLike(lastExtractedId, "part", false, 1),
+ messageID: compactionUserMsg.id,
+ sessionID: input.sessionID,
+ type: "compaction",
+ auto: input.auto,
+ })
+
+ // Create assistant summary message positioned right after the compaction user message
+ // Use compactionUserId as reference (not lastExtractedId) to ensure assistant sorts immediately after user
+ // This prevents other messages from being created with IDs that sort between user and assistant
+ const compactionAssistantId = Identifier.createLike(compactionUserId, "message", false, 1)
+ const msg = (await Session.updateMessage({
+ id: compactionAssistantId,
+ role: "assistant",
+ parentID: compactionUserMsg.id,
+ sessionID: input.sessionID,
+ mode: "compaction",
+ agent: "compaction",
+ summary: true,
+ path: {
+ cwd: Instance.directory,
+ root: Instance.worktree,
+ },
+ cost: 0,
+ tokens: {
+ output: 0,
+ input: 0,
+ reasoning: 0,
+ cache: { read: 0, write: 0 },
+ },
+ modelID: model.id,
+ providerID: model.providerID,
+ time: {
+ created: breakpointTimestamp + 1,
+ },
+ })) as MessageV2.Assistant
+
+ const processor = SessionProcessor.create({
+ assistantMessage: msg,
+ sessionID: input.sessionID,
+ model,
+ abort: input.abort,
+ })
+
+ // Allow plugins to inject context
+ const compacting = await Plugin.trigger(
+ "experimental.session.compacting",
+ { sessionID: input.sessionID },
+ { context: [], prompt: undefined },
+ )
+
+ // Build prompt sections - only include what we have
+ const sections: string[] = []
+
+ // Instructions
+ sections.push(`You are creating a comprehensive context restoration document. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost.
+
+Create a detailed summary (target: approximately ${summaryMaxTokens} tokens) with these sections:
+1. Current Task State - what is being worked on, next steps, blockers
+2. Resolved Code & Lessons Learned - working code verbatim, failed approaches, insights
+3. User Directives - explicit preferences, style rules, things to always/never do
+4. Custom Utilities & Commands - scripts, aliases, debugging commands
+5. Design Decisions & Derived Requirements - architecture decisions, API contracts, patterns
+6. Technical Facts - file paths, function names, config values, environment details
+
+Critical rules:
+- PRESERVE working code verbatim in fenced blocks
+- INCLUDE failed approaches with explanations
+- Be specific with paths, line numbers, function names
+- Capture the "why" behind decisions
+- User directives are sacred - never omit them`)
+
+ // Previous summaries
+ if (previousSummaries.length > 0) {
+ sections.push(`
+IMPORTANT: Merge all information from these previous summaries into your new summary. Do not lose any historical context.
+
+${previousSummaries.map((summary, i) => `--- Summary ${i + 1} ---\n${summary}`).join("\n\n")}
+`)
+ }
+
+ // Extracted content
+ sections.push(`
+The following conversation content needs to be distilled into the summary:
+
+${markdownContent}
+`)
+
+ // Recent context
+ sections.push(`
+The following is recent context for reference (shows current state):
+
+${recentContext}
+`)
+
+ // Additional plugin context
+ if (compacting.context.length > 0) {
+ sections.push(`
+${compacting.context.join("\n\n")}
+`)
+ }
+
+ sections.push("Generate the context restoration document now.")
+
+ const collapsePrompt = sections.join("\n\n")
+
+ const result = await processor.process({
+ user: originalUserMessage,
+ agent,
+ abort: input.abort,
+ sessionID: input.sessionID,
+ tools: {},
+ system: [],
+ messages: [
+ {
+ role: "user",
+ content: [{ type: "text", text: collapsePrompt }],
+ },
+ ],
+ model,
+ })
+
+ // NOTE: We intentionally do NOT add a "Continue if you have next steps" message
+ // for collapse mode. The collapse summary is just context restoration - the loop
+ // should exit after the summary is generated so the user can continue naturally.
+
+ if (processor.message.error) return "stop"
+
+ // Update token count on the chronologically last assistant message
+ // so isOverflow() sees the correct post-collapse state.
+ const allMessages = await Session.messages({ sessionID: input.sessionID })
+ const lastAssistant = allMessages
+ .filter(
+ (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } =>
+ m.info.role === "assistant" && m.info.id !== msg.id,
+ )
+ .sort((a, b) => b.info.time.created - a.info.time.created)[0]
+
+ if (lastAssistant) {
+ const originalTokens = { ...lastAssistant.info.tokens }
+ const collapseSummaryTokens = processor.message.tokens.output
+
+ const currentTotal =
+ lastAssistant.info.tokens.input +
+ lastAssistant.info.tokens.cache.read +
+ lastAssistant.info.tokens.cache.write +
+ lastAssistant.info.tokens.output
+
+ const newTotal = Math.max(0, currentTotal - extractedTokens + collapseSummaryTokens)
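+ // e.g. 150_000 counted - 90_000 extracted + 10_000 summary = 70_000 tokens post-collapse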
+
+ lastAssistant.info.tokens = {
+ input: 0,
+ output: lastAssistant.info.tokens.output,
+ reasoning: lastAssistant.info.tokens.reasoning,
+ cache: {
+ read: Math.max(0, newTotal - lastAssistant.info.tokens.output),
+ write: 0,
+ },
+ }
+ await Session.updateMessage(lastAssistant.info)
+
+ log.debug("tokens adjusted", {
+ extracted: extractedTokens,
+ summary: collapseSummaryTokens,
+ total: newTotal,
+ })
+ }
+
+ log.info("collapsed", {
+ messages: extractedMessages.length,
+ tokens: extractedTokens,
+ })
+
+ // Delete the original trigger message (created by create()) to prevent
+ // the loop from picking it up again as a pending compaction task.
+ // The trigger is the message at input.parentID - we've created a new
+ // compaction user message at the breakpoint position.
+ if (input.parentID !== compactionUserMsg.id) {
+ log.debug("cleanup trigger", { id: input.parentID })
+ // Delete parts first
+ const triggerMsg = input.messages.find((m) => m.info.id === input.parentID)
+ if (triggerMsg) {
+ for (const part of triggerMsg.parts) {
+ await Session.removePart({
+ sessionID: input.sessionID,
+ messageID: input.parentID,
+ partID: part.id,
+ })
+ }
+ }
+ await Session.removeMessage({
+ sessionID: input.sessionID,
+ messageID: input.parentID,
+ })
+ }
+
+ Bus.publish(Event.Compacted, { sessionID: input.sessionID })
+
+ // For auto-compaction: return "continue" so the loop processes the user's
+ // original message that triggered the overflow. The trigger message is deleted,
+ // so the loop will find the real user message and respond to it.
+ // For manual compaction: return "stop" - user explicitly requested compaction only.
+ if (input.auto) {
+ return "continue"
+ }
+ return "stop"
+ }
+
export const create = fn(
z.object({
sessionID: Identifier.schema("session"),
@@ -222,4 +649,107 @@ export namespace SessionCompaction {
})
},
)
+
+ /**
+ * Estimate tokens for a message (respects compaction state)
+ */
+ function estimateMessageTokens(msg: MessageV2.WithParts): number {
+ let tokens = 0
+ for (const part of msg.parts) {
+ if (part.type === "text") {
+ tokens += Token.estimate(part.text)
+ } else if (part.type === "tool" && part.state.status === "completed") {
+ // Skip compacted tool outputs
+ if (part.state.time.compacted) continue
+ tokens += Token.estimate(JSON.stringify(part.state.input))
+ tokens += Token.estimate(part.state.output)
+ }
+ }
+ return tokens
+ }
+
+ /**
+ * Convert messages to markdown format for distillation
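+ * (emits alternating "### User" / "### Assistant" headings with fenced tool inputs and outputs)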
+ */
+ function messagesToMarkdown(messages: MessageV2.WithParts[]): string {
+ const lines: string[] = []
+
+ for (const msg of messages) {
+ const role = msg.info.role === "user" ? "User" : "Assistant"
+ lines.push(`### ${role}`)
+ lines.push("")
+
+ for (const part of msg.parts) {
+ if (part.type === "text" && part.text) {
+ // Skip synthetic parts like "Continue if you have next steps"
+ if (part.synthetic) continue
+ lines.push(part.text)
+ lines.push("")
+ } else if (part.type === "tool" && part.state.status === "completed") {
+ // Skip compacted tool outputs
+ if (part.state.time.compacted) continue
+ lines.push(`**Tool: ${part.tool}**`)
+ lines.push("```json")
+ lines.push(JSON.stringify(part.state.input, null, 2))
+ lines.push("```")
+ if (part.state.output) {
+ lines.push("Output:")
+ lines.push("```")
+ lines.push(part.state.output.slice(0, 1000))
+ if (part.state.output.length > 1000) lines.push("... (truncated)")
+ lines.push("```")
+ }
+ lines.push("")
+ }
+ }
+ }
+
+ return lines.join("\n")
+ }
+
+ /**
+ * Extract summary text from a compaction summary message's parts
+ */
+ function extractSummaryText(msg: MessageV2.WithParts): string {
+ return msg.parts
+ .filter((p): p is MessageV2.TextPart => p.type === "text" && !p.synthetic)
+ .map((p) => p.text)
+ .join("\n")
+ }
+
+ /**
+ * Fetch previous compaction summaries from the session (unfiltered).
+ * Respects token budget to avoid overflowing context window.
+ */
+ async function getPreviousSummaries(sessionID: string, limit: number, tokenBudget: number): Promise<string[]> {
+ const allMessages = await Session.messages({ sessionID })
+
+ const summaryMessages = allMessages
+ .filter(
+ (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } =>
+ m.info.role === "assistant" &&
+ (m.info as MessageV2.Assistant).summary === true &&
+ (m.info as MessageV2.Assistant).finish !== undefined,
+ )
+ .sort((a, b) => a.info.time.created - b.info.time.created) // oldest first
+ .slice(-limit) // take the N most recent
+
+ // Include summaries only if they fit within token budget
+ // Start from most recent (end of array) since those are most relevant
+ const result: string[] = []
+ let tokensUsed = 0
+
+ for (let i = summaryMessages.length - 1; i >= 0; i--) {
+ const text = extractSummaryText(summaryMessages[i])
+ if (!text.trim()) continue
+
+ const estimate = Token.estimate(text)
+ if (tokensUsed + estimate > tokenBudget) break
+
+ result.unshift(text) // prepend to maintain chronological order
+ tokensUsed += estimate
+ }
+
+ return result
+ }
}
diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts
index 4285223bc5c..c64d569235d 100644
--- a/packages/opencode/src/session/index.ts
+++ b/packages/opencode/src/session/index.ts
@@ -147,12 +147,19 @@ export namespace Session {
directory: Instance.directory,
})
const msgs = await messages({ sessionID: input.sessionID })
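+ // Map old message IDs to new ones so cloned assistant messages can re-point their parentID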
+ const idMap = new Map<string, string>()
+
for (const msg of msgs) {
if (input.messageID && msg.info.id >= input.messageID) break
+ const newID = Identifier.ascending("message")
+ idMap.set(msg.info.id, newID)
+
+ const parentID = msg.info.role === "assistant" && msg.info.parentID ? idMap.get(msg.info.parentID) : undefined
const cloned = await updateMessage({
...msg.info,
sessionID: session.id,
- id: Identifier.ascending("message"),
+ id: newID,
+ ...(parentID && { parentID }),
})
for (const part of msg.parts) {
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index bb78ae64ce6..47eeb3a649f 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -11,8 +11,11 @@ import { ProviderTransform } from "@/provider/transform"
import { STATUS_CODES } from "http"
import { iife } from "@/util/iife"
import { type SystemError } from "bun"
+import { Log } from "../util/log"
export namespace MessageV2 {
+ const log = Log.create({ service: "message-v2" })
+
export const OutputLengthError = NamedError.create("MessageOutputLengthError", z.object({}))
export const AbortedError = NamedError.create("MessageAbortedError", z.object({ message: z.string() }))
export const AuthError = NamedError.create(
@@ -577,17 +580,28 @@ export namespace MessageV2 {
export async function filterCompacted(stream: AsyncIterable<MessageV2.WithParts>) {
const result = [] as MessageV2.WithParts[]
const completed = new Set<string>()
+
for await (const msg of stream) {
+ const hasCompactionPart = msg.parts.some((part) => part.type === "compaction")
+ const isAssistantSummary =
+ msg.info.role === "assistant" && (msg.info as Assistant).summary && (msg.info as Assistant).finish
+
result.push(msg)
- if (
- msg.info.role === "user" &&
- completed.has(msg.info.id) &&
- msg.parts.some((part) => part.type === "compaction")
- )
+
+ // Check if this is a compaction breakpoint
+ if (msg.info.role === "user" && completed.has(msg.info.id) && hasCompactionPart) {
+ log.debug("breakpoint", { id: msg.info.id })
break
- if (msg.info.role === "assistant" && msg.info.summary && msg.info.finish) completed.add(msg.info.parentID)
+ }
+
+ // If assistant with summary=true and finish, add parentID to completed set
+ if (isAssistantSummary) {
+ completed.add((msg.info as Assistant).parentID)
+ }
}
+
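+ // The stream yields messages newest-first, so reverse to return chronological order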
result.reverse()
+ log.debug("filtered", { count: result.length })
return result
}
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index 6bf71ef3653..f5a6c1fac7c 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -244,6 +244,7 @@ export namespace SessionPrompt {
SessionStatus.set(sessionID, { type: "busy" })
log.info("loop", { step, sessionID })
if (abort.aborted) break
+
let msgs = await MessageV2.filterCompacted(MessageV2.stream(sessionID))
let lastUser: MessageV2.User | undefined
@@ -263,6 +264,12 @@ export namespace SessionPrompt {
}
}
+ log.debug("state", {
+ lastUser: lastUser?.id,
+ lastFinished: lastFinished?.id,
+ tasks: tasks.length,
+ })
+
if (!lastUser) throw new Error("No user message found in stream. This should never happen.")
if (
lastAssistant?.finish &&
@@ -445,6 +452,7 @@ export namespace SessionPrompt {
// pending compaction
if (task?.type === "compaction") {
+ log.debug("compaction task", { auto: task.auto })
const result = await SessionCompaction.process({
messages: msgs,
parentID: lastUser.id,
@@ -462,6 +470,7 @@ export namespace SessionPrompt {
lastFinished.summary !== true &&
(await SessionCompaction.isOverflow({ tokens: lastFinished.tokens, model }))
) {
+ log.info("overflow", { tokens: lastFinished.tokens })
await SessionCompaction.create({
sessionID,
agent: lastUser.agent,
@@ -471,7 +480,6 @@ export namespace SessionPrompt {
continue
}
- // normal processing
const agent = await Agent.get(lastUser.agent)
const maxSteps = agent.maxSteps ?? Infinity
const isLastStep = step >= maxSteps
@@ -528,6 +536,16 @@ export namespace SessionPrompt {
await Plugin.trigger("experimental.chat.messages.transform", {}, { messages: sessionMessages })
+ // Debug: log messages being sent to LLM
+ log.debug("llm messages", {
+ count: sessionMessages.length,
+ messageIds: sessionMessages.map((m) => m.info.id),
+ firstMessageId: sessionMessages[0]?.info.id,
+ hasCompactionSummary: sessionMessages.some(
+ (m) => m.info.role === "assistant" && (m.info as any).summary === true,
+ ),
+ })
+
const result = await processor.process({
user: lastUser,
agent,
@@ -552,12 +570,31 @@ export namespace SessionPrompt {
continue
}
SessionCompaction.prune({ sessionID })
+
+ // Check if there are queued requests - their user messages are already created
+ // and need processing. We need to grab them before defer() runs cancel().
+ const queued = state()[sessionID]?.callbacks ?? []
+ if (queued.length > 0) {
+ // Clear callbacks so cancel() doesn't reject them
+ state()[sessionID].callbacks = []
+ // Schedule re-entry after this function exits (and defer runs cancel)
+ // Use setImmediate to let defer() clear state first, then re-enter loop
+ setImmediate(async () => {
+ const result = await loop(sessionID)
+ for (const q of queued) {
+ q.resolve(result)
+ }
+ })
+ // Return last assistant for now - queued requests will get their real response
+ for await (const item of MessageV2.stream(sessionID)) {
+ if (item.info.role === "user") continue
+ return item
+ }
+ }
+
+ // No queued requests - return last assistant as before
for await (const item of MessageV2.stream(sessionID)) {
if (item.info.role === "user") continue
- const queued = state()[sessionID]?.callbacks ?? []
- for (const q of queued) {
- q.resolve(item)
- }
return item
}
throw new Error("Impossible")
diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts
index 8b3bece004f..e2af8443f6a 100644
--- a/packages/sdk/js/src/v2/gen/types.gen.ts
+++ b/packages/sdk/js/src/v2/gen/types.gen.ts
@@ -1616,6 +1616,22 @@ export type Config = {
* Enable pruning of old tool outputs (default: true)
*/
prune?: boolean
+ /**
+ * Compaction method: 'standard' summarizes the entire conversation, 'collapse' extracts the oldest messages and creates a summary at the breakpoint (default: standard)
+ */
+ method?: "standard" | "collapse"
+ /**
+ * Trigger compaction at this fraction of total context (default: 0.85 = 85%)
+ */
+ trigger?: number
+ /**
+ * For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)
+ */
+ extractRatio?: number
+ /**
+ * For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)
+ */
+ recentRatio?: number
+ /**
+  * For collapse mode: target token count for the summary output (default: 10000)
+  */
+ summaryMaxTokens?: number
+ /**
+  * For collapse mode: number of previous summaries to include for context merging (default: 3)
+  */
+ previousSummaries?: number
}
experimental?: {
hook?: {
diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json
index 4924a5bfac0..c693a643202 100644
--- a/packages/sdk/openapi.json
+++ b/packages/sdk/openapi.json
@@ -8684,6 +8684,29 @@
"prune": {
"description": "Enable pruning of old tool outputs (default: true)",
"type": "boolean"
+ },
+ "method": {
+ "description": "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: collapse)",
+ "type": "string",
+ "enum": ["standard", "collapse"]
+ },
+ "trigger": {
+ "description": "Trigger compaction at this fraction of total context (default: 0.85 = 85%)",
+ "type": "number",
+ "minimum": 0,
+ "maximum": 1
+ },
+ "extractRatio": {
+ "description": "For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)",
+ "type": "number",
+ "minimum": 0,
+ "maximum": 1
+ },
+ "recentRatio": {
+ "description": "For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)",
+ "type": "number",
+ "minimum": 0,
+ "maximum": 1
}
}
},