diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
index 374645abb35..f530b7f7f42 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx
@@ -128,6 +128,9 @@ export function Session() {
   const [userMessageMarkdown, setUserMessageMarkdown] = createSignal(kv.get("user_message_markdown", true))
   const [diffWrapMode, setDiffWrapMode] = createSignal<"word" | "none">("word")
   const [animationsEnabled, setAnimationsEnabled] = createSignal(kv.get("animations_enabled", true))
+  const [compactionMethod, setCompactionMethod] = createSignal<"standard" | "collapse">(
+    kv.get("compaction_method", sync.data.config.compaction?.method ?? "standard"),
+  )

   const wide = createMemo(() => dimensions().width > 120)
   const sidebarVisible = createMemo(() => {
@@ -395,6 +398,19 @@ export function Session() {
           dialog.clear()
         },
       },
+      {
+        title: compactionMethod() === "collapse" ? "Use standard compaction" : "Use collapse compaction",
+        value: "session.toggle.compaction_method",
+        category: "Session",
+        onSelect: (dialog) => {
+          setCompactionMethod((prev) => {
+            const next = prev === "standard" ? "collapse" : "standard"
+            kv.set("compaction_method", next)
+            return next
+          })
+          dialog.clear()
+        },
+      },
       {
         title: "Unshare session",
         value: "session.unshare",
diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
index a9ed042d1bb..3efae65f602 100644
--- a/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
+++ b/packages/opencode/src/cli/cmd/tui/routes/session/sidebar.tsx
@@ -92,6 +92,12 @@ export function Sidebar(props: { sessionID: string }) {
           Context
+
+          compact{" "}
+          {sync.data.config.compaction?.auto === false
+            ? "disabled"
+            : kv.get("compaction_method", sync.data.config.compaction?.method ?? "standard")}
+
           {context()?.tokens ?? 0} tokens
           {context()?.percentage ?? 0}% used
           {cost()} spent
diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts
index 012e3e12f53..60ebb3c624b 100644
--- a/packages/opencode/src/config/config.ts
+++ b/packages/opencode/src/config/config.ts
@@ -807,6 +807,42 @@ export namespace Config {
       .object({
         auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"),
         prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"),
+        method: z
+          .enum(["standard", "collapse"])
+          .optional()
+          .describe(
+            "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: standard)",
+          ),
+        trigger: z
+          .number()
+          .min(0)
+          .max(1)
+          .optional()
+          .describe("Trigger compaction at this fraction of total context (default: 0.85 = 85%)"),
+        extractRatio: z
+          .number()
+          .min(0)
+          .max(1)
+          .optional()
+          .describe("For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)"),
+        recentRatio: z
+          .number()
+          .min(0)
+          .max(1)
+          .optional()
+          .describe("For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)"),
+        summaryMaxTokens: z
+          .number()
+          .min(1000)
+          .max(50000)
+          .optional()
+          .describe("For collapse mode: target token count for the summary output (default: 10000)"),
+        previousSummaries: z
+          .number()
+          .min(0)
+          .max(10)
+          .optional()
+          .describe("For collapse mode: number of previous summaries to include for context merging (default: 3)"),
       })
       .optional(),
     experimental:
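The new `compaction` options above map one-to-one onto user config. A minimal sketch of what the parsed shape might look like, with the field shape restated as a local type (the `CompactionConfig` name is invented here for illustration; the values shown are just the documented defaults):

```ts
// Hypothetical local mirror of the zod schema above, for illustration only.
type CompactionConfig = {
  auto?: boolean
  prune?: boolean
  method?: "standard" | "collapse"
  trigger?: number
  extractRatio?: number
  recentRatio?: number
  summaryMaxTokens?: number
  previousSummaries?: number
}

const example: { compaction: CompactionConfig } = {
  compaction: {
    method: "collapse", // opt in to collapse mode
    trigger: 0.85, // compact at 85% of the context window
    extractRatio: 0.65, // distill the oldest 65% of tokens
    recentRatio: 0.15, // keep the newest 15% as reference context
    summaryMaxTokens: 10000,
    previousSummaries: 3,
  },
}
```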
diff --git a/packages/opencode/src/id/id.ts b/packages/opencode/src/id/id.ts
index ad6e22e1bee..6bc6356ced8 100644
--- a/packages/opencode/src/id/id.ts
+++ b/packages/opencode/src/id/id.ts
@@ -15,7 +15,11 @@ export namespace Identifier {
     return z.string().startsWith(prefixes[prefix])
   }

+  // Total ID length after prefix: 6 bytes hex (12 chars) + 14 random chars = 26 chars
+  // Note: the 6-byte format truncates the high byte but maintains backwards compatibility.
+  // Use createLike() with a 7-byte reference ID when inserting at past timestamps.
   const LENGTH = 26
+  const TIME_BYTES = 6

   // State for monotonic ID generation
   let lastTimestamp = 0
@@ -59,15 +63,140 @@ export namespace Identifier {
     }
     counter++

+    // Encode timestamp * 0x1000 + counter into 6 bytes (48 bits).
+    // Note: this truncates the high byte for modern timestamps, but all IDs
+    // created at "now" will have the same truncation, so they sort correctly.
+    // The truncation only matters when inserting at past timestamps (use createLike for that).
     let now = BigInt(currentTimestamp) * BigInt(0x1000) + BigInt(counter)

     now = descending ? ~now : now

-    const timeBytes = Buffer.alloc(6)
-    for (let i = 0; i < 6; i++) {
-      timeBytes[i] = Number((now >> BigInt(40 - 8 * i)) & BigInt(0xff))
+    const timeBytes = Buffer.alloc(TIME_BYTES)
+    for (let i = 0; i < TIME_BYTES; i++) {
+      timeBytes[i] = Number((now >> BigInt((TIME_BYTES - 1 - i) * 8)) & BigInt(0xff))
     }

-    return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - 12)
+    return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(LENGTH - TIME_BYTES * 2)
+  }
+
+  /**
+   * Detect the byte format (6 or 7) of an existing ID.
+   * 6-byte IDs: 12 hex chars + 14 random = 26 total after prefix
+   * 7-byte IDs: 14 hex chars + 12 random = 26 total after prefix
+   */
+  export function detectFormat(id: string): 6 | 7 {
+    const underscoreIndex = id.indexOf("_")
+    if (underscoreIndex === -1) return TIME_BYTES as 6 | 7
+
+    const afterPrefix = id.slice(underscoreIndex + 1)
+
+    // Check if the first 14 chars are all valid hex (would indicate 7-byte format)
+    const first14 = afterPrefix.slice(0, 14)
+    const isValidHex14 = /^[0-9a-f]{14}$/i.test(first14)
+
+    if (isValidHex14) {
+      // Could be 7-byte format; verify by checking if it decodes to a valid timestamp
+      try {
+        const bigValue = BigInt("0x" + first14)
+        const ts = Number(bigValue / BigInt(0x1000))
+
+        // Check if this looks like a valid modern timestamp (after 2020, before 2100)
+        const year2020 = 1577836800000
+        const year2100 = 4102444800000
+        if (ts >= year2020 && ts < year2100) {
+          return 7
+        }
+      } catch {
+        // Not valid hex, fall through to 6-byte
+      }
+    }
+
+    // Otherwise assume 6-byte (old format)
+    return 6
+  }
+
+  /**
+   * Create an ID that sorts immediately after a reference ID.
+   *
+   * This works by extracting the raw encoded value from the reference ID and
+   * incrementing it, ensuring the new ID sorts correctly regardless of the
+   * byte format (6 or 7 bytes).
+   *
+   * @param referenceId - The ID to sort after
+   * @param prefix - The prefix for the new ID (e.g., "message", "part")
+   * @param descending - Whether to use descending order (usually false)
+   * @param offsetMs - Milliseconds to add to the reference timestamp (default 1)
+   */
+  export function createLike(
+    referenceId: string,
+    prefix: keyof typeof prefixes,
+    descending: boolean,
+    offsetMs: number = 1,
+  ): string {
+    const format = detectFormat(referenceId)
+    const underscoreIndex = referenceId.indexOf("_")
+    if (underscoreIndex === -1) {
+      throw new Error(`Invalid reference ID: ${referenceId}`)
+    }
+
+    // Extract the hex timestamp portion from the reference ID
+    const hexPart = referenceId.slice(underscoreIndex + 1, underscoreIndex + 1 + format * 2)
+    const referenceValue = BigInt("0x" + hexPart)
+
+    // Add the offset (in the encoded space: offsetMs * 0x1000).
+    // This ensures the new ID sorts after the reference regardless of truncation.
+    let newValue = referenceValue + BigInt(offsetMs) * BigInt(0x1000) + BigInt(1) // +1 for counter
+
+    newValue = descending ? ~newValue : newValue
+
+    const timeBytes = Buffer.alloc(format)
+    for (let i = 0; i < format; i++) {
+      timeBytes[i] = Number((newValue >> BigInt((format - 1 - i) * 8)) & BigInt(0xff))
+    }
+
+    const randomLength = LENGTH - format * 2
+    return prefixes[prefix] + "_" + timeBytes.toString("hex") + randomBase62(randomLength)
+  }
+
+  /**
+   * Decode the timestamp from an ID.
+   * Handles both old 6-byte IDs and new 7-byte IDs.
+   */
+  export function decodeTimestamp(id: string): { timestamp: number; counter: number } | null {
+    const underscoreIndex = id.indexOf("_")
+    if (underscoreIndex === -1) return null
+
+    const hexPart = id.slice(underscoreIndex + 1)
+
+    // Determine whether this is an old 6-byte ID or a new 7-byte ID.
+    // Old IDs: 12 hex chars for time + 14 random = 26 total after prefix
+    // New IDs: 14 hex chars for time + 12 random = 26 total after prefix
+    // We can detect by checking if the first 14 chars decode to a reasonable timestamp.
+
+    // Try 7-byte (new format) first
+    if (hexPart.length >= 14) {
+      const hex7 = hexPart.slice(0, 14)
+      const bigValue7 = BigInt("0x" + hex7)
+      const ts7 = Number(bigValue7 / BigInt(0x1000))
+      const counter7 = Number(bigValue7 % BigInt(0x1000))
+
+      // Check if this looks like a valid modern timestamp (after 2020, before 2100)
+      const year2020 = 1577836800000
+      const year2100 = 4102444800000
+      if (ts7 >= year2020 && ts7 < year2100) {
+        return { timestamp: ts7, counter: counter7 }
+      }
+    }
+
+    // Try 6-byte (old format)
+    if (hexPart.length >= 12) {
+      const hex6 = hexPart.slice(0, 12)
+      const bigValue6 = BigInt("0x" + hex6)
+      const ts6 = Number(bigValue6 / BigInt(0x1000))
+      const counter6 = Number(bigValue6 % BigInt(0x1000))
+      return { timestamp: ts6, counter: counter6 }
+    }

+    return null
   }
 }
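The encoding above is subtle: `create()` packs `timestamp * 0x1000 + counter` into 6 bytes, deliberately dropping the high byte. A standalone sketch of why IDs minted at the same moment still sort, and how a `createLike`-style increment lands just after its reference. This re-implements the packing locally rather than importing the real `Identifier` module:

```ts
// Pack the low `bytes` bytes of a bigint into a hex string (same loop shape as above).
function encode(value: bigint, bytes: number): string {
  const buf = Buffer.alloc(bytes)
  for (let i = 0; i < bytes; i++) {
    buf[i] = Number((value >> BigInt((bytes - 1 - i) * 8)) & BigInt(0xff))
  }
  return buf.toString("hex")
}

const now = BigInt(Date.now()) * BigInt(0x1000) + BigInt(1)
const a = encode(now, 6) // 12 hex chars, high byte truncated
const b = encode(now + BigInt(0x1000) + BigInt(1), 6) // one "millisecond" plus one counter tick later

// Both sides lose the same high byte, so lexicographic order survives
// (barring an astronomically unlikely wrap at the 48-bit boundary).
console.log(a < b) // true
```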
diff --git a/packages/opencode/src/server/server.ts b/packages/opencode/src/server/server.ts
index f31b8ec44f5..6f32e854447 100644
--- a/packages/opencode/src/server/server.ts
+++ b/packages/opencode/src/server/server.ts
@@ -1121,6 +1121,8 @@ export namespace Server {
               break
             }
           }
+          // Create the compaction trigger, then the loop processes it.
+          // process() will route to the appropriate method (collapse or standard).
           await SessionCompaction.create({
             sessionID,
             agent: currentAgent,
diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 42bab2eb975..886ebbb161a 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -14,6 +14,8 @@ import { fn } from "@/util/fn"
 import { Agent } from "@/agent/agent"
 import { Plugin } from "@/plugin"
 import { Config } from "@/config/config"
+import { Global } from "@/global"
+import path from "path"

 export namespace SessionCompaction {
   const log = Log.create({ service: "session.compaction" })
@@ -27,15 +29,89 @@ export namespace SessionCompaction {
     ),
   }

+  // Default configuration values
+  export const DEFAULTS = {
+    method: "standard" as const,
+    trigger: 0.85, // Trigger at 85% of usable context to leave headroom
+    extractRatio: 0.65,
+    recentRatio: 0.15,
+    summaryMaxTokens: 10000, // Target token count for the collapse summary
+    previousSummaries: 3, // Number of previous summaries to include in collapse
+  }
+
+  // Static portion of the collapse prompt template, used for token estimation
+  const COLLAPSE_PROMPT_TEMPLATE = `You are creating a comprehensive context restoration document from a collapsed conversation segment. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost.
+
+## Output Structure
+
+Create a detailed summary with the following sections:
+
+### 1. Current Task State
+### 2. Resolved Code & Lessons Learned
+### 3. User Directives
+### 4. Custom Utilities & Commands
+### 5. Design Decisions & Derived Requirements
+### 6. Technical Facts
+
+## Critical Rules
+
+- PRESERVE working code verbatim in fenced blocks
+- INCLUDE failed approaches with explanations
+- Be specific: exact paths, line numbers, function names, config values
+- Capture the "why" behind decisions, not just the "what"
+- User directives are sacred - never omit explicit user preferences
+
+## Extracted Context (to distill)
+## Recent Context (for reference)
+
+Generate the context restoration document now:`
+
+  /**
+   * Get the compaction method.
+   * Priority: TUI toggle (kv.json) > config file > default
+   */
+  export async function getMethod(): Promise<"standard" | "collapse"> {
+    const config = await Config.get()
+    const configMethod = config.compaction?.method
+
+    // Check the TUI toggle override
+    try {
+      const file = Bun.file(path.join(Global.Path.state, "kv.json"))
+      if (await file.exists()) {
+        const kv = await file.json()
+        const toggle = kv["compaction_method"]
+        if (toggle === "standard" || toggle === "collapse") {
+          return toggle
+        }
+      }
+    } catch {
+      // Ignore KV read errors
+    }
+
+    return configMethod ?? DEFAULTS.method
+  }
+
   export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
     const config = await Config.get()
     if (config.compaction?.auto === false) return false
     const context = input.model.limit.context
     if (context === 0) return false
-    const count = input.tokens.input + input.tokens.cache.read + input.tokens.output
-    const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
-    const usable = context - output
-    return count > usable
+
+    const count = input.tokens.input + input.tokens.cache.read + input.tokens.cache.write + input.tokens.output
+    const trigger = config.compaction?.trigger ?? DEFAULTS.trigger
+    const threshold = context * trigger
+    const isOver = count > threshold
+
+    log.debug("overflow check", {
+      tokens: input.tokens,
+      count,
+      context,
+      trigger,
+      threshold,
+      isOver,
+    })
+
+    return isOver
   }

   export const PRUNE_MINIMUM = 20_000
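The reworked `isOverflow` swaps the old `context - output` headroom check for a simple fractional threshold, and now counts `cache.write` tokens as well. A worked example with invented numbers, assuming a 200k-token context window and the default `trigger` of 0.85:

```ts
// Illustrative arithmetic only; the model size and token counts are made up.
const context = 200_000
const trigger = 0.85
const threshold = context * trigger // 170,000 tokens

const tokens = { input: 120_000, output: 8_000, cache: { read: 40_000, write: 6_000 } }
const count = tokens.input + tokens.cache.read + tokens.cache.write + tokens.output // 174,000

console.log(count > threshold) // true -> compaction would be triggered
```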
@@ -89,13 +165,37 @@ export namespace SessionCompaction {
     }
   }

+  /**
+   * Process compaction - routes to the appropriate method based on config.
+   * This is called via the create() -> loop() -> process() flow.
+   */
   export async function process(input: {
     parentID: string
     messages: MessageV2.WithParts[]
     sessionID: string
     abort: AbortSignal
     auto: boolean
-  }) {
+  }): Promise<"continue" | "stop"> {
+    const method = await getMethod()
+    log.info("compacting", { method })
+
+    if (method === "collapse") {
+      return processCollapse(input)
+    }
+    return processStandard(input)
+  }
+
+  /**
+   * Standard compaction: summarizes the entire conversation at the end.
+   */
+  async function processStandard(input: {
+    parentID: string
+    messages: MessageV2.WithParts[]
+    sessionID: string
+    abort: AbortSignal
+    auto: boolean
+  }): Promise<"continue" | "stop"> {
+    log.debug("standard", { parentID: input.parentID })
     const userMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User
     const agent = await Agent.get("compaction")
     const model = agent.model
@@ -192,6 +292,333 @@ export namespace SessionCompaction {
     return "continue"
   }

+  /**
+   * Collapse compaction: extract the oldest messages, distill them with AI, and insert
+   * a summary at the breakpoint. Messages before the breakpoint are filtered out by
+   * filterCompacted().
+   */
+  async function processCollapse(input: {
+    parentID: string
+    messages: MessageV2.WithParts[]
+    sessionID: string
+    abort: AbortSignal
+    auto: boolean
+  }): Promise<"continue" | "stop"> {
+    const config = await Config.get()
+    const extractRatio = config.compaction?.extractRatio ?? DEFAULTS.extractRatio
+    const recentRatio = config.compaction?.recentRatio ?? DEFAULTS.recentRatio
+    const summaryMaxTokens = config.compaction?.summaryMaxTokens ?? DEFAULTS.summaryMaxTokens
+    const previousSummariesLimit = config.compaction?.previousSummaries ?? DEFAULTS.previousSummaries
+
+    // Get the user message to determine which model we'll use
+    const originalUserMessage = input.messages.findLast((m) => m.info.id === input.parentID)!.info as MessageV2.User
+    const agent = await Agent.get("compaction")
+    const model = agent.model
+      ? await Provider.getModel(agent.model.providerID, agent.model.modelID)
+      : await Provider.getModel(originalUserMessage.model.providerID, originalUserMessage.model.modelID)
+
+    // Calculate token counts for messages first
+    const messageTokens: number[] = []
+    let totalTokens = 0
+    for (const msg of input.messages) {
+      const estimate = estimateMessageTokens(msg)
+      messageTokens.push(estimate)
+      totalTokens += estimate
+    }
+
+    // Calculate extraction targets
+    const extractTarget = Math.floor(totalTokens * extractRatio)
+    const recentTarget = Math.floor(totalTokens * recentRatio)
+
+    // Find split points
+    let extractedTokens = 0
+    let extractSplitIndex = 0
+    for (let i = 0; i < input.messages.length; i++) {
+      if (extractedTokens >= extractTarget) break
+      extractedTokens += messageTokens[i]
+      extractSplitIndex = i + 1
+    }
+
+    let recentTokens = 0
+    let recentSplitIndex = input.messages.length
+    for (let i = input.messages.length - 1; i >= 0; i--) {
+      if (recentTokens >= recentTarget) break
+      recentTokens += messageTokens[i]
+      recentSplitIndex = i
+    }
+
+    // Ensure the recent split doesn't overlap with the extract
+    if (recentSplitIndex <= extractSplitIndex) {
+      recentSplitIndex = extractSplitIndex
+    }
+
+    const extractedMessages = input.messages.slice(0, extractSplitIndex)
+    const recentReferenceMessages = input.messages.slice(recentSplitIndex)
+
+    log.debug("collapse split", {
+      totalTokens,
+      extractTarget,
+      extractedTokens,
+      extractedMessages: extractedMessages.length,
+      recentTarget,
+      recentTokens,
+      recentMessages: recentReferenceMessages.length,
+    })
+
+    if (extractedMessages.length === 0) {
+      log.info("collapse skipped", { reason: "no messages to extract" })
+      return "continue"
+    }
+
+    // Convert the extracted messages to markdown for distillation
+    const markdownContent = messagesToMarkdown(extractedMessages)
+    const recentContext = messagesToMarkdown(recentReferenceMessages)
+
+    // Build the base prompt (without previous summaries) to calculate the token budget
+    const markdownTokens = Token.estimate(markdownContent)
+    const recentTokensEstimate = Token.estimate(recentContext)
+    const templateTokens = Token.estimate(COLLAPSE_PROMPT_TEMPLATE)
+    const basePromptTokens = markdownTokens + recentTokensEstimate + templateTokens
+    const contextLimit = model.limit.context
+    const outputReserve = SessionPrompt.OUTPUT_TOKEN_MAX
+    const previousSummaryBudget = Math.max(0, contextLimit - outputReserve - basePromptTokens)
+
+    // Fetch previous summaries that fit within the budget
+    const previousSummaries = await getPreviousSummaries(input.sessionID, previousSummariesLimit, previousSummaryBudget)
+
+    // Get the last extracted message to determine the breakpoint position
+    const lastExtractedMessage = extractedMessages[extractedMessages.length - 1]
+    const lastExtractedId = lastExtractedMessage.info.id
+
+    // Extract the timestamp from the last extracted message ID.
+    // Use createLike to handle both 6-byte and 7-byte ID formats.
+    const breakpointTimestamp = lastExtractedMessage.info.time.created + 1
+
+    log.debug("collapse positioning", {
+      lastExtractedId,
+      breakpointTimestamp,
+    })
+
+    // Create the compaction user message at the breakpoint position
+    const compactionUserId = Identifier.createLike(lastExtractedId, "message", false, 1)
+    const compactionUserMsg = await Session.updateMessage({
+      id: compactionUserId,
+      role: "user",
+      model: originalUserMessage.model,
+      sessionID: input.sessionID,
+      agent: originalUserMessage.agent,
+      time: {
+        created: breakpointTimestamp,
+      },
+    })
+    await Session.updatePart({
+      id: Identifier.createLike(lastExtractedId, "part", false, 1),
+      messageID: compactionUserMsg.id,
+      sessionID: input.sessionID,
+      type: "compaction",
+      auto: input.auto,
+    })
+
+    // Create the assistant summary message positioned right after the compaction user message.
+    // Use compactionUserId as the reference (not lastExtractedId) to ensure the assistant sorts
+    // immediately after the user. This prevents other messages from being created with IDs
+    // that sort between the user and the assistant.
+    const compactionAssistantId = Identifier.createLike(compactionUserId, "message", false, 1)
+    const msg = (await Session.updateMessage({
+      id: compactionAssistantId,
+      role: "assistant",
+      parentID: compactionUserMsg.id,
+      sessionID: input.sessionID,
+      mode: "compaction",
+      agent: "compaction",
+      summary: true,
+      path: {
+        cwd: Instance.directory,
+        root: Instance.worktree,
+      },
+      cost: 0,
+      tokens: {
+        output: 0,
+        input: 0,
+        reasoning: 0,
+        cache: { read: 0, write: 0 },
+      },
+      modelID: model.id,
+      providerID: model.providerID,
+      time: {
+        created: breakpointTimestamp + 1,
+      },
+    })) as MessageV2.Assistant
+
+    const processor = SessionProcessor.create({
+      assistantMessage: msg,
+      sessionID: input.sessionID,
+      model,
+      abort: input.abort,
+    })
+
+    // Allow plugins to inject context
+    const compacting = await Plugin.trigger(
+      "experimental.session.compacting",
+      { sessionID: input.sessionID },
+      { context: [], prompt: undefined },
+    )
+
+    // Build the prompt sections - only include what we have
+    const sections: string[] = []
+
+    // Instructions
+    sections.push(`You are creating a comprehensive context restoration document. This document will serve as the foundation for continued work - it must preserve critical knowledge that would otherwise be lost.
+
+Create a detailed summary (target: approximately ${summaryMaxTokens} tokens) with these sections:
+1. Current Task State - what is being worked on, next steps, blockers
+2. Resolved Code & Lessons Learned - working code verbatim, failed approaches, insights
+3. User Directives - explicit preferences, style rules, things to always/never do
+4. Custom Utilities & Commands - scripts, aliases, debugging commands
+5. Design Decisions & Derived Requirements - architecture decisions, API contracts, patterns
+6. Technical Facts - file paths, function names, config values, environment details
+
+Critical rules:
+- PRESERVE working code verbatim in fenced blocks
+- INCLUDE failed approaches with explanations
+- Be specific with paths, line numbers, function names
+- Capture the "why" behind decisions
+- User directives are sacred - never omit them`)
+
+    // Previous summaries
+    if (previousSummaries.length > 0) {
+      sections.push(`
+IMPORTANT: Merge all information from these previous summaries into your new summary. Do not lose any historical context.
+
+${previousSummaries.map((summary, i) => `--- Summary ${i + 1} ---\n${summary}`).join("\n\n")}
+`)
+    }
+
+    // Extracted content
+    sections.push(`
+The following conversation content needs to be distilled into the summary:
+
+${markdownContent}
+`)
+
+    // Recent context
+    sections.push(`
+The following is recent context for reference (shows current state):
+
+${recentContext}
+`)
+
+    // Additional plugin context
+    if (compacting.context.length > 0) {
+      sections.push(`
+${compacting.context.join("\n\n")}
+`)
+    }
+
+    sections.push("Generate the context restoration document now.")
+
+    const collapsePrompt = sections.join("\n\n")
+
+    const result = await processor.process({
+      user: originalUserMessage,
+      agent,
+      abort: input.abort,
+      sessionID: input.sessionID,
+      tools: {},
+      system: [],
+      messages: [
+        {
+          role: "user",
+          content: [{ type: "text", text: collapsePrompt }],
+        },
+      ],
+      model,
+    })
+
+    // NOTE: We intentionally do NOT add a "Continue if you have next steps" message
+    // for collapse mode. The collapse summary is just context restoration - the loop
+    // should exit after the summary is generated so the user can continue naturally.
+
+    if (processor.message.error) return "stop"
+
+    // Update the token count on the chronologically last assistant message
+    // so isOverflow() sees the correct post-collapse state.
+    const allMessages = await Session.messages({ sessionID: input.sessionID })
+    const lastAssistant = allMessages
+      .filter(
+        (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } =>
+          m.info.role === "assistant" && m.info.id !== msg.id,
+      )
+      .sort((a, b) => b.info.time.created - a.info.time.created)[0]
+
+    if (lastAssistant) {
+      const originalTokens = { ...lastAssistant.info.tokens }
+      const collapseSummaryTokens = processor.message.tokens.output
+
+      const currentTotal =
+        lastAssistant.info.tokens.input +
+        lastAssistant.info.tokens.cache.read +
+        lastAssistant.info.tokens.cache.write +
+        lastAssistant.info.tokens.output
+
+      const newTotal = Math.max(0, currentTotal - extractedTokens + collapseSummaryTokens)
+
+      lastAssistant.info.tokens = {
+        input: 0,
+        output: lastAssistant.info.tokens.output,
+        reasoning: lastAssistant.info.tokens.reasoning,
+        cache: {
+          read: Math.max(0, newTotal - lastAssistant.info.tokens.output),
+          write: 0,
+        },
+      }
+      await Session.updateMessage(lastAssistant.info)
+
+      log.debug("tokens adjusted", {
+        extracted: extractedTokens,
+        summary: collapseSummaryTokens,
+        total: newTotal,
+      })
+    }
+
+    log.info("collapsed", {
+      messages: extractedMessages.length,
+      tokens: extractedTokens,
+    })
+
+    // Delete the original trigger message (created by create()) to prevent
+    // the loop from picking it up again as a pending compaction task.
+    // The trigger is the message at input.parentID - we've created a new
+    // compaction user message at the breakpoint position.
+    if (input.parentID !== compactionUserMsg.id) {
+      log.debug("cleanup trigger", { id: input.parentID })
+      // Delete the parts first
+      const triggerMsg = input.messages.find((m) => m.info.id === input.parentID)
+      if (triggerMsg) {
+        for (const part of triggerMsg.parts) {
+          await Session.removePart({
+            sessionID: input.sessionID,
+            messageID: input.parentID,
+            partID: part.id,
+          })
+        }
+      }
+      await Session.removeMessage({
+        sessionID: input.sessionID,
+        messageID: input.parentID,
+      })
+    }
+
+    Bus.publish(Event.Compacted, { sessionID: input.sessionID })
+
+    // For auto-compaction: return "continue" so the loop processes the user's
+    // original message that triggered the overflow. The trigger message is deleted,
+    // so the loop will find the real user message and respond to it.
+    // For manual compaction: return "stop" - the user explicitly requested compaction only.
+    if (input.auto) {
+      return "continue"
+    }
+    return "stop"
+  }
+
   export const create = fn(
     z.object({
       sessionID: Identifier.schema("session"),
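The split logic in `processCollapse` walks forward until `extractRatio` of the total tokens is covered, then backward for `recentRatio`; anything between the two indices is kept verbatim. A self-contained sketch of that arithmetic over invented per-message token counts (the defaults 0.65 and 0.15 are from `DEFAULTS` above):

```ts
// Invented message sizes, purely for illustration.
const messageTokens = [4000, 3000, 5000, 2000, 1000, 1500, 500]
const totalTokens = messageTokens.reduce((a, b) => a + b, 0) // 17,000

const extractTarget = Math.floor(totalTokens * 0.65) // 11,050
const recentTarget = Math.floor(totalTokens * 0.15) // 2,550

// Walk forward until the extract target is covered.
let extracted = 0
let extractSplitIndex = 0
for (let i = 0; i < messageTokens.length; i++) {
  if (extracted >= extractTarget) break
  extracted += messageTokens[i]
  extractSplitIndex = i + 1
}

// Walk backward until the recent target is covered.
let recent = 0
let recentSplitIndex = messageTokens.length
for (let i = messageTokens.length - 1; i >= 0; i--) {
  if (recent >= recentTarget) break
  recent += messageTokens[i]
  recentSplitIndex = i
}
if (recentSplitIndex <= extractSplitIndex) recentSplitIndex = extractSplitIndex

// Messages [0, 3) are distilled, [3, 4) stay verbatim, [4, 7) become reference context.
console.log({ extractSplitIndex, recentSplitIndex }) // { extractSplitIndex: 3, recentSplitIndex: 4 }
```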
@@ -222,4 +649,107 @@ export namespace SessionCompaction {
       })
     },
   )
+
+  /**
+   * Estimate tokens for a message (respects compaction state)
+   */
+  function estimateMessageTokens(msg: MessageV2.WithParts): number {
+    let tokens = 0
+    for (const part of msg.parts) {
+      if (part.type === "text") {
+        tokens += Token.estimate(part.text)
+      } else if (part.type === "tool" && part.state.status === "completed") {
+        // Skip compacted tool outputs
+        if (part.state.time.compacted) continue
+        tokens += Token.estimate(JSON.stringify(part.state.input))
+        tokens += Token.estimate(part.state.output)
+      }
+    }
+    return tokens
+  }
+
+  /**
+   * Convert messages to markdown format for distillation
+   */
+  function messagesToMarkdown(messages: MessageV2.WithParts[]): string {
+    const lines: string[] = []
+
+    for (const msg of messages) {
+      const role = msg.info.role === "user" ? "User" : "Assistant"
+      lines.push(`### ${role}`)
+      lines.push("")
+
+      for (const part of msg.parts) {
+        if (part.type === "text" && part.text) {
+          // Skip synthetic parts like "Continue if you have next steps"
+          if (part.synthetic) continue
+          lines.push(part.text)
+          lines.push("")
+        } else if (part.type === "tool" && part.state.status === "completed") {
+          // Skip compacted tool outputs
+          if (part.state.time.compacted) continue
+          lines.push(`**Tool: ${part.tool}**`)
+          lines.push("```json")
+          lines.push(JSON.stringify(part.state.input, null, 2))
+          lines.push("```")
+          if (part.state.output) {
+            lines.push("Output:")
+            lines.push("```")
+            lines.push(part.state.output.slice(0, 1000))
+            if (part.state.output.length > 1000) lines.push("... (truncated)")
+            lines.push("```")
+          }
+          lines.push("")
+        }
+      }
+    }
+
+    return lines.join("\n")
+  }
+
+  /**
+   * Extract the summary text from a compaction summary message's parts
+   */
+  function extractSummaryText(msg: MessageV2.WithParts): string {
+    return msg.parts
+      .filter((p): p is MessageV2.TextPart => p.type === "text" && !p.synthetic)
+      .map((p) => p.text)
+      .join("\n")
+  }
+
+  /**
+   * Fetch previous compaction summaries from the session (unfiltered).
+   * Respects the token budget to avoid overflowing the context window.
+   */
+  async function getPreviousSummaries(sessionID: string, limit: number, tokenBudget: number): Promise<string[]> {
+    const allMessages = await Session.messages({ sessionID })
+
+    const summaryMessages = allMessages
+      .filter(
+        (m): m is MessageV2.WithParts & { info: MessageV2.Assistant } =>
+          m.info.role === "assistant" &&
+          (m.info as MessageV2.Assistant).summary === true &&
+          (m.info as MessageV2.Assistant).finish !== undefined,
+      )
+      .sort((a, b) => a.info.time.created - b.info.time.created) // oldest first
+      .slice(-limit) // take the N most recent
+
+    // Include summaries only if they fit within the token budget.
+    // Start from the most recent (end of array) since those are most relevant.
+    const result: string[] = []
+    let tokensUsed = 0
+
+    for (let i = summaryMessages.length - 1; i >= 0; i--) {
+      const text = extractSummaryText(summaryMessages[i])
+      if (!text.trim()) continue
+
+      const estimate = Token.estimate(text)
+      if (tokensUsed + estimate > tokenBudget) break
+
+      result.unshift(text) // prepend to maintain chronological order
+      tokensUsed += estimate
+    }
+
+    return result
+  }
 }
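The helpers above size everything with `Token.estimate`, whose implementation isn't part of this diff. Purely as a reference point for the budgeting arithmetic, here is a common ~4-characters-per-token stand-in — an assumption for illustration, not opencode's actual heuristic:

```ts
// Hypothetical stand-in for Token.estimate; the real heuristic may differ.
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4)
}

console.log(estimateTokens("const x = 42")) // 3
```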
diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts
index 4285223bc5c..c64d569235d 100644
--- a/packages/opencode/src/session/index.ts
+++ b/packages/opencode/src/session/index.ts
@@ -147,12 +147,19 @@ export namespace Session {
       directory: Instance.directory,
     })
     const msgs = await messages({ sessionID: input.sessionID })
+    const idMap = new Map<string, string>()
+
     for (const msg of msgs) {
       if (input.messageID && msg.info.id >= input.messageID) break
+      const newID = Identifier.ascending("message")
+      idMap.set(msg.info.id, newID)
+
+      const parentID = msg.info.role === "assistant" && msg.info.parentID ? idMap.get(msg.info.parentID) : undefined
       const cloned = await updateMessage({
         ...msg.info,
         sessionID: session.id,
-        id: Identifier.ascending("message"),
+        id: newID,
+        ...(parentID && { parentID }),
       })
       for (const part of msg.parts) {
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index bb78ae64ce6..47eeb3a649f 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -11,8 +11,11 @@ import { ProviderTransform } from "@/provider/transform"
 import { STATUS_CODES } from "http"
 import { iife } from "@/util/iife"
 import { type SystemError } from "bun"
+import { Log } from "../util/log"

 export namespace MessageV2 {
+  const log = Log.create({ service: "message-v2" })
+
   export const OutputLengthError = NamedError.create("MessageOutputLengthError", z.object({}))
   export const AbortedError = NamedError.create("MessageAbortedError", z.object({ message: z.string() }))
   export const AuthError = NamedError.create(
@@ -577,17 +580,28 @@ export namespace MessageV2 {
   export async function filterCompacted(stream: AsyncIterable<WithParts>) {
     const result = [] as MessageV2.WithParts[]
     const completed = new Set<string>()
+
     for await (const msg of stream) {
+      const hasCompactionPart = msg.parts.some((part) => part.type === "compaction")
+      const isAssistantSummary =
+        msg.info.role === "assistant" && (msg.info as Assistant).summary && (msg.info as Assistant).finish
+
       result.push(msg)
-      if (
-        msg.info.role === "user" &&
-        completed.has(msg.info.id) &&
-        msg.parts.some((part) => part.type === "compaction")
-      )
+
+      // Check if this is a compaction breakpoint
+      if (msg.info.role === "user" && completed.has(msg.info.id) && hasCompactionPart) {
+        log.debug("breakpoint", { id: msg.info.id })
         break
-      if (msg.info.role === "assistant" && msg.info.summary && msg.info.finish) completed.add(msg.info.parentID)
+      }
+
+      // If this is an assistant with summary=true and finish, add its parentID to the completed set
+      if (isAssistantSummary) {
+        completed.add((msg.info as Assistant).parentID)
+      }
     }
+
     result.reverse()
+    log.debug("filtered", { count: result.length })
     return result
   }
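The rewritten `filterCompacted` walks a newest-first stream, records summary assistants by their `parentID`, and stops once it reaches the compaction user message they point back to. A toy walkthrough with plain objects standing in for the `MessageV2` types:

```ts
// Simplified stand-in for MessageV2.WithParts, for illustration only.
type Msg = { id: string; role: "user" | "assistant"; parentID?: string; summary?: boolean; finish?: boolean; compaction?: boolean }

function filter(newestFirst: Msg[]): Msg[] {
  const result: Msg[] = []
  const completed = new Set<string>()
  for (const msg of newestFirst) {
    result.push(msg)
    // Stop at a compaction breakpoint whose summary we have already seen.
    if (msg.role === "user" && completed.has(msg.id) && msg.compaction) break
    if (msg.role === "assistant" && msg.summary && msg.finish && msg.parentID) completed.add(msg.parentID)
  }
  return result.reverse() // back to chronological order
}

const stream: Msg[] = [
  { id: "m4", role: "assistant", parentID: "m3" },
  { id: "m3", role: "user" },
  { id: "m2", role: "assistant", parentID: "m1", summary: true, finish: true },
  { id: "m1", role: "user", compaction: true },
  { id: "m0", role: "user" }, // pre-compaction history, dropped
]
console.log(filter(stream).map((m) => m.id)) // ["m1", "m2", "m3", "m4"]
```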
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index 6bf71ef3653..f5a6c1fac7c 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -244,6 +244,7 @@ export namespace SessionPrompt {
       SessionStatus.set(sessionID, { type: "busy" })
       log.info("loop", { step, sessionID })
       if (abort.aborted) break
+
       let msgs = await MessageV2.filterCompacted(MessageV2.stream(sessionID))

       let lastUser: MessageV2.User | undefined
@@ -263,6 +264,12 @@ export namespace SessionPrompt {
         }
       }

+      log.debug("state", {
+        lastUser: lastUser?.id,
+        lastFinished: lastFinished?.id,
+        tasks: tasks.length,
+      })
+
       if (!lastUser) throw new Error("No user message found in stream. This should never happen.")
       if (
         lastAssistant?.finish &&
@@ -445,6 +452,7 @@ export namespace SessionPrompt {

       // pending compaction
       if (task?.type === "compaction") {
+        log.debug("compaction task", { auto: task.auto })
         const result = await SessionCompaction.process({
           messages: msgs,
           parentID: lastUser.id,
@@ -462,6 +470,7 @@ export namespace SessionPrompt {
         lastFinished.summary !== true &&
         (await SessionCompaction.isOverflow({ tokens: lastFinished.tokens, model }))
       ) {
+        log.info("overflow", { tokens: lastFinished.tokens })
         await SessionCompaction.create({
           sessionID,
           agent: lastUser.agent,
@@ -471,7 +480,6 @@ export namespace SessionPrompt {
         continue
       }

-      // normal processing
       const agent = await Agent.get(lastUser.agent)
       const maxSteps = agent.maxSteps ?? Infinity
       const isLastStep = step >= maxSteps
@@ -528,6 +536,16 @@ export namespace SessionPrompt {

       await Plugin.trigger("experimental.chat.messages.transform", {}, { messages: sessionMessages })

+      // Debug: log the messages being sent to the LLM
+      log.debug("llm messages", {
+        count: sessionMessages.length,
+        messageIds: sessionMessages.map((m) => m.info.id),
+        firstMessageId: sessionMessages[0]?.info.id,
+        hasCompactionSummary: sessionMessages.some(
+          (m) => m.info.role === "assistant" && (m.info as any).summary === true,
+        ),
+      })
+
       const result = await processor.process({
         user: lastUser,
         agent,
@@ -552,12 +570,31 @@ export namespace SessionPrompt {
         continue
       }
       SessionCompaction.prune({ sessionID })
+
+      // Check if there are queued requests - their user messages are already created
+      // and need processing. We need to grab them before defer() runs cancel().
+      const queued = state()[sessionID]?.callbacks ?? []
+      if (queued.length > 0) {
+        // Clear the callbacks so cancel() doesn't reject them
+        state()[sessionID].callbacks = []
+        // Schedule re-entry after this function exits (and defer runs cancel).
+        // Use setImmediate to let defer() clear state first, then re-enter the loop.
+        setImmediate(async () => {
+          const result = await loop(sessionID)
+          for (const q of queued) {
+            q.resolve(result)
+          }
+        })
+        // Return the last assistant for now - queued requests will get their real response
+        for await (const item of MessageV2.stream(sessionID)) {
+          if (item.info.role === "user") continue
+          return item
+        }
+      }
+
+      // No queued requests - return the last assistant as before
       for await (const item of MessageV2.stream(sessionID)) {
         if (item.info.role === "user") continue
-        const queued = state()[sessionID]?.callbacks ?? []
-        for (const q of queued) {
-          q.resolve(item)
-        }
         return item
       }
       throw new Error("Impossible")
diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts
index 8b3bece004f..e2af8443f6a 100644
--- a/packages/sdk/js/src/v2/gen/types.gen.ts
+++ b/packages/sdk/js/src/v2/gen/types.gen.ts
@@ -1616,6 +1616,22 @@ export type Config = {
     /**
      * Enable pruning of old tool outputs (default: true)
      */
    prune?: boolean
+    /**
+     * Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: standard)
+     */
+    method?: "standard" | "collapse"
+    /**
+     * Trigger compaction at this fraction of total context (default: 0.85 = 85%)
+     */
+    trigger?: number
+    /**
+     * For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)
+     */
+    extractRatio?: number
+    /**
+     * For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)
+     */
+    recentRatio?: number
   }
   experimental?: {
     hook?: {
diff --git a/packages/sdk/openapi.json b/packages/sdk/openapi.json
index 4924a5bfac0..c693a643202 100644
--- a/packages/sdk/openapi.json
+++ b/packages/sdk/openapi.json
@@ -8684,6 +8684,29 @@
           "prune": {
             "description": "Enable pruning of old tool outputs (default: true)",
             "type": "boolean"
+          },
+          "method": {
+            "description": "Compaction method: 'standard' summarizes entire conversation, 'collapse' extracts oldest messages and creates summary at breakpoint (default: standard)",
+            "type": "string",
+            "enum": ["standard", "collapse"]
+          },
+          "trigger": {
+            "description": "Trigger compaction at this fraction of total context (default: 0.85 = 85%)",
+            "type": "number",
+            "minimum": 0,
+            "maximum": 1
+          },
+          "extractRatio": {
+            "description": "For collapse mode: fraction of oldest tokens to extract and summarize (default: 0.65)",
+            "type": "number",
+            "minimum": 0,
+            "maximum": 1
+          },
+          "recentRatio": {
+            "description": "For collapse mode: fraction of newest tokens to use as reference context (default: 0.15)",
+            "type": "number",
+            "minimum": 0,
+            "maximum": 1
+          }
         }
       },