diff --git a/package.json b/package.json index ca9602174a2..00fbaef518e 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "description": "AI-powered development tool", "private": true, "type": "module", - "packageManager": "bun@1.3.5", + "packageManager": "bun@1.3.6", "scripts": { "dev": "bun run --cwd packages/opencode --conditions=browser src/index.ts", "typecheck": "bun turbo typecheck", diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index ddb3af4b0a8..5242b3fa10f 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -1028,6 +1028,22 @@ export namespace Config { .object({ auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"), prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"), + threshold: z + .number() + .min(0.5) + .max(0.99) + .optional() + .describe( + "Percentage of context window to trigger compaction (default: 0.9). Value between 0.5 and 0.99.", + ), + maxContext: z + .number() + .int() + .positive() + .optional() + .describe( + "Override the model's context limit to a lower value. This sets a user-defined cap on context usage, useful for cost control on large models. Example: If your model supports 2M tokens but you set maxContext to 100k, only 100k will be used. The actual limit will be min(model.limit.context, maxContext).", + ), }) .optional(), experimental: z diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index c983bf32c4f..be54fa835e2 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -632,9 +632,21 @@ export namespace ProviderTransform { options: Record, modelLimit: number, globalLimit: number, + contextWindow?: number, + estimatedInputTokens?: number, ): number { const modelCap = modelLimit || globalLimit - const standardLimit = Math.min(modelCap, globalLimit) + let standardLimit = Math.min(modelCap, globalLimit) + + // Dynamic max_tokens calculation based on input size and context window + if (contextWindow && estimatedInputTokens) { + const SAFETY_BUFFER = 4000 // Buffer to account for estimation errors + const availableTokens = contextWindow - estimatedInputTokens - SAFETY_BUFFER + + if (availableTokens > 0) { + standardLimit = Math.min(standardLimit, availableTokens) + } + } if (npm === "@ai-sdk/anthropic") { const thinking = options?.["thinking"] @@ -649,7 +661,8 @@ export namespace ProviderTransform { } } - return standardLimit + // Ensure minimum of 1000 tokens + return Math.max(1000, standardLimit) } export function schema(model: Provider.Model, schema: JSONSchema.BaseSchema) { diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index ae69221288f..1294d513ba0 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -14,6 +14,9 @@ import { fn } from "@/util/fn" import { Agent } from "@/agent/agent" import { Plugin } from "@/plugin" import { Config } from "@/config/config" +import { LLM } from "./llm" +import { SystemPrompt } from "./system" +import type { ModelMessage } from "ai" export namespace SessionCompaction { const log = Log.create({ service: "session.compaction" }) @@ -30,12 +33,66 @@ export namespace SessionCompaction { export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { const config = await 
Config.get() if (config.compaction?.auto === false) return false - const context = input.model.limit.context - if (context === 0) return false + const modelContextLimit = input.model.limit.context + if (modelContextLimit === 0) return false + + // Use configured maxContext if provided, otherwise use model's context limit + const maxContext = config.compaction?.maxContext + const context = maxContext ? Math.min(maxContext, modelContextLimit) : modelContextLimit + + // Use configured threshold (default: 0.9 = 90%) + const threshold = config.compaction?.threshold ?? 0.9 + const count = input.tokens.input + input.tokens.cache.read + input.tokens.output const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX - const usable = input.model.limit.input || context - output - return count > usable + + // When maxContext is set, use it to calculate usable; otherwise use input limit if available + const usable = maxContext + ? Math.min(input.model.limit.input || context, context) - output + : input.model.limit.input || context - output + return count > usable * threshold + } + + /** + * Check if estimated tokens exceed threshold, used by pre-check and post-check. + * + * Context limit determination: + * 1. Get model's maximum context (from model.limit.input or model.limit.context) + * 2. If user set compaction.maxContext, use the smaller of the two + * + * Example: + * - Model supports: 2M tokens + * - User set maxContext: 100k tokens + * - Actual limit used: 100k tokens (user override) + * + * @returns needed=true if estimatedTokens > contextLimit * threshold + */ + export async function shouldCompact(input: { + model: Provider.Model + agent: Agent.Info + messages: ModelMessage[] + }): Promise<{ needed: boolean; estimatedTokens: number; contextLimit: number; threshold: number }> { + const config = await Config.get() + const compactionThreshold = config.compaction?.threshold ?? 0.9 + const maxContext = config.compaction?.maxContext + const modelContextLimit = input.model.limit.input || input.model.limit.context + + if (!modelContextLimit) { + return { needed: false, estimatedTokens: 0, contextLimit: 0, threshold: compactionThreshold } + } + + // Use the smaller value: user's maxContext or model's limit + // This allows users to cap context usage on large models for cost control + const contextLimit = maxContext ? 
Math.min(maxContext, modelContextLimit) : modelContextLimit + const system = await SystemPrompt.build({ model: input.model, agent: input.agent }) + const estimatedTokens = LLM.estimateInputTokens(input.messages, system) + + return { + needed: estimatedTokens > contextLimit * compactionThreshold, + estimatedTokens, + contextLimit, + threshold: compactionThreshold, + } } export const PRUNE_MINIMUM = 20_000 diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index 1029b45ea0d..246990204a4 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -30,6 +30,36 @@ export namespace LLM { export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000 + // Estimate input tokens from messages and system prompt + // Uses standard tokenization estimate: ~4 characters per token for English text + export function estimateInputTokens(messages: ModelMessage[], systemPrompt: string[]): number { + let totalChars = 0 + + // Count system prompt + for (const sys of systemPrompt) { + totalChars += sys.length + } + + // Count all messages + for (const msg of messages) { + if (typeof msg.content === "string") { + totalChars += msg.content.length + } else if (Array.isArray(msg.content)) { + for (const part of msg.content) { + if ("text" in part && typeof part.text === "string") { + totalChars += part.text.length + } else if ("image" in part) { + // Approximate image tokens (roughly 2000 tokens per image) + totalChars += 2000 * 4 // Convert to chars for consistent calculation + } + } + } + } + + // Standard estimate: ~4 chars per token + return Math.ceil(totalChars / 4) + } + export type StreamInput = { user: MessageV2.User sessionID: string @@ -131,6 +161,10 @@ export namespace LLM { }, ) + // Estimate input tokens for dynamic max_tokens calculation + const estimatedInput = estimateInputTokens(input.messages, system) + const contextWindow = input.model.limit.input || input.model.limit.context + const maxOutputTokens = isCodex ? 
undefined : ProviderTransform.maxOutputTokens( @@ -138,6 +172,8 @@ export namespace LLM { params.options, input.model.limit.output, OUTPUT_TOKEN_MAX, + contextWindow, + estimatedInput, ) const tools = await resolveTools(input) diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 71db7f13677..95e79e08625 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -20,6 +20,28 @@ export namespace SessionProcessor { const DOOM_LOOP_THRESHOLD = 3 const log = Log.create({ service: "session.processor" }) + // Detect context window overflow errors from various providers + function isContextWindowError(error: any): boolean { + const message = error?.message?.toLowerCase() || "" + const errorCode = error?.code?.toLowerCase() || "" + + // Check common context window error patterns + const patterns = [ + "context_length_exceeded", + "context window", + "context limit", + "maximum context length", + "token limit", + "too many tokens", + "request too large", + "prompt is too long", + "input is too long", + "exceeds the model's maximum", + ] + + return patterns.some((pattern) => message.includes(pattern) || errorCode.includes(pattern)) + } + export type Info = Awaited> export type Result = Awaited> @@ -189,6 +211,26 @@ export namespace SessionProcessor { }) delete toolcalls[value.toolCallId] + + // Check if tool result might cause context overflow + const msgs = await Session.messages({ sessionID: input.sessionID }) + const modelMessages = MessageV2.toModelMessage(msgs.map((m) => ({ info: m.info, parts: m.parts }))) + const agent = await Agent.get(input.assistantMessage.agent) + const check = await SessionCompaction.shouldCompact({ + model: input.model, + agent, + messages: modelMessages, + }) + + if (check.needed) { + log.info("context overflow after tool execution", { + tool: match.tool, + estimatedTokens: check.estimatedTokens, + contextLimit: check.contextLimit, + threshold: check.threshold, + }) + needsCompaction = true + } } break } @@ -341,6 +383,14 @@ export namespace SessionProcessor { error: e, stack: JSON.stringify(e.stack), }) + + // Check for context window overflow errors and trigger compaction + if (isContextWindowError(e)) { + log.info("context window overflow detected, triggering compaction") + needsCompaction = true + break + } + const error = MessageV2.fromError(e, { providerID: input.model.providerID }) const retry = SessionRetry.retryable(error) if (retry !== undefined) { diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index f4793d1a798..bb716f82cf1 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -33,6 +33,7 @@ import { spawn } from "child_process" import { Command } from "../command" import { $, fileURLToPath } from "bun" import { ConfigMarkdown } from "../config/markdown" +import { Config } from "../config/config" import { SessionSummary } from "./summary" import { NamedError } from "@opencode-ai/util/error" import { fn } from "@/util/fn" @@ -507,8 +508,27 @@ export namespace SessionPrompt { continue } - // normal processing + // Pre-check: estimate input tokens before API call to prevent overflow errors const agent = await Agent.get(lastUser.agent) + const modelMessages = MessageV2.toModelMessage(msgs) + const check = await SessionCompaction.shouldCompact({ model, agent, messages: modelMessages }) + + if (check.needed) { + log.info("pre-check overflow", { + estimatedTokens: 
check.estimatedTokens, + contextLimit: check.contextLimit, + threshold: check.threshold, + }) + await SessionCompaction.create({ + sessionID, + agent: lastUser.agent, + model: lastUser.model, + auto: true, + }) + continue + } + + // normal processing const maxSteps = agent.steps ?? Infinity const isLastStep = step >= maxSteps msgs = await insertReminders({ @@ -688,6 +708,7 @@ export namespace SessionPrompt { for (const item of await ToolRegistry.tools( { modelID: input.model.api.id, providerID: input.model.providerID }, input.agent, + input.model, )) { const schema = ProviderTransform.schema(input.model, z.toJSONSchema(item.parameters)) tools[item.id] = tool({ diff --git a/packages/opencode/src/session/system.ts b/packages/opencode/src/session/system.ts index fff90808864..301d5427e52 100644 --- a/packages/opencode/src/session/system.ts +++ b/packages/opencode/src/session/system.ts @@ -17,6 +17,7 @@ import PROMPT_CODEX from "./prompt/codex.txt" import PROMPT_CODEX_INSTRUCTIONS from "./prompt/codex_header.txt" import type { Provider } from "@/provider/provider" import { Flag } from "@/flag/flag" +import type { Agent } from "@/agent/agent" export namespace SystemPrompt { export function header(providerID: string) { @@ -37,6 +38,20 @@ export namespace SystemPrompt { return [PROMPT_ANTHROPIC_WITHOUT_TODO] } + export async function build(input: { model: Provider.Model; agent: Agent.Info }): Promise { + const system = header(input.model.providerID) + system.push( + [ + ...(input.agent.prompt ? [input.agent.prompt] : provider(input.model)), + ...(await environment()), + ...(await custom()), + ] + .filter((x) => x) + .join("\n"), + ) + return system + } + export async function environment() { const project = Instance.project return [ diff --git a/packages/opencode/src/tool/bash.ts b/packages/opencode/src/tool/bash.ts index f3a1b04d431..563b2238fb0 100644 --- a/packages/opencode/src/tool/bash.ts +++ b/packages/opencode/src/tool/bash.ts @@ -17,7 +17,6 @@ import { Shell } from "@/shell/shell" import { BashArity } from "@/permission/arity" import { Truncate } from "./truncation" -const MAX_METADATA_LENGTH = 30_000 const DEFAULT_TIMEOUT = Flag.OPENCODE_EXPERIMENTAL_BASH_DEFAULT_TIMEOUT_MS || 2 * 60 * 1000 export const log = Log.create({ service: "bash-tool" }) @@ -51,15 +50,13 @@ const parser = lazy(async () => { }) // TODO: we may wanna rename this tool so it works better on other shells -export const BashTool = Tool.define("bash", async () => { +export const BashTool = Tool.define("bash", async (initCtx) => { const shell = Shell.acceptable() log.info("bash tool using shell", { shell }) + const maxBytes = Truncate.getMaxBytes(initCtx?.model) + const maxMetadata = Truncate.getMaxMetadata(initCtx?.model) - return { - description: DESCRIPTION.replaceAll("${directory}", Instance.directory) - .replaceAll("${maxLines}", String(Truncate.MAX_LINES)) - .replaceAll("${maxBytes}", String(Truncate.MAX_BYTES)), - parameters: z.object({ + const parameters = z.object({ command: z.string().describe("The command to execute"), timeout: z.number().describe("Optional timeout in milliseconds").optional(), workdir: z @@ -73,8 +70,14 @@ export const BashTool = Tool.define("bash", async () => { .describe( "Clear, concise description of what this command does in 5-10 words. 
Examples:\nInput: ls\nOutput: Lists files in current directory\n\nInput: git status\nOutput: Shows working tree status\n\nInput: npm install\nOutput: Installs package dependencies\n\nInput: mkdir foo\nOutput: Creates directory 'foo'", ), - }), - async execute(params, ctx) { + }) + + return { + description: DESCRIPTION.replaceAll("${directory}", Instance.directory) + .replaceAll("${maxLines}", String(Truncate.MAX_LINES)) + .replaceAll("${maxBytes}", String(maxBytes)), + parameters, + async execute(params: z.infer, ctx) { const cwd = params.workdir || Instance.directory if (params.timeout !== undefined && params.timeout < 0) { throw new Error(`Invalid timeout value: ${params.timeout}. Timeout must be a positive number.`) @@ -179,7 +182,7 @@ export const BashTool = Tool.define("bash", async () => { ctx.metadata({ metadata: { // truncate the metadata to avoid GIANT blobs of data (has nothing to do w/ what agent can access) - output: output.length > MAX_METADATA_LENGTH ? output.slice(0, MAX_METADATA_LENGTH) + "\n\n..." : output, + output: output.length > maxMetadata ? output.slice(0, maxMetadata) + "\n\n..." : output, description: params.description, }, }) @@ -247,7 +250,7 @@ export const BashTool = Tool.define("bash", async () => { return { title: params.description, metadata: { - output: output.length > MAX_METADATA_LENGTH ? output.slice(0, MAX_METADATA_LENGTH) + "\n\n..." : output, + output: output.length > maxMetadata ? output.slice(0, maxMetadata) + "\n\n..." : output, exit: proc.exitCode, description: params.description, }, diff --git a/packages/opencode/src/tool/registry.ts b/packages/opencode/src/tool/registry.ts index dad9914a289..b97e3ffc62b 100644 --- a/packages/opencode/src/tool/registry.ts +++ b/packages/opencode/src/tool/registry.ts @@ -26,6 +26,7 @@ import { Log } from "@/util/log" import { LspTool } from "./lsp" import { Truncate } from "./truncation" import { PlanExitTool, PlanEnterTool } from "./plan" +import type { Provider } from "../provider/provider" import { ApplyPatchTool } from "./apply_patch" export namespace ToolRegistry { @@ -68,7 +69,7 @@ export namespace ToolRegistry { description: def.description, execute: async (args, ctx) => { const result = await def.execute(args as any, ctx) - const out = await Truncate.output(result, {}, initCtx?.agent) + const out = await Truncate.output(result, { model: initCtx?.model }, initCtx?.agent) return { title: "", output: out.truncated ? 
out.content : result, @@ -127,6 +128,7 @@ export namespace ToolRegistry { modelID: string }, agent?: Agent.Info, + fullModel?: Provider.Model, ) { const tools = await all() const result = await Promise.all( @@ -154,7 +156,7 @@ export namespace ToolRegistry { using _ = log.time(t.id) return { id: t.id, - ...(await t.init({ agent })), + ...(await t.init({ agent, model: fullModel })), } }), ) diff --git a/packages/opencode/src/tool/tool.ts b/packages/opencode/src/tool/tool.ts index 78ab325af41..111471829d3 100644 --- a/packages/opencode/src/tool/tool.ts +++ b/packages/opencode/src/tool/tool.ts @@ -3,6 +3,7 @@ import type { MessageV2 } from "../session/message-v2" import type { Agent } from "../agent/agent" import type { PermissionNext } from "../permission/next" import { Truncate } from "./truncation" +import type { Provider } from "../provider/provider" export namespace Tool { interface Metadata { @@ -11,6 +12,7 @@ export namespace Tool { export interface InitContext { agent?: Agent.Info + model?: Provider.Model } export type Context = { @@ -70,7 +72,7 @@ export namespace Tool { if (result.metadata.truncated !== undefined) { return result } - const truncated = await Truncate.output(result.output, {}, initCtx?.agent) + const truncated = await Truncate.output(result.output, { model: initCtx?.model }, initCtx?.agent) return { ...result, output: truncated.content, diff --git a/packages/opencode/src/tool/truncation.ts b/packages/opencode/src/tool/truncation.ts index 84e799c1310..18427de0bf8 100644 --- a/packages/opencode/src/tool/truncation.ts +++ b/packages/opencode/src/tool/truncation.ts @@ -4,22 +4,65 @@ import { Global } from "../global" import { Identifier } from "../id/id" import { PermissionNext } from "../permission/next" import type { Agent } from "../agent/agent" +import type { Provider } from "../provider/provider" import { Scheduler } from "../scheduler" export namespace Truncate { export const MAX_LINES = 2000 - export const MAX_BYTES = 50 * 1024 + export const MAX_BYTES = 50 * 1024 // Fallback default + export const MAX_METADATA = 30_000 // Fallback default export const DIR = path.join(Global.Path.data, "tool-output") export const GLOB = path.join(DIR, "*") const RETENTION_MS = 7 * 24 * 60 * 60 * 1000 // 7 days const HOUR_MS = 60 * 60 * 1000 + /** + * Calculate max bytes for tool output based on model's context size. + * Automatically scales limits based on model capabilities. + * + * Formula: context * 0.05 * 4 + * - Uses 5% of model's context window for tool output + * - Converts tokens to bytes (4 chars per token) + * + * Examples: + * - GPT-4 (128k): 25.6KB output limit + * - Claude (200k): 40KB output limit + * - Gemini (2M): 400KB output limit + * + * Bounds: min 10KB, max 2MB + * + * Note: This is different from compaction.maxContext + * - compaction.maxContext: Limits total conversation context + * - getMaxBytes: Limits individual tool call output + */ + export function getMaxBytes(model?: Provider.Model): number { + if (!model?.limit?.context) return MAX_BYTES + const contextLimit = model.limit.context + if (contextLimit === 0) return MAX_BYTES + + // 5% of context converted to bytes (4 chars per token) + const calculated = Math.floor(contextLimit * 0.05 * 4) + + // Minimum 10KB, maximum 2MB + return Math.max(10 * 1024, Math.min(calculated, 2 * 1024 * 1024)) + } + + /** + * Calculate max metadata bytes (60% of max output bytes). + * Metadata is shown in UI while output goes to the model. + * Using 60% prevents UI from being overwhelmed with large outputs. 
+ */ + export function getMaxMetadata(model?: Provider.Model): number { + return Math.floor(getMaxBytes(model) * 0.6) + } + export type Result = { content: string; truncated: false } | { content: string; truncated: true; outputPath: string } export interface Options { maxLines?: number maxBytes?: number direction?: "head" | "tail" + model?: Provider.Model } export function init() { @@ -49,7 +92,7 @@ export namespace Truncate { export async function output(text: string, options: Options = {}, agent?: Agent.Info): Promise { const maxLines = options.maxLines ?? MAX_LINES - const maxBytes = options.maxBytes ?? MAX_BYTES + const maxBytes = options.maxBytes ?? getMaxBytes(options.model) const direction = options.direction ?? "head" const lines = text.split("\n") const totalBytes = Buffer.byteLength(text, "utf-8") diff --git a/packages/opencode/test/config/config.test.ts b/packages/opencode/test/config/config.test.ts index 0463d29d7c5..ea54c807fbd 100644 --- a/packages/opencode/test/config/config.test.ts +++ b/packages/opencode/test/config/config.test.ts @@ -1349,6 +1349,155 @@ describe("getPluginName", () => { }) }) +// Compaction config tests + +test("handles compaction threshold config", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + auto: true, + threshold: 0.9, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const config = await Config.get() + expect(config.compaction?.auto).toBe(true) + expect(config.compaction?.threshold).toBe(0.9) + }, + }) +}) + +test("handles compaction maxContext config", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + auto: true, + maxContext: 100000, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const config = await Config.get() + expect(config.compaction?.auto).toBe(true) + expect(config.compaction?.maxContext).toBe(100000) + }, + }) +}) + +test("handles full compaction config", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + auto: true, + prune: true, + threshold: 0.85, + maxContext: 50000, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const config = await Config.get() + expect(config.compaction).toEqual({ + auto: true, + prune: true, + threshold: 0.85, + maxContext: 50000, + }) + }, + }) +}) + +test("rejects invalid compaction threshold (below 0.5)", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + threshold: 0.3, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + await expect(Config.get()).rejects.toThrow() + }, + }) +}) + +test("rejects invalid compaction threshold (above 0.99)", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + threshold: 1.0, + }, + }), + ) + }, + }) + 
await Instance.provide({ + directory: tmp.path, + fn: async () => { + await expect(Config.get()).rejects.toThrow() + }, + }) +}) + +test("rejects invalid compaction maxContext (negative)", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + maxContext: -1000, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + await expect(Config.get()).rejects.toThrow() + }, + }) +}) + describe("deduplicatePlugins", () => { test("removes duplicates keeping higher priority (later entries)", () => { const plugins = ["global-plugin@1.0.0", "shared-plugin@1.0.0", "local-plugin@2.0.0", "shared-plugin@2.0.0"] diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 2e9c091870e..76230c3f22a 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -1,6 +1,7 @@ import { describe, expect, test } from "bun:test" import path from "path" import { SessionCompaction } from "../../src/session/compaction" +import { LLM } from "../../src/session/llm" import { Token } from "../../src/util/token" import { Instance } from "../../src/project/instance" import { Log } from "../../src/util/log" @@ -34,11 +35,194 @@ function createModel(opts: { input: { text: true, image: false, audio: false, video: false }, output: { text: true, image: false, audio: false, video: false }, }, - api: { npm: "@ai-sdk/anthropic" }, + api: { id: "anthropic", npm: "@ai-sdk/anthropic" }, options: {}, } as Provider.Model } +describe("session.compaction.shouldCompact", () => { + test("returns needed=true when estimated tokens exceed threshold", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 100_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(400_000) }, // ~100k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.needed).toBe(true) + expect(result.contextLimit).toBe(100_000) + expect(result.threshold).toBe(0.9) + expect(result.estimatedTokens).toBeGreaterThan(result.contextLimit * result.threshold) + }, + }) + }) + + test("returns needed=false when estimated tokens under threshold", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 200_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(100_000) }, // ~25k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.needed).toBe(false) + expect(result.estimatedTokens).toBeLessThanOrEqual(result.contextLimit * result.threshold) + }, + }) + }) + + test("returns needed=false when model context limit is 0", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 0, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(400_000) }, + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + 
+ expect(result.needed).toBe(false) + expect(result.contextLimit).toBe(0) + }, + }) + }) + + test("respects maxContext when set lower than model context", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 50_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 200_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(200_000) }, // ~50k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.contextLimit).toBe(50_000) + expect(result.needed).toBe(true) // 50k tokens > 50k * 0.9 = 45k + }, + }) + }) + + test("uses model context when maxContext is higher", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 500_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 100_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(380_000) }, // ~95k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.contextLimit).toBe(100_000) // Should use model's lower limit + expect(result.needed).toBe(true) // 95k > 100k * 0.9 = 90k + }, + }) + }) + + test("respects custom threshold from config", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { threshold: 0.8 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 100_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(340_000) }, // ~85k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.threshold).toBe(0.8) + expect(result.needed).toBe(true) // 85k > 100k * 0.8 = 80k + }, + }) + }) + + test("uses input limit when available", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 400_000, input: 272_000, output: 128_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(1_000_000) }, // ~250k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.contextLimit).toBe(272_000) // Should use input limit + expect(result.needed).toBe(true) // 250k > 272k * 0.9 = 244.8k + }, + }) + }) + + test("returns correct estimatedTokens value", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 100_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(4000) }, // exactly 1000 tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.estimatedTokens).toBeGreaterThan(0) + // Should be around 1000 tokens plus system 
prompt + }, + }) + }) +}) + describe("session.compaction.isOverflow", () => { test("returns true when token count exceeds usable context", async () => { await using tmp = await tmpdir() @@ -144,6 +328,75 @@ describe("session.compaction.isOverflow", () => { }, }) }) + + test("respects maxContext when set lower than model context", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 50_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // Model has 200k context, but maxContext limits to 50k + const model = createModel({ context: 200_000, output: 32_000 }) + // 30k tokens would be fine for 200k context, but exceeds 50k - 32k = 18k usable + const tokens = { input: 20_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } } + expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) + }, + }) + }) + + test("uses model context when maxContext is higher", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 500_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // maxContext is 500k but model only has 100k + const model = createModel({ context: 100_000, output: 32_000 }) + const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } } + // Should still overflow based on model's 100k limit + expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) + }, + }) + }) + + test("maxContext works with input limit", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 100_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // Model has input limit of 272k, but maxContext is 100k + const model = createModel({ context: 400_000, input: 272_000, output: 128_000 }) + // 90k tokens would be fine for 272k input limit, but should respect maxContext + const tokens = { input: 90_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } } + expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) + }, + }) + }) }) describe("util.token.estimate", () => { @@ -291,3 +544,60 @@ describe("session.getUsage", () => { expect(result.cost).toBe(3 + 1.5) }) }) + +describe("LLM.estimateInputTokens", () => { + test("estimates tokens from string content messages", () => { + const messages = [ + { role: "user" as const, content: "x".repeat(1000) }, + { role: "assistant" as const, content: "y".repeat(500) }, + ] + const systemPrompt = ["z".repeat(200)] + const result = LLM.estimateInputTokens(messages, systemPrompt) + // Total chars: 1000 + 500 + 200 = 1700 + // Tokens: Math.ceil(1700 / 4) = 425 + expect(result).toBe(425) + }) + + test("estimates tokens from array content messages", () => { + const messages = [ + { + role: "user" as const, + content: [{ type: "text" as const, text: "x".repeat(800) }], + }, + ] + const systemPrompt: string[] = [] + const result = LLM.estimateInputTokens(messages, systemPrompt) + // 800 chars / 4 = 200 tokens + expect(result).toBe(200) + }) + + test("estimates tokens for images", () => { + const messages = [ + { + role: "user" as const, + content: [ + { type: "text" as const, text: "describe this" }, + { 
type: "image" as const, image: new URL("https://example.com/img.png") }, + ], + }, + ] + const systemPrompt: string[] = [] + const result = LLM.estimateInputTokens(messages, systemPrompt) + // "describe this" (13 chars) + image (2000 * 4 = 8000 chars) = 8013 chars + // Math.ceil(8013 / 4) = 2004 tokens + expect(result).toBe(2004) + }) + + test("handles empty messages", () => { + const result = LLM.estimateInputTokens([], []) + expect(result).toBe(0) + }) + + test("handles multiple system prompts", () => { + const messages: { role: "user" | "assistant"; content: string }[] = [] + const systemPrompt = ["prompt1".repeat(100), "prompt2".repeat(50)] + const result = LLM.estimateInputTokens(messages, systemPrompt) + // (700 + 350) / 4 = 262.5 → Math.ceil = 263 + expect(result).toBe(263) + }) +}) diff --git a/packages/opencode/test/tool/truncation.test.ts b/packages/opencode/test/tool/truncation.test.ts index 09222f279fa..7f7560065e6 100644 --- a/packages/opencode/test/tool/truncation.test.ts +++ b/packages/opencode/test/tool/truncation.test.ts @@ -122,6 +122,105 @@ describe("Truncate", () => { }) }) + describe("getMaxBytes", () => { + test("returns fallback when no model provided", () => { + const result = Truncate.getMaxBytes() + expect(result).toBe(Truncate.MAX_BYTES) + }) + + test("returns fallback when model has no context limit", () => { + const model = { limit: {} } as any + const result = Truncate.getMaxBytes(model) + expect(result).toBe(Truncate.MAX_BYTES) + }) + + test("returns fallback when context limit is 0", () => { + const model = { limit: { context: 0 } } as any + const result = Truncate.getMaxBytes(model) + expect(result).toBe(Truncate.MAX_BYTES) + }) + + test("returns minimum 10KB for very small models", () => { + const model = { limit: { context: 1000 } } as any + const result = Truncate.getMaxBytes(model) + expect(result).toBe(10 * 1024) // Should hit minimum + }) + + test("calculates correctly for GPT-4 (128k context)", () => { + const model = { limit: { context: 128_000 } } as any + const result = Truncate.getMaxBytes(model) + // 128000 * 0.05 * 4 = 25600 + expect(result).toBe(25_600) + }) + + test("calculates correctly for Claude (200k context)", () => { + const model = { limit: { context: 200_000 } } as any + const result = Truncate.getMaxBytes(model) + // 200000 * 0.05 * 4 = 40000 + expect(result).toBe(40_000) + }) + + test("calculates correctly for Gemini (2M context)", () => { + const model = { limit: { context: 2_000_000 } } as any + const result = Truncate.getMaxBytes(model) + // 2000000 * 0.05 * 4 = 400000 (400KB, well under 2MB cap) + expect(result).toBe(400_000) + }) + + test("caps at maximum 2MB for extremely large models", () => { + const model = { limit: { context: 20_000_000 } } as any + const result = Truncate.getMaxBytes(model) + // Would calculate to 4000000 (4MB), but should cap at 2MB + expect(result).toBe(2 * 1024 * 1024) + }) + + test("uses 5% of context converted to bytes (4 chars/token)", () => { + const model = { limit: { context: 100_000 } } as any + const result = Truncate.getMaxBytes(model) + // 100000 * 0.05 * 4 = 20000 + expect(result).toBe(20_000) + }) + }) + + describe("getMaxMetadata", () => { + test("returns 60% of max bytes when no model provided", () => { + const result = Truncate.getMaxMetadata() + expect(result).toBe(Math.floor(Truncate.MAX_BYTES * 0.6)) + }) + + test("returns 60% of calculated max bytes for GPT-4", () => { + const model = { limit: { context: 128_000 } } as any + const result = Truncate.getMaxMetadata(model) + const 
maxBytes = Truncate.getMaxBytes(model) + expect(result).toBe(Math.floor(maxBytes * 0.6)) + expect(result).toBe(15_360) // 25600 * 0.6 + }) + + test("returns 60% of calculated max bytes for Claude", () => { + const model = { limit: { context: 200_000 } } as any + const result = Truncate.getMaxMetadata(model) + const maxBytes = Truncate.getMaxBytes(model) + expect(result).toBe(Math.floor(maxBytes * 0.6)) + expect(result).toBe(24_000) // 40000 * 0.6 + }) + + test("returns 60% of calculated max bytes for Gemini", () => { + const model = { limit: { context: 2_000_000 } } as any + const result = Truncate.getMaxMetadata(model) + const maxBytes = Truncate.getMaxBytes(model) + expect(result).toBe(Math.floor(maxBytes * 0.6)) + expect(result).toBe(240_000) // 400000 * 0.6 + }) + + test("returns 60% of capped max bytes for extremely large models", () => { + const model = { limit: { context: 20_000_000 } } as any + const result = Truncate.getMaxMetadata(model) + const maxBytes = Truncate.getMaxBytes(model) + expect(result).toBe(Math.floor(maxBytes * 0.6)) + expect(result).toBe(Math.floor(2 * 1024 * 1024 * 0.6)) + }) + }) + describe("cleanup", () => { const DAY_MS = 24 * 60 * 60 * 1000 let oldFile: string diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index b7e72fbad8f..a126d02b7f0 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -1757,6 +1757,14 @@ export type Config = { * Enable pruning of old tool outputs (default: true) */ prune?: boolean + /** + * Percentage of context window to trigger compaction (default: 0.9). Value between 0.5 and 0.99. + */ + threshold?: number + /** + * Override the model's context limit. Useful for limiting context usage on models with large context windows. + */ + maxContext?: number } experimental?: { hook?: {
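
Usage sketch (illustrative, not part of the patch): the option names below come from the compaction schema added in config.ts above, and the numeric values are examples only. In opencode.json the new knobs combine roughly as

    {
      "$schema": "https://opencode.ai/config.json",
      "compaction": {
        "auto": true,
        "prune": true,
        "threshold": 0.85,
        "maxContext": 100000
      }
    }

Assuming a model with a 200k context window, no separate input limit, and the default 32k output cap, the updated isOverflow would then trigger auto-compaction once the tracked input + cached + output token count exceeds (min(100000, 200000) - 32000) * 0.85 = 57,800 tokens. The dynamic max_tokens clamp in transform.ts follows the same arithmetic on the output side: for example, with a 272k input window and roughly 250k estimated input tokens, maxOutputTokens drops from the 32k default to 272000 - 250000 - 4000 = 18,000, and never falls below the 1,000-token floor.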