diff --git a/package.json b/package.json index ca9602174a2..00fbaef518e 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "description": "AI-powered development tool", "private": true, "type": "module", - "packageManager": "bun@1.3.5", + "packageManager": "bun@1.3.6", "scripts": { "dev": "bun run --cwd packages/opencode --conditions=browser src/index.ts", "typecheck": "bun turbo typecheck", diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index ddb3af4b0a8..5242b3fa10f 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -1028,6 +1028,22 @@ export namespace Config { .object({ auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"), prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"), + threshold: z + .number() + .min(0.5) + .max(0.99) + .optional() + .describe( + "Percentage of context window to trigger compaction (default: 0.9). Value between 0.5 and 0.99.", + ), + maxContext: z + .number() + .int() + .positive() + .optional() + .describe( + "Override the model's context limit to a lower value. This sets a user-defined cap on context usage, useful for cost control on large models. Example: If your model supports 2M tokens but you set maxContext to 100k, only 100k will be used. The actual limit will be min(model.limit.context, maxContext).", + ), }) .optional(), experimental: z diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index c983bf32c4f..be54fa835e2 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -632,9 +632,21 @@ export namespace ProviderTransform { options: Record, modelLimit: number, globalLimit: number, + contextWindow?: number, + estimatedInputTokens?: number, ): number { const modelCap = modelLimit || globalLimit - const standardLimit = Math.min(modelCap, globalLimit) + let standardLimit = Math.min(modelCap, globalLimit) + + // Dynamic max_tokens calculation based on input size and context window + if (contextWindow && estimatedInputTokens) { + const SAFETY_BUFFER = 4000 // Buffer to account for estimation errors + const availableTokens = contextWindow - estimatedInputTokens - SAFETY_BUFFER + + if (availableTokens > 0) { + standardLimit = Math.min(standardLimit, availableTokens) + } + } if (npm === "@ai-sdk/anthropic") { const thinking = options?.["thinking"] @@ -649,7 +661,8 @@ export namespace ProviderTransform { } } - return standardLimit + // Ensure minimum of 1000 tokens + return Math.max(1000, standardLimit) } export function schema(model: Provider.Model, schema: JSONSchema.BaseSchema) { diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index ae69221288f..1294d513ba0 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -14,6 +14,9 @@ import { fn } from "@/util/fn" import { Agent } from "@/agent/agent" import { Plugin } from "@/plugin" import { Config } from "@/config/config" +import { LLM } from "./llm" +import { SystemPrompt } from "./system" +import type { ModelMessage } from "ai" export namespace SessionCompaction { const log = Log.create({ service: "session.compaction" }) @@ -30,12 +33,66 @@ export namespace SessionCompaction { export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { const config = await 
Config.get() if (config.compaction?.auto === false) return false - const context = input.model.limit.context - if (context === 0) return false + const modelContextLimit = input.model.limit.context + if (modelContextLimit === 0) return false + + // Use configured maxContext if provided, otherwise use model's context limit + const maxContext = config.compaction?.maxContext + const context = maxContext ? Math.min(maxContext, modelContextLimit) : modelContextLimit + + // Use configured threshold (default: 0.9 = 90%) + const threshold = config.compaction?.threshold ?? 0.9 + const count = input.tokens.input + input.tokens.cache.read + input.tokens.output const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX - const usable = input.model.limit.input || context - output - return count > usable + + // When maxContext is set, use it to calculate usable; otherwise use input limit if available + const usable = maxContext + ? Math.min(input.model.limit.input || context, context) - output + : input.model.limit.input || context - output + return count > usable * threshold + } + + /** + * Check if estimated tokens exceed threshold, used by pre-check and post-check. + * + * Context limit determination: + * 1. Get model's maximum context (from model.limit.input or model.limit.context) + * 2. If user set compaction.maxContext, use the smaller of the two + * + * Example: + * - Model supports: 2M tokens + * - User set maxContext: 100k tokens + * - Actual limit used: 100k tokens (user override) + * + * @returns needed=true if estimatedTokens > contextLimit * threshold + */ + export async function shouldCompact(input: { + model: Provider.Model + agent: Agent.Info + messages: ModelMessage[] + }): Promise<{ needed: boolean; estimatedTokens: number; contextLimit: number; threshold: number }> { + const config = await Config.get() + const compactionThreshold = config.compaction?.threshold ?? 0.9 + const maxContext = config.compaction?.maxContext + const modelContextLimit = input.model.limit.input || input.model.limit.context + + if (!modelContextLimit) { + return { needed: false, estimatedTokens: 0, contextLimit: 0, threshold: compactionThreshold } + } + + // Use the smaller value: user's maxContext or model's limit + // This allows users to cap context usage on large models for cost control + const contextLimit = maxContext ? 
Math.min(maxContext, modelContextLimit) : modelContextLimit + const system = await SystemPrompt.build({ model: input.model, agent: input.agent }) + const estimatedTokens = LLM.estimateInputTokens(input.messages, system) + + return { + needed: estimatedTokens > contextLimit * compactionThreshold, + estimatedTokens, + contextLimit, + threshold: compactionThreshold, + } } export const PRUNE_MINIMUM = 20_000 diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index 1029b45ea0d..246990204a4 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -30,6 +30,36 @@ export namespace LLM { export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000 + // Estimate input tokens from messages and system prompt + // Uses standard tokenization estimate: ~4 characters per token for English text + export function estimateInputTokens(messages: ModelMessage[], systemPrompt: string[]): number { + let totalChars = 0 + + // Count system prompt + for (const sys of systemPrompt) { + totalChars += sys.length + } + + // Count all messages + for (const msg of messages) { + if (typeof msg.content === "string") { + totalChars += msg.content.length + } else if (Array.isArray(msg.content)) { + for (const part of msg.content) { + if ("text" in part && typeof part.text === "string") { + totalChars += part.text.length + } else if ("image" in part) { + // Approximate image tokens (roughly 2000 tokens per image) + totalChars += 2000 * 4 // Convert to chars for consistent calculation + } + } + } + } + + // Standard estimate: ~4 chars per token + return Math.ceil(totalChars / 4) + } + export type StreamInput = { user: MessageV2.User sessionID: string @@ -131,6 +161,10 @@ export namespace LLM { }, ) + // Estimate input tokens for dynamic max_tokens calculation + const estimatedInput = estimateInputTokens(input.messages, system) + const contextWindow = input.model.limit.input || input.model.limit.context + const maxOutputTokens = isCodex ? 
undefined : ProviderTransform.maxOutputTokens( @@ -138,6 +172,8 @@ export namespace LLM { params.options, input.model.limit.output, OUTPUT_TOKEN_MAX, + contextWindow, + estimatedInput, ) const tools = await resolveTools(input) diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 71db7f13677..95e79e08625 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -20,6 +20,28 @@ export namespace SessionProcessor { const DOOM_LOOP_THRESHOLD = 3 const log = Log.create({ service: "session.processor" }) + // Detect context window overflow errors from various providers + function isContextWindowError(error: any): boolean { + const message = error?.message?.toLowerCase() || "" + const errorCode = error?.code?.toLowerCase() || "" + + // Check common context window error patterns + const patterns = [ + "context_length_exceeded", + "context window", + "context limit", + "maximum context length", + "token limit", + "too many tokens", + "request too large", + "prompt is too long", + "input is too long", + "exceeds the model's maximum", + ] + + return patterns.some((pattern) => message.includes(pattern) || errorCode.includes(pattern)) + } + export type Info = Awaited> export type Result = Awaited> @@ -189,6 +211,26 @@ export namespace SessionProcessor { }) delete toolcalls[value.toolCallId] + + // Check if tool result might cause context overflow + const msgs = await Session.messages({ sessionID: input.sessionID }) + const modelMessages = MessageV2.toModelMessage(msgs.map((m) => ({ info: m.info, parts: m.parts }))) + const agent = await Agent.get(input.assistantMessage.agent) + const check = await SessionCompaction.shouldCompact({ + model: input.model, + agent, + messages: modelMessages, + }) + + if (check.needed) { + log.info("context overflow after tool execution", { + tool: match.tool, + estimatedTokens: check.estimatedTokens, + contextLimit: check.contextLimit, + threshold: check.threshold, + }) + needsCompaction = true + } } break } @@ -341,6 +383,14 @@ export namespace SessionProcessor { error: e, stack: JSON.stringify(e.stack), }) + + // Check for context window overflow errors and trigger compaction + if (isContextWindowError(e)) { + log.info("context window overflow detected, triggering compaction") + needsCompaction = true + break + } + const error = MessageV2.fromError(e, { providerID: input.model.providerID }) const retry = SessionRetry.retryable(error) if (retry !== undefined) { diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index f4793d1a798..bb716f82cf1 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -33,6 +33,7 @@ import { spawn } from "child_process" import { Command } from "../command" import { $, fileURLToPath } from "bun" import { ConfigMarkdown } from "../config/markdown" +import { Config } from "../config/config" import { SessionSummary } from "./summary" import { NamedError } from "@opencode-ai/util/error" import { fn } from "@/util/fn" @@ -507,8 +508,27 @@ export namespace SessionPrompt { continue } - // normal processing + // Pre-check: estimate input tokens before API call to prevent overflow errors const agent = await Agent.get(lastUser.agent) + const modelMessages = MessageV2.toModelMessage(msgs) + const check = await SessionCompaction.shouldCompact({ model, agent, messages: modelMessages }) + + if (check.needed) { + log.info("pre-check overflow", { + estimatedTokens: 
check.estimatedTokens, + contextLimit: check.contextLimit, + threshold: check.threshold, + }) + await SessionCompaction.create({ + sessionID, + agent: lastUser.agent, + model: lastUser.model, + auto: true, + }) + continue + } + + // normal processing const maxSteps = agent.steps ?? Infinity const isLastStep = step >= maxSteps msgs = await insertReminders({ @@ -688,6 +708,7 @@ export namespace SessionPrompt { for (const item of await ToolRegistry.tools( { modelID: input.model.api.id, providerID: input.model.providerID }, input.agent, + input.model, )) { const schema = ProviderTransform.schema(input.model, z.toJSONSchema(item.parameters)) tools[item.id] = tool({ diff --git a/packages/opencode/src/session/system.ts b/packages/opencode/src/session/system.ts index fff90808864..301d5427e52 100644 --- a/packages/opencode/src/session/system.ts +++ b/packages/opencode/src/session/system.ts @@ -17,6 +17,7 @@ import PROMPT_CODEX from "./prompt/codex.txt" import PROMPT_CODEX_INSTRUCTIONS from "./prompt/codex_header.txt" import type { Provider } from "@/provider/provider" import { Flag } from "@/flag/flag" +import type { Agent } from "@/agent/agent" export namespace SystemPrompt { export function header(providerID: string) { @@ -37,6 +38,20 @@ export namespace SystemPrompt { return [PROMPT_ANTHROPIC_WITHOUT_TODO] } + export async function build(input: { model: Provider.Model; agent: Agent.Info }): Promise { + const system = header(input.model.providerID) + system.push( + [ + ...(input.agent.prompt ? [input.agent.prompt] : provider(input.model)), + ...(await environment()), + ...(await custom()), + ] + .filter((x) => x) + .join("\n"), + ) + return system + } + export async function environment() { const project = Instance.project return [ diff --git a/packages/opencode/src/tool/bash.ts b/packages/opencode/src/tool/bash.ts index f3a1b04d431..563b2238fb0 100644 --- a/packages/opencode/src/tool/bash.ts +++ b/packages/opencode/src/tool/bash.ts @@ -17,7 +17,6 @@ import { Shell } from "@/shell/shell" import { BashArity } from "@/permission/arity" import { Truncate } from "./truncation" -const MAX_METADATA_LENGTH = 30_000 const DEFAULT_TIMEOUT = Flag.OPENCODE_EXPERIMENTAL_BASH_DEFAULT_TIMEOUT_MS || 2 * 60 * 1000 export const log = Log.create({ service: "bash-tool" }) @@ -51,15 +50,13 @@ const parser = lazy(async () => { }) // TODO: we may wanna rename this tool so it works better on other shells -export const BashTool = Tool.define("bash", async () => { +export const BashTool = Tool.define("bash", async (initCtx) => { const shell = Shell.acceptable() log.info("bash tool using shell", { shell }) + const maxBytes = Truncate.getMaxBytes(initCtx?.model) + const maxMetadata = Truncate.getMaxMetadata(initCtx?.model) - return { - description: DESCRIPTION.replaceAll("${directory}", Instance.directory) - .replaceAll("${maxLines}", String(Truncate.MAX_LINES)) - .replaceAll("${maxBytes}", String(Truncate.MAX_BYTES)), - parameters: z.object({ + const parameters = z.object({ command: z.string().describe("The command to execute"), timeout: z.number().describe("Optional timeout in milliseconds").optional(), workdir: z @@ -73,8 +70,14 @@ export const BashTool = Tool.define("bash", async () => { .describe( "Clear, concise description of what this command does in 5-10 words. 
Examples:\nInput: ls\nOutput: Lists files in current directory\n\nInput: git status\nOutput: Shows working tree status\n\nInput: npm install\nOutput: Installs package dependencies\n\nInput: mkdir foo\nOutput: Creates directory 'foo'", ), - }), - async execute(params, ctx) { + }) + + return { + description: DESCRIPTION.replaceAll("${directory}", Instance.directory) + .replaceAll("${maxLines}", String(Truncate.MAX_LINES)) + .replaceAll("${maxBytes}", String(maxBytes)), + parameters, + async execute(params: z.infer, ctx) { const cwd = params.workdir || Instance.directory if (params.timeout !== undefined && params.timeout < 0) { throw new Error(`Invalid timeout value: ${params.timeout}. Timeout must be a positive number.`) @@ -179,7 +182,7 @@ export const BashTool = Tool.define("bash", async () => { ctx.metadata({ metadata: { // truncate the metadata to avoid GIANT blobs of data (has nothing to do w/ what agent can access) - output: output.length > MAX_METADATA_LENGTH ? output.slice(0, MAX_METADATA_LENGTH) + "\n\n..." : output, + output: output.length > maxMetadata ? output.slice(0, maxMetadata) + "\n\n..." : output, description: params.description, }, }) @@ -247,7 +250,7 @@ export const BashTool = Tool.define("bash", async () => { return { title: params.description, metadata: { - output: output.length > MAX_METADATA_LENGTH ? output.slice(0, MAX_METADATA_LENGTH) + "\n\n..." : output, + output: output.length > maxMetadata ? output.slice(0, maxMetadata) + "\n\n..." : output, exit: proc.exitCode, description: params.description, }, diff --git a/packages/opencode/src/tool/registry.ts b/packages/opencode/src/tool/registry.ts index dad9914a289..b97e3ffc62b 100644 --- a/packages/opencode/src/tool/registry.ts +++ b/packages/opencode/src/tool/registry.ts @@ -26,6 +26,7 @@ import { Log } from "@/util/log" import { LspTool } from "./lsp" import { Truncate } from "./truncation" import { PlanExitTool, PlanEnterTool } from "./plan" +import type { Provider } from "../provider/provider" import { ApplyPatchTool } from "./apply_patch" export namespace ToolRegistry { @@ -68,7 +69,7 @@ export namespace ToolRegistry { description: def.description, execute: async (args, ctx) => { const result = await def.execute(args as any, ctx) - const out = await Truncate.output(result, {}, initCtx?.agent) + const out = await Truncate.output(result, { model: initCtx?.model }, initCtx?.agent) return { title: "", output: out.truncated ? 
out.content : result, @@ -127,6 +128,7 @@ export namespace ToolRegistry { modelID: string }, agent?: Agent.Info, + fullModel?: Provider.Model, ) { const tools = await all() const result = await Promise.all( @@ -154,7 +156,7 @@ export namespace ToolRegistry { using _ = log.time(t.id) return { id: t.id, - ...(await t.init({ agent })), + ...(await t.init({ agent, model: fullModel })), } }), ) diff --git a/packages/opencode/src/tool/tool.ts b/packages/opencode/src/tool/tool.ts index 78ab325af41..111471829d3 100644 --- a/packages/opencode/src/tool/tool.ts +++ b/packages/opencode/src/tool/tool.ts @@ -3,6 +3,7 @@ import type { MessageV2 } from "../session/message-v2" import type { Agent } from "../agent/agent" import type { PermissionNext } from "../permission/next" import { Truncate } from "./truncation" +import type { Provider } from "../provider/provider" export namespace Tool { interface Metadata { @@ -11,6 +12,7 @@ export namespace Tool { export interface InitContext { agent?: Agent.Info + model?: Provider.Model } export type Context = { @@ -70,7 +72,7 @@ export namespace Tool { if (result.metadata.truncated !== undefined) { return result } - const truncated = await Truncate.output(result.output, {}, initCtx?.agent) + const truncated = await Truncate.output(result.output, { model: initCtx?.model }, initCtx?.agent) return { ...result, output: truncated.content, diff --git a/packages/opencode/src/tool/truncation.ts b/packages/opencode/src/tool/truncation.ts index 84e799c1310..18427de0bf8 100644 --- a/packages/opencode/src/tool/truncation.ts +++ b/packages/opencode/src/tool/truncation.ts @@ -4,22 +4,65 @@ import { Global } from "../global" import { Identifier } from "../id/id" import { PermissionNext } from "../permission/next" import type { Agent } from "../agent/agent" +import type { Provider } from "../provider/provider" import { Scheduler } from "../scheduler" export namespace Truncate { export const MAX_LINES = 2000 - export const MAX_BYTES = 50 * 1024 + export const MAX_BYTES = 50 * 1024 // Fallback default + export const MAX_METADATA = 30_000 // Fallback default export const DIR = path.join(Global.Path.data, "tool-output") export const GLOB = path.join(DIR, "*") const RETENTION_MS = 7 * 24 * 60 * 60 * 1000 // 7 days const HOUR_MS = 60 * 60 * 1000 + /** + * Calculate max bytes for tool output based on model's context size. + * Automatically scales limits based on model capabilities. + * + * Formula: context * 0.05 * 4 + * - Uses 5% of model's context window for tool output + * - Converts tokens to bytes (4 chars per token) + * + * Examples: + * - GPT-4 (128k): 25.6KB output limit + * - Claude (200k): 40KB output limit + * - Gemini (2M): 400KB output limit + * + * Bounds: min 10KB, max 2MB + * + * Note: This is different from compaction.maxContext + * - compaction.maxContext: Limits total conversation context + * - getMaxBytes: Limits individual tool call output + */ + export function getMaxBytes(model?: Provider.Model): number { + if (!model?.limit?.context) return MAX_BYTES + const contextLimit = model.limit.context + if (contextLimit === 0) return MAX_BYTES + + // 5% of context converted to bytes (4 chars per token) + const calculated = Math.floor(contextLimit * 0.05 * 4) + + // Minimum 10KB, maximum 2MB + return Math.max(10 * 1024, Math.min(calculated, 2 * 1024 * 1024)) + } + + /** + * Calculate max metadata bytes (60% of max output bytes). + * Metadata is shown in UI while output goes to the model. + * Using 60% prevents UI from being overwhelmed with large outputs. 
+ */ + export function getMaxMetadata(model?: Provider.Model): number { + return Math.floor(getMaxBytes(model) * 0.6) + } + export type Result = { content: string; truncated: false } | { content: string; truncated: true; outputPath: string } export interface Options { maxLines?: number maxBytes?: number direction?: "head" | "tail" + model?: Provider.Model } export function init() { @@ -49,7 +92,7 @@ export namespace Truncate { export async function output(text: string, options: Options = {}, agent?: Agent.Info): Promise { const maxLines = options.maxLines ?? MAX_LINES - const maxBytes = options.maxBytes ?? MAX_BYTES + const maxBytes = options.maxBytes ?? getMaxBytes(options.model) const direction = options.direction ?? "head" const lines = text.split("\n") const totalBytes = Buffer.byteLength(text, "utf-8") diff --git a/packages/opencode/test/config/config.test.ts b/packages/opencode/test/config/config.test.ts index 0463d29d7c5..ea54c807fbd 100644 --- a/packages/opencode/test/config/config.test.ts +++ b/packages/opencode/test/config/config.test.ts @@ -1349,6 +1349,155 @@ describe("getPluginName", () => { }) }) +// Compaction config tests + +test("handles compaction threshold config", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + auto: true, + threshold: 0.9, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const config = await Config.get() + expect(config.compaction?.auto).toBe(true) + expect(config.compaction?.threshold).toBe(0.9) + }, + }) +}) + +test("handles compaction maxContext config", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + auto: true, + maxContext: 100000, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const config = await Config.get() + expect(config.compaction?.auto).toBe(true) + expect(config.compaction?.maxContext).toBe(100000) + }, + }) +}) + +test("handles full compaction config", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + auto: true, + prune: true, + threshold: 0.85, + maxContext: 50000, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const config = await Config.get() + expect(config.compaction).toEqual({ + auto: true, + prune: true, + threshold: 0.85, + maxContext: 50000, + }) + }, + }) +}) + +test("rejects invalid compaction threshold (below 0.5)", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + threshold: 0.3, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + await expect(Config.get()).rejects.toThrow() + }, + }) +}) + +test("rejects invalid compaction threshold (above 0.99)", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + threshold: 1.0, + }, + }), + ) + }, + }) + 
await Instance.provide({ + directory: tmp.path, + fn: async () => { + await expect(Config.get()).rejects.toThrow() + }, + }) +}) + +test("rejects invalid compaction maxContext (negative)", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + compaction: { + maxContext: -1000, + }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + await expect(Config.get()).rejects.toThrow() + }, + }) +}) + describe("deduplicatePlugins", () => { test("removes duplicates keeping higher priority (later entries)", () => { const plugins = ["global-plugin@1.0.0", "shared-plugin@1.0.0", "local-plugin@2.0.0", "shared-plugin@2.0.0"] diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 2e9c091870e..76230c3f22a 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -1,6 +1,7 @@ import { describe, expect, test } from "bun:test" import path from "path" import { SessionCompaction } from "../../src/session/compaction" +import { LLM } from "../../src/session/llm" import { Token } from "../../src/util/token" import { Instance } from "../../src/project/instance" import { Log } from "../../src/util/log" @@ -34,11 +35,194 @@ function createModel(opts: { input: { text: true, image: false, audio: false, video: false }, output: { text: true, image: false, audio: false, video: false }, }, - api: { npm: "@ai-sdk/anthropic" }, + api: { id: "anthropic", npm: "@ai-sdk/anthropic" }, options: {}, } as Provider.Model } +describe("session.compaction.shouldCompact", () => { + test("returns needed=true when estimated tokens exceed threshold", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 100_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(400_000) }, // ~100k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.needed).toBe(true) + expect(result.contextLimit).toBe(100_000) + expect(result.threshold).toBe(0.9) + expect(result.estimatedTokens).toBeGreaterThan(result.contextLimit * result.threshold) + }, + }) + }) + + test("returns needed=false when estimated tokens under threshold", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 200_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(100_000) }, // ~25k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.needed).toBe(false) + expect(result.estimatedTokens).toBeLessThanOrEqual(result.contextLimit * result.threshold) + }, + }) + }) + + test("returns needed=false when model context limit is 0", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 0, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(400_000) }, + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + 
+ expect(result.needed).toBe(false) + expect(result.contextLimit).toBe(0) + }, + }) + }) + + test("respects maxContext when set lower than model context", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 50_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 200_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(200_000) }, // ~50k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.contextLimit).toBe(50_000) + expect(result.needed).toBe(true) // 50k tokens > 50k * 0.9 = 45k + }, + }) + }) + + test("uses model context when maxContext is higher", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 500_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 100_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(380_000) }, // ~95k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.contextLimit).toBe(100_000) // Should use model's lower limit + expect(result.needed).toBe(true) // 95k > 100k * 0.9 = 90k + }, + }) + }) + + test("respects custom threshold from config", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { threshold: 0.8 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 100_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(340_000) }, // ~85k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.threshold).toBe(0.8) + expect(result.needed).toBe(true) // 85k > 100k * 0.8 = 80k + }, + }) + }) + + test("uses input limit when available", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 400_000, input: 272_000, output: 128_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(1_000_000) }, // ~250k tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.contextLimit).toBe(272_000) // Should use input limit + expect(result.needed).toBe(true) // 250k > 272k * 0.9 = 244.8k + }, + }) + }) + + test("returns correct estimatedTokens value", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const model = createModel({ context: 100_000, output: 32_000 }) + const agent = { permission: [] } as any + const messages = [ + { role: "user" as const, content: "x".repeat(4000) }, // exactly 1000 tokens + ] + + const result = await SessionCompaction.shouldCompact({ model, agent, messages }) + + expect(result.estimatedTokens).toBeGreaterThan(0) + // Should be around 1000 tokens plus system 
prompt + }, + }) + }) +}) + describe("session.compaction.isOverflow", () => { test("returns true when token count exceeds usable context", async () => { await using tmp = await tmpdir() @@ -144,6 +328,75 @@ describe("session.compaction.isOverflow", () => { }, }) }) + + test("respects maxContext when set lower than model context", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 50_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // Model has 200k context, but maxContext limits to 50k + const model = createModel({ context: 200_000, output: 32_000 }) + // 30k tokens would be fine for 200k context, but exceeds 50k - 32k = 18k usable + const tokens = { input: 20_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } } + expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) + }, + }) + }) + + test("uses model context when maxContext is higher", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 500_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // maxContext is 500k but model only has 100k + const model = createModel({ context: 100_000, output: 32_000 }) + const tokens = { input: 75_000, output: 5_000, reasoning: 0, cache: { read: 0, write: 0 } } + // Should still overflow based on model's 100k limit + expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) + }, + }) + }) + + test("maxContext works with input limit", async () => { + await using tmp = await tmpdir({ + init: async (dir) => { + await Bun.write( + path.join(dir, "opencode.json"), + JSON.stringify({ + compaction: { maxContext: 100_000 }, + }), + ) + }, + }) + await Instance.provide({ + directory: tmp.path, + fn: async () => { + // Model has input limit of 272k, but maxContext is 100k + const model = createModel({ context: 400_000, input: 272_000, output: 128_000 }) + // 90k tokens would be fine for 272k input limit, but should respect maxContext + const tokens = { input: 90_000, output: 10_000, reasoning: 0, cache: { read: 5_000, write: 0 } } + expect(await SessionCompaction.isOverflow({ tokens, model })).toBe(true) + }, + }) + }) }) describe("util.token.estimate", () => { @@ -291,3 +544,60 @@ describe("session.getUsage", () => { expect(result.cost).toBe(3 + 1.5) }) }) + +describe("LLM.estimateInputTokens", () => { + test("estimates tokens from string content messages", () => { + const messages = [ + { role: "user" as const, content: "x".repeat(1000) }, + { role: "assistant" as const, content: "y".repeat(500) }, + ] + const systemPrompt = ["z".repeat(200)] + const result = LLM.estimateInputTokens(messages, systemPrompt) + // Total chars: 1000 + 500 + 200 = 1700 + // Tokens: Math.ceil(1700 / 4) = 425 + expect(result).toBe(425) + }) + + test("estimates tokens from array content messages", () => { + const messages = [ + { + role: "user" as const, + content: [{ type: "text" as const, text: "x".repeat(800) }], + }, + ] + const systemPrompt: string[] = [] + const result = LLM.estimateInputTokens(messages, systemPrompt) + // 800 chars / 4 = 200 tokens + expect(result).toBe(200) + }) + + test("estimates tokens for images", () => { + const messages = [ + { + role: "user" as const, + content: [ + { type: "text" as const, text: "describe this" }, + { 
type: "image" as const, image: new URL("https://example.com/img.png") }, + ], + }, + ] + const systemPrompt: string[] = [] + const result = LLM.estimateInputTokens(messages, systemPrompt) + // "describe this" (13 chars) + image (2000 * 4 = 8000 chars) = 8013 chars + // Math.ceil(8013 / 4) = 2004 tokens + expect(result).toBe(2004) + }) + + test("handles empty messages", () => { + const result = LLM.estimateInputTokens([], []) + expect(result).toBe(0) + }) + + test("handles multiple system prompts", () => { + const messages: { role: "user" | "assistant"; content: string }[] = [] + const systemPrompt = ["prompt1".repeat(100), "prompt2".repeat(50)] + const result = LLM.estimateInputTokens(messages, systemPrompt) + // (700 + 350) / 4 = 262.5 → Math.ceil = 263 + expect(result).toBe(263) + }) +}) diff --git a/packages/opencode/test/tool/truncation.test.ts b/packages/opencode/test/tool/truncation.test.ts index 09222f279fa..7f7560065e6 100644 --- a/packages/opencode/test/tool/truncation.test.ts +++ b/packages/opencode/test/tool/truncation.test.ts @@ -122,6 +122,105 @@ describe("Truncate", () => { }) }) + describe("getMaxBytes", () => { + test("returns fallback when no model provided", () => { + const result = Truncate.getMaxBytes() + expect(result).toBe(Truncate.MAX_BYTES) + }) + + test("returns fallback when model has no context limit", () => { + const model = { limit: {} } as any + const result = Truncate.getMaxBytes(model) + expect(result).toBe(Truncate.MAX_BYTES) + }) + + test("returns fallback when context limit is 0", () => { + const model = { limit: { context: 0 } } as any + const result = Truncate.getMaxBytes(model) + expect(result).toBe(Truncate.MAX_BYTES) + }) + + test("returns minimum 10KB for very small models", () => { + const model = { limit: { context: 1000 } } as any + const result = Truncate.getMaxBytes(model) + expect(result).toBe(10 * 1024) // Should hit minimum + }) + + test("calculates correctly for GPT-4 (128k context)", () => { + const model = { limit: { context: 128_000 } } as any + const result = Truncate.getMaxBytes(model) + // 128000 * 0.05 * 4 = 25600 + expect(result).toBe(25_600) + }) + + test("calculates correctly for Claude (200k context)", () => { + const model = { limit: { context: 200_000 } } as any + const result = Truncate.getMaxBytes(model) + // 200000 * 0.05 * 4 = 40000 + expect(result).toBe(40_000) + }) + + test("calculates correctly for Gemini (2M context)", () => { + const model = { limit: { context: 2_000_000 } } as any + const result = Truncate.getMaxBytes(model) + // 2000000 * 0.05 * 4 = 400000 (400KB, well under 2MB cap) + expect(result).toBe(400_000) + }) + + test("caps at maximum 2MB for extremely large models", () => { + const model = { limit: { context: 20_000_000 } } as any + const result = Truncate.getMaxBytes(model) + // Would calculate to 4000000 (4MB), but should cap at 2MB + expect(result).toBe(2 * 1024 * 1024) + }) + + test("uses 5% of context converted to bytes (4 chars/token)", () => { + const model = { limit: { context: 100_000 } } as any + const result = Truncate.getMaxBytes(model) + // 100000 * 0.05 * 4 = 20000 + expect(result).toBe(20_000) + }) + }) + + describe("getMaxMetadata", () => { + test("returns 60% of max bytes when no model provided", () => { + const result = Truncate.getMaxMetadata() + expect(result).toBe(Math.floor(Truncate.MAX_BYTES * 0.6)) + }) + + test("returns 60% of calculated max bytes for GPT-4", () => { + const model = { limit: { context: 128_000 } } as any + const result = Truncate.getMaxMetadata(model) + const 
maxBytes = Truncate.getMaxBytes(model) + expect(result).toBe(Math.floor(maxBytes * 0.6)) + expect(result).toBe(15_360) // 25600 * 0.6 + }) + + test("returns 60% of calculated max bytes for Claude", () => { + const model = { limit: { context: 200_000 } } as any + const result = Truncate.getMaxMetadata(model) + const maxBytes = Truncate.getMaxBytes(model) + expect(result).toBe(Math.floor(maxBytes * 0.6)) + expect(result).toBe(24_000) // 40000 * 0.6 + }) + + test("returns 60% of calculated max bytes for Gemini", () => { + const model = { limit: { context: 2_000_000 } } as any + const result = Truncate.getMaxMetadata(model) + const maxBytes = Truncate.getMaxBytes(model) + expect(result).toBe(Math.floor(maxBytes * 0.6)) + expect(result).toBe(240_000) // 400000 * 0.6 + }) + + test("returns 60% of capped max bytes for extremely large models", () => { + const model = { limit: { context: 20_000_000 } } as any + const result = Truncate.getMaxMetadata(model) + const maxBytes = Truncate.getMaxBytes(model) + expect(result).toBe(Math.floor(maxBytes * 0.6)) + expect(result).toBe(Math.floor(2 * 1024 * 1024 * 0.6)) + }) + }) + describe("cleanup", () => { const DAY_MS = 24 * 60 * 60 * 1000 let oldFile: string diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index b7e72fbad8f..a126d02b7f0 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -1757,6 +1757,14 @@ export type Config = { * Enable pruning of old tool outputs (default: true) */ prune?: boolean + /** + * Percentage of context window to trigger compaction (default: 0.9). Value between 0.5 and 0.99. + */ + threshold?: number + /** + * Override the model's context limit. Useful for limiting context usage on models with large context windows. + */ + maxContext?: number } experimental?: { hook?: {
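
Usage sketch (illustrative, not part of the patch): the option names below come from the compaction schema added in config.ts above, and the numeric values are examples only. In opencode.json the new knobs combine roughly as

    {
      "$schema": "https://opencode.ai/config.json",
      "compaction": {
        "auto": true,
        "prune": true,
        "threshold": 0.85,
        "maxContext": 100000
      }
    }

Assuming a model with a 200k context window, no separate input limit, and the default 32k output cap, the updated isOverflow would then trigger auto-compaction once the tracked input + cached + output token count exceeds (min(100000, 200000) - 32000) * 0.85 = 57,800 tokens. The dynamic max_tokens clamp in transform.ts follows the same arithmetic on the output side: for example, with a 272k input window and roughly 250k estimated input tokens, maxOutputTokens drops from the 32k default to 272000 - 250000 - 4000 = 18,000, and never falls below the 1,000-token floor.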