diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index fb3825302918..8499e02abd84 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -43,6 +43,74 @@ export namespace SessionCompaction {
 
   const PRUNE_PROTECTED_TOOLS = ["skill"]
 
+  // Reserve tokens for compaction prompt
+  const COMPACTION_PROMPT_RESERVE = 2000
+
+  function estimateMessageTokens(msg: MessageV2.WithParts): number {
+    let chars = 0
+    for (const part of msg.parts) {
+      if (part.type === "text") chars += part.text.length
+      if (part.type === "reasoning") chars += part.text.length
+      if (part.type === "tool" && part.state.status === "completed") {
+        if (!part.state.time.compacted) chars += part.state.output.length
+        chars += JSON.stringify(part.state.input).length
+      }
+      if (part.type === "tool" && part.state.status === "error") {
+        chars += part.state.error.length
+      }
+    }
+    return Token.estimate(String.fromCharCode(0).repeat(chars))
+  }
+
+  // truncates messages to fit within context window for compaction.
+  // prioritizes recent messages while preserving summary messages.
+  export function truncateForCompaction(
+    messages: MessageV2.WithParts[],
+    model: Provider.Model
+  ): MessageV2.WithParts[] {
+    const outputReserve = Math.min(model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
+    const inputLimit = model.limit.input || model.limit.context - outputReserve
+    const targetLimit = inputLimit - COMPACTION_PROMPT_RESERVE
+
+    if (targetLimit <= 0) return messages
+
+    // collect summary messages first
+    const summaryMessages: MessageV2.WithParts[] = []
+    let summaryTokens = 0
+    for (const msg of messages) {
+      if (msg.info.role === "assistant" && (msg.info as MessageV2.Assistant).summary) {
+        summaryMessages.push(msg)
+        summaryTokens += estimateMessageTokens(msg)
+      }
+    }
+
+    if (summaryTokens >= targetLimit) return summaryMessages
+
+    // add messages from end until limit reached
+    const result: MessageV2.WithParts[] = []
+    let estimatedTokens = summaryTokens
+    const summaryIds = new Set(summaryMessages.map((m) => m.info.id))
+
+    for (let i = messages.length - 1; i >= 0; i--) {
+      const msg = messages[i]
+      if (summaryIds.has(msg.info.id)) continue
+
+      const msgTokens = estimateMessageTokens(msg)
+      if (estimatedTokens + msgTokens > targetLimit) {
+        log.info("truncateForCompaction", { included: result.length, skipped: i + 1 })
+        break
+      }
+
+      result.unshift(msg)
+      estimatedTokens += msgTokens
+    }
+
+    const finalResult = [...summaryMessages, ...result]
+    finalResult.sort((a, b) => (a.info.id > b.info.id ? 1 : -1))
+
+    return finalResult
+  }
+
   // goes backwards through parts until there are 40_000 tokens worth of tool
   // calls. then erases output of previous tool calls. idea is to throw away old
   // tool calls that are no longer relevant.
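
[review note, not part of the patch: two things worth spelling out about the hunk
above. First, Token.estimate takes a string rather than a length, which is why
estimateMessageTokens synthesizes a throwaway string of the measured length with
String.fromCharCode(0).repeat(chars). Second, a worked example of the budget
arithmetic, assuming no explicit model.limit.input and SessionPrompt.OUTPUT_TOKEN_MAX
of at least 2_000: for a model with context 10_000 and output 2_000, outputReserve =
2_000, inputLimit = 10_000 - 2_000 = 8_000, and targetLimit = 8_000 -
COMPACTION_PROMPT_RESERVE = 6_000 usable tokens. At the ~4 chars/token estimate (see
the util.token.estimate test at the bottom of this diff), an 8_000-char message costs
~2_000 tokens, so exactly three such messages fit; these are the numbers the new
tests rely on.]
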
@@ -149,7 +217,7 @@ export namespace SessionCompaction {
       tools: {},
       system: [],
       messages: [
-        ...MessageV2.toModelMessages(input.messages, model),
+        ...MessageV2.toModelMessages(truncateForCompaction(input.messages, model), model),
         {
           role: "user",
           content: [
diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
index 2e9c091870ed..975352859ed0 100644
--- a/packages/opencode/test/session/compaction.test.ts
+++ b/packages/opencode/test/session/compaction.test.ts
@@ -146,6 +146,161 @@ describe("session.compaction.isOverflow", () => {
   })
 })
 
+describe("session.compaction.truncateForCompaction", () => {
+  function createMessage(opts: {
+    id: string
+    role: "user" | "assistant"
+    textLength: number
+    isSummary?: boolean
+  }): import("../../src/session/message-v2").MessageV2.WithParts {
+    const baseInfo = {
+      id: opts.id,
+      sessionID: "test-session",
+    }
+
+    const parts: import("../../src/session/message-v2").MessageV2.Part[] = [
+      {
+        id: `part-${opts.id}`,
+        sessionID: "test-session",
+        messageID: opts.id,
+        type: "text" as const,
+        text: "x".repeat(opts.textLength),
+      },
+    ]
+
+    if (opts.role === "user") {
+      return {
+        info: {
+          ...baseInfo,
+          role: "user" as const,
+          time: { created: Date.now() },
+          agent: "test-agent",
+          model: { providerID: "test", modelID: "test-model" },
+        },
+        parts,
+      }
+    }
+
+    return {
+      info: {
+        ...baseInfo,
+        role: "assistant" as const,
+        time: { created: Date.now() },
+        parentID: "parent",
+        modelID: "test-model",
+        providerID: "test",
+        mode: "test",
+        agent: "test-agent",
+        path: { cwd: "/", root: "/" },
+        cost: 0,
+        tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } },
+        summary: opts.isSummary ?? false,
+      },
+      parts,
+    }
+  }
+
+  test("returns all messages when within limit", async () => {
+    await using tmp = await tmpdir()
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const model = createModel({ context: 100_000, output: 8_000 })
+        const messages = [
+          createMessage({ id: "1", role: "user", textLength: 1000 }),
+          createMessage({ id: "2", role: "assistant", textLength: 1000 }),
+        ]
+
+        const result = SessionCompaction.truncateForCompaction(messages, model)
+        expect(result.length).toBe(2)
+      },
+    })
+  })
+
+  test("truncates messages when exceeding limit", async () => {
+    await using tmp = await tmpdir()
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        // Model with 10k context, 2k output = 8k input, minus 2k reserve = 6k usable
+        // Each message with 8000 chars = ~2000 tokens
+        const model = createModel({ context: 10_000, output: 2_000 })
+        const messages = [
+          createMessage({ id: "1", role: "user", textLength: 8000 }),
+          createMessage({ id: "2", role: "assistant", textLength: 8000 }),
+          createMessage({ id: "3", role: "user", textLength: 8000 }),
+          createMessage({ id: "4", role: "assistant", textLength: 8000 }),
+        ]
+
+        const result = SessionCompaction.truncateForCompaction(messages, model)
+        expect(result.length).toBeLessThan(messages.length)
+      },
+    })
+  })
+
+  test("preserves summary messages even when truncating", async () => {
+    await using tmp = await tmpdir()
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const model = createModel({ context: 10_000, output: 2_000 })
+        const messages = [
+          createMessage({ id: "1", role: "assistant", textLength: 1000, isSummary: true }),
+          createMessage({ id: "2", role: "user", textLength: 8000 }),
+          createMessage({ id: "3", role: "assistant", textLength: 8000 }),
+          createMessage({ id: "4", role: "user", textLength: 8000 }),
+        ]
+
+        const result = SessionCompaction.truncateForCompaction(messages, model)
+        const hasSummary = result.some(
+          (m) => m.info.role === "assistant" && (m.info as any).summary === true
+        )
+        expect(hasSummary).toBe(true)
+      },
+    })
+  })
+
+  test("prioritizes recent messages over older ones", async () => {
+    await using tmp = await tmpdir()
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const model = createModel({ context: 10_000, output: 2_000 })
+        const messages = [
+          createMessage({ id: "1", role: "user", textLength: 8000 }),
+          createMessage({ id: "2", role: "assistant", textLength: 8000 }),
+          createMessage({ id: "3", role: "user", textLength: 4000 }),
+          createMessage({ id: "4", role: "assistant", textLength: 4000 }),
+        ]
+
+        const result = SessionCompaction.truncateForCompaction(messages, model)
+        const hasMessage4 = result.some((m) => m.info.id === "4")
+        expect(hasMessage4).toBe(true)
+      },
+    })
+  })
+
+  test("maintains chronological order after truncation", async () => {
+    await using tmp = await tmpdir()
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const model = createModel({ context: 10_000, output: 2_000 })
+        const messages = [
+          createMessage({ id: "1", role: "user", textLength: 4000 }),
+          createMessage({ id: "2", role: "assistant", textLength: 4000 }),
+          createMessage({ id: "3", role: "user", textLength: 4000 }),
+        ]
+
+        const result = SessionCompaction.truncateForCompaction(messages, model)
+        for (let i = 1; i < result.length; i++) {
+          expect(result[i].info.id > result[i - 1].info.id).toBe(true)
+        }
+      },
+    })
+  })
+})
+
 describe("util.token.estimate", () => {
   test("estimates tokens from text (4 chars per token)", () => {
     const text = "x".repeat(4000)
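
[review note, not part of the patch: a minimal sketch of how the new helper composes
at the call site changed above, spelled out as two steps; `kept` and `prompt` are
hypothetical locals, everything else comes from the diff:

    // keep all summary messages, plus as many of the most recent messages as fit
    // within (input limit - COMPACTION_PROMPT_RESERVE), restored to id order
    const kept = SessionCompaction.truncateForCompaction(input.messages, model)
    // convert only the retained messages into the summarization request
    const prompt = MessageV2.toModelMessages(kept, model)

This is equivalent to the inlined expression in the compaction.ts hunk.]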