diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts
index 0ed8f94ed05..3ee36fc5956 100644
--- a/src/core/context-management/__tests__/context-management.spec.ts
+++ b/src/core/context-management/__tests__/context-management.spec.ts
@@ -1407,4 +1407,97 @@ describe("Context Management", () => {
 			expect(resultWithLastMessage).toBe(true)
 		})
 	})
+
+	/**
+	 * Tests for newContextTokensAfterTruncation including system prompt
+	 */
+	describe("newContextTokensAfterTruncation", () => {
+		const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
+			contextWindow,
+			supportsPromptCache: true,
+			maxTokens,
+		})
+
+		it("should include system prompt tokens in newContextTokensAfterTruncation", async () => {
+			const modelInfo = createModelInfo(100000, 30000)
+			const totalTokens = 70001 // Above threshold to trigger truncation
+
+			const messages: ApiMessage[] = [
+				{ role: "user", content: "First message" },
+				{ role: "assistant", content: "Second message" },
+				{ role: "user", content: "Third message" },
+				{ role: "assistant", content: "Fourth message" },
+				{ role: "user", content: "" }, // Small content in last message
+			]
+
+			const systemPrompt = "You are a helpful assistant. Follow these rules carefully."
+
+			const result = await manageContext({
+				messages,
+				totalTokens,
+				contextWindow: modelInfo.contextWindow,
+				maxTokens: modelInfo.maxTokens,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: false,
+				autoCondenseContextPercent: 100,
+				systemPrompt,
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+			})
+
+			// Should have truncation
+			expect(result.truncationId).toBeDefined()
+			expect(result.newContextTokensAfterTruncation).toBeDefined()
+
+			// The newContextTokensAfterTruncation should include system prompt tokens
+			// Count system prompt tokens to verify
+			const systemPromptTokens = await estimateTokenCount([{ type: "text", text: systemPrompt }], mockApiHandler)
+			expect(systemPromptTokens).toBeGreaterThan(0)
+
+			// newContextTokensAfterTruncation should be >= system prompt tokens
+			// (since it includes system prompt + remaining message tokens)
+			expect(result.newContextTokensAfterTruncation).toBeGreaterThanOrEqual(systemPromptTokens)
+		})
+
+		it("should produce consistent prev vs new token comparison (both including system prompt)", async () => {
+			const modelInfo = createModelInfo(100000, 30000)
+			const totalTokens = 70001 // Above threshold to trigger truncation
+
+			const messages: ApiMessage[] = [
+				{ role: "user", content: "First message" },
+				{ role: "assistant", content: "Second message" },
+				{ role: "user", content: "Third message" },
+				{ role: "assistant", content: "Fourth message" },
+				{ role: "user", content: "" }, // Small content in last message
+			]
+
+			const systemPrompt = "System prompt for testing"
+
+			const result = await manageContext({
+				messages,
+				totalTokens,
+				contextWindow: modelInfo.contextWindow,
+				maxTokens: modelInfo.maxTokens,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: false,
+				autoCondenseContextPercent: 100,
+				systemPrompt,
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+			})
+
+			// After truncation, newContextTokensAfterTruncation should be less than prevContextTokens
+			// because we removed some messages
+			expect(result.newContextTokensAfterTruncation).toBeDefined()
+			expect(result.newContextTokensAfterTruncation).toBeLessThan(result.prevContextTokens)
+
+			// newContextTokensAfterTruncation should still be a reasonable value, not near zero
+			// as the original bug produced: with the system prompt included and roughly 50% of
+			// the messages remaining after truncation, it should be a significant fraction of
+			// prevContextTokens, and at minimum greater than zero.
+			expect(result.newContextTokensAfterTruncation).toBeGreaterThan(0)
+		})
+	})
 })
diff --git a/src/core/context-management/index.ts b/src/core/context-management/index.ts
index 993c69a3657..a94a53c9d5a 100644
--- a/src/core/context-management/index.ts
+++ b/src/core/context-management/index.ts
@@ -323,7 +323,14 @@ export async function manageContext({
 		const effectiveMessages = truncationResult.messages.filter(
 			(msg) => !msg.truncationParent && !msg.isTruncationMarker,
 		)
-		let newContextTokensAfterTruncation = 0
+
+		// Include system prompt tokens so this value matches what we send to the API.
+		// Note: `prevContextTokens` is computed locally here (totalTokens + lastMessageTokens).
+		let newContextTokensAfterTruncation = await estimateTokenCount(
+			[{ type: "text", text: systemPrompt }],
+			apiHandler,
+		)
+
 		for (const msg of effectiveMessages) {
 			const content = msg.content
 			if (Array.isArray(content)) {
diff --git a/src/utils/__tests__/tiktoken.spec.ts b/src/utils/__tests__/tiktoken.spec.ts
index c0596a60aba..bae81adcf2a 100644
--- a/src/utils/__tests__/tiktoken.spec.ts
+++ b/src/utils/__tests__/tiktoken.spec.ts
@@ -134,4 +134,163 @@ describe("tiktoken", () => {
 		// Both calls should return the same token count
 		expect(result1).toBe(result2)
 	})
+
+	describe("tool_use blocks", () => {
+		it("should count tokens for tool_use blocks with simple arguments", async () => {
+			const content = [
+				{
+					type: "tool_use",
+					id: "tool_123",
+					name: "read_file",
+					input: { path: "/src/main.ts" },
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const result = await tiktoken(content)
+			// Should return a positive token count for the serialized tool call
+			expect(result).toBeGreaterThan(0)
+		})
+
+		it("should count tokens for tool_use blocks with complex arguments", async () => {
+			const content = [
+				{
+					type: "tool_use",
+					id: "tool_456",
+					name: "write_to_file",
+					input: {
+						path: "/src/components/Button.tsx",
+						content:
+							"import React from 'react';\n\nexport const Button = ({ children, onClick }) => {\n  return <button onClick={onClick}>{children}</button>;\n};",
+					},
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const result = await tiktoken(content)
+			// Should return a token count reflecting the larger content
+			expect(result).toBeGreaterThan(10)
+		})
+
+		it("should handle tool_use blocks with empty input", async () => {
+			const content = [
+				{
+					type: "tool_use",
+					id: "tool_789",
+					name: "list_files",
+					input: {},
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const result = await tiktoken(content)
+			// Should still count the tool name (and empty args)
+			expect(result).toBeGreaterThan(0)
+		})
+	})
+
+	describe("tool_result blocks", () => {
+		it("should count tokens for tool_result blocks with string content", async () => {
+			const content = [
+				{
+					type: "tool_result",
+					tool_use_id: "tool_123",
+					content: "File content: export const foo = 'bar';",
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const result = await tiktoken(content)
+			// Should return a positive token count
+			expect(result).toBeGreaterThan(0)
+		})
+
+		it("should count tokens for tool_result blocks with array content", async () => {
+			const content = [
+				{
+					type: "tool_result",
+					tool_use_id: "tool_456",
+					content: [
+						{ type: "text", text: "First part of the result" },
+						{ type: "text", text: "Second part of the result" },
+					],
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const result = await tiktoken(content)
+			// Should count tokens from all text parts
+			expect(result).toBeGreaterThan(0)
+		})
+
+		it("should count tokens for tool_result blocks with error flag", async () => {
+			const content = [
+				{
+					type: "tool_result",
+					tool_use_id: "tool_789",
+					is_error: true,
+					content: "Error: File not found",
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const result = await tiktoken(content)
+			// Should include the error indicator and content
+			expect(result).toBeGreaterThan(0)
+		})
+
+		it("should handle tool_result blocks with image content in array", async () => {
+			const content = [
+				{
+					type: "tool_result",
+					tool_use_id: "tool_abc",
+					content: [
+						{ type: "text", text: "Screenshot captured" },
+						{ type: "image", source: { type: "base64", media_type: "image/png", data: "abc123" } },
+					],
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const result = await tiktoken(content)
+			// Should count text and include placeholder for images
+			expect(result).toBeGreaterThan(0)
+		})
+	})
+
+	describe("mixed content with tools", () => {
+		it("should count tokens for conversation with tool_use and tool_result", async () => {
+			const content = [
+				{ type: "text", text: "Let me read that file for you." },
+				{
+					type: "tool_use",
+					id: "tool_123",
+					name: "read_file",
+					input: { path: "/src/index.ts" },
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const result = await tiktoken(content)
+			// Should count both text and tool_use tokens
+			expect(result).toBeGreaterThan(5)
+		})
+
+		it("should produce larger count for tool_result with large content vs small content", async () => {
+			const smallContent = [
+				{
+					type: "tool_result",
+					tool_use_id: "tool_1",
+					content: "OK",
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const largeContent = [
+				{
+					type: "tool_result",
+					tool_use_id: "tool_2",
+					content:
+						"This is a much longer result that contains a lot more text and should therefore have a significantly higher token count than the small content.",
+				},
+			] as Anthropic.Messages.ContentBlockParam[]
+
+			const smallResult = await tiktoken(smallContent)
+			const largeResult = await tiktoken(largeContent)
+
+			// Large content should have more tokens
+			expect(largeResult).toBeGreaterThan(smallResult)
+		})
+	})
 })
diff --git a/src/utils/tiktoken.ts b/src/utils/tiktoken.ts
index 96eafa7e499..b543873fc63 100644
--- a/src/utils/tiktoken.ts
+++ b/src/utils/tiktoken.ts
@@ -6,6 +6,52 @@ const TOKEN_FUDGE_FACTOR = 1.5
 
 let encoder: Tiktoken | null = null
 
+/**
+ * Serializes a tool_use block to text for token counting.
+ * Approximates how the API sees the tool call.
+ */
+function serializeToolUse(block: Anthropic.Messages.ToolUseBlockParam): string {
+	const parts = [`Tool: ${block.name}`]
+	if (block.input !== undefined) {
+		try {
+			parts.push(`Arguments: ${JSON.stringify(block.input)}`)
+		} catch {
+			parts.push(`Arguments: [serialization error]`)
+		}
+	}
+	return parts.join("\n")
+}
+
+/**
+ * Serializes a tool_result block to text for token counting.
+ * Handles both string content and array content.
+ */
+function serializeToolResult(block: Anthropic.Messages.ToolResultBlockParam): string {
+	const parts = [`Tool Result (${block.tool_use_id})`]
+
+	if (block.is_error) {
+		parts.push(`[Error]`)
+	}
+
+	const content = block.content
+	if (typeof content === "string") {
+		parts.push(content)
+	} else if (Array.isArray(content)) {
+		// Handle the array of nested content blocks
+		for (const item of content) {
+			if (item.type === "text") {
+				parts.push(item.text || "")
+			} else if (item.type === "image") {
+				parts.push("[Image content]")
+			} else {
+				parts.push(`[Unsupported content block: ${String((item as { type?: unknown }).type)}]`)
+			}
+		}
+	}
+
+	return parts.join("\n")
+}
+
 export async function tiktoken(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> {
 	if (content.length === 0) {
 		return 0
@@ -37,6 +83,20 @@ export async function tiktoken(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> {
 			} else {
 				totalTokens += 300 // Conservative estimate for unknown images
 			}
+		} else if (block.type === "tool_use") {
+			// Serialize tool_use block to text and count tokens
+			const serialized = serializeToolUse(block as Anthropic.Messages.ToolUseBlockParam)
+			if (serialized.length > 0) {
+				const tokens = encoder.encode(serialized, undefined, [])
+				totalTokens += tokens.length
+			}
+		} else if (block.type === "tool_result") {
+			// Serialize tool_result block to text and count tokens
+			const serialized = serializeToolResult(block as Anthropic.Messages.ToolResultBlockParam)
+			if (serialized.length > 0) {
+				const tokens = encoder.encode(serialized, undefined, [])
+				totalTokens += tokens.length
+			}
 		}
 	}
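Not part of the patch: a minimal sketch of the text the new serializeToolUse helper above would produce for a typical tool call, mirroring the block shape used in the tests (the id, name, and input values are illustrative only). This is the string that now gets encoded, where previously tool blocks fell through the text/image branches and contributed nothing to the count.

// Illustrative sketch only; mirrors the serialization logic added in src/utils/tiktoken.ts.
import { Anthropic } from "@anthropic-ai/sdk"

const sampleBlock: Anthropic.Messages.ToolUseBlockParam = {
	type: "tool_use",
	id: "tool_123", // hypothetical id, as in the test fixtures
	name: "read_file",
	input: { path: "/src/main.ts" },
}

// A "Tool:" line plus JSON-encoded arguments, matching serializeToolUse above.
const serialized = [`Tool: ${sampleBlock.name}`, `Arguments: ${JSON.stringify(sampleBlock.input)}`].join("\n")
console.log(serialized)
// Tool: read_file
// Arguments: {"path":"/src/main.ts"}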