diff --git a/.changeset/fair-houses-deny.md b/.changeset/fair-houses-deny.md new file mode 100644 index 00000000000..eaabea67494 --- /dev/null +++ b/.changeset/fair-houses-deny.md @@ -0,0 +1,5 @@ +--- +"roo-cline": patch +--- + +Shows in the UI when the context is intelligently condensed diff --git a/evals/packages/types/src/roo-code.ts b/evals/packages/types/src/roo-code.ts index 0e1675b1dec..a33874129bc 100644 --- a/evals/packages/types/src/roo-code.ts +++ b/evals/packages/types/src/roo-code.ts @@ -993,6 +993,7 @@ export const clineSays = [ "checkpoint_saved", "rooignore_error", "diff_error", + "condense_context", ] as const export const clineSaySchema = z.enum(clineSays) @@ -1011,6 +1012,18 @@ export const toolProgressStatusSchema = z.object({ export type ToolProgressStatus = z.infer<typeof toolProgressStatusSchema> +/** + * ContextCondense + */ + +export const contextCondenseSchema = z.object({ + cost: z.number(), + prevContextTokens: z.number(), + newContextTokens: z.number(), +}) + +export type ContextCondense = z.infer<typeof contextCondenseSchema> + /** * ClineMessage */ @@ -1027,6 +1040,7 @@ export const clineMessageSchema = z.object({ conversationHistoryIndex: z.number().optional(), checkpoint: z.record(z.string(), z.unknown()).optional(), progressStatus: toolProgressStatusSchema.optional(), + contextCondense: contextCondenseSchema.optional(), }) export type ClineMessage = z.infer<typeof clineMessageSchema> diff --git a/src/core/condense/__tests__/index.test.ts b/src/core/condense/__tests__/index.test.ts index 10769589f86..e06ea4197d3 100644 --- a/src/core/condense/__tests__/index.test.ts +++ b/src/core/condense/__tests__/index.test.ts @@ -69,10 +69,11 @@ describe("summarizeConversation", () => { // Reset mocks jest.clearAllMocks() - // Setup mock stream + // Setup mock stream with usage information mockStream = (async function* () { yield { type: "text" as const, text: "This is " } yield { type: "text" as const, text: "a summary" } + yield { type: "usage" as const, totalCost: 0.05, outputTokens: 150 } })() // Setup mock API handler @@ -103,7 +104,10 @@ describe("summarizeConversation", () => { ] const result = await summarizeConversation(messages, mockApiHandler) - expect(result).toEqual(messages) + expect(result.messages).toEqual(messages) + expect(result.cost).toBe(0) + expect(result.summary).toBe("") + expect(result.newContextTokens).toBeUndefined() expect(mockApiHandler.createMessage).not.toHaveBeenCalled() }) @@ -119,7 +123,10 @@ describe("summarizeConversation", () => { ] const result = await summarizeConversation(messages, mockApiHandler) - expect(result).toEqual(messages) + expect(result.messages).toEqual(messages) + expect(result.cost).toBe(0) + expect(result.summary).toBe("") + expect(result.newContextTokens).toBeUndefined() expect(mockApiHandler.createMessage).not.toHaveBeenCalled() }) @@ -142,17 +149,22 @@ describe("summarizeConversation", () => { // Verify the structure of the result // The result should be: original messages (except last N) + summary + last N messages - expect(result.length).toBe(messages.length + 1) // Original + summary + expect(result.messages.length).toBe(messages.length + 1) // Original + summary // Check that the summary message was inserted correctly - const summaryMessage = result[result.length - N_MESSAGES_TO_KEEP - 1] + const summaryMessage = result.messages[result.messages.length - N_MESSAGES_TO_KEEP - 1] expect(summaryMessage.role).toBe("assistant") expect(summaryMessage.content).toBe("This is a summary") expect(summaryMessage.isSummary).toBe(true) // Check that the last N_MESSAGES_TO_KEEP messages are preserved const 
lastMessages = messages.slice(-N_MESSAGES_TO_KEEP) - expect(result.slice(-N_MESSAGES_TO_KEEP)).toEqual(lastMessages) + expect(result.messages.slice(-N_MESSAGES_TO_KEEP)).toEqual(lastMessages) + + // Check the cost and token counts + expect(result.cost).toBe(0.05) + expect(result.summary).toBe("This is a summary") + expect(result.newContextTokens).toBe(250) // 150 output tokens + 100 from countTokens }) it("should handle empty summary response", async () => { @@ -172,9 +184,10 @@ describe("summarizeConversation", () => { const mockWarn = jest.fn() console.warn = mockWarn - // Setup empty summary response + // Setup empty summary response with usage information const emptyStream = (async function* () { yield { type: "text" as const, text: "" } + yield { type: "usage" as const, totalCost: 0.02, outputTokens: 0 } })() // Create a new mock for createMessage that returns empty stream @@ -189,7 +202,9 @@ describe("summarizeConversation", () => { const result = await summarizeConversation(messages, mockApiHandler) // Should return original messages when summary is empty - expect(result).toEqual(messages) + expect(result.messages).toEqual(messages) + expect(result.cost).toBe(0.02) + expect(result.summary).toBe("") expect(mockWarn).toHaveBeenCalledWith("Received empty summary from API") // Restore console.warn @@ -225,4 +240,37 @@ describe("summarizeConversation", () => { const mockCallArgs = (maybeRemoveImageBlocks as jest.Mock).mock.calls[0][0] as any[] expect(mockCallArgs[mockCallArgs.length - 1]).toEqual(expectedFinalMessage) }) + + it("should calculate newContextTokens correctly with systemPrompt", async () => { + const messages: ApiMessage[] = [ + { role: "user", content: "Hello", ts: 1 }, + { role: "assistant", content: "Hi there", ts: 2 }, + { role: "user", content: "How are you?", ts: 3 }, + { role: "assistant", content: "I'm good", ts: 4 }, + { role: "user", content: "What's new?", ts: 5 }, + { role: "assistant", content: "Not much", ts: 6 }, + { role: "user", content: "Tell me more", ts: 7 }, + ] + + const systemPrompt = "You are a helpful assistant." + + // Create a stream with usage information + const streamWithUsage = (async function* () { + yield { type: "text" as const, text: "This is a summary with system prompt" } + yield { type: "usage" as const, totalCost: 0.06, outputTokens: 200 } + })() + + // Override the mock for this test + mockApiHandler.createMessage = jest.fn().mockReturnValue(streamWithUsage) as any + + const result = await summarizeConversation(messages, mockApiHandler, systemPrompt) + + // Verify that countTokens was called with the correct messages including system prompt + expect(mockApiHandler.countTokens).toHaveBeenCalled() + + // Check the newContextTokens calculation includes system prompt + expect(result.newContextTokens).toBe(300) // 200 output tokens + 100 from countTokens + expect(result.cost).toBe(0.06) + expect(result.summary).toBe("This is a summary with system prompt") + }) }) diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index 2a88dbfccee..5ec839d05c5 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -45,23 +45,35 @@ Example summary structure: Output only the summary of the conversation so far, without any additional commentary or explanation. 
` +export type SummarizeResponse = { + messages: ApiMessage[] // The messages after summarization + summary: string // The summary text; empty string for no summary + cost: number // The cost of the summarization operation + newContextTokens?: number // The number of tokens in the context for the next API request +} + /** * Summarizes the conversation messages using an LLM call * * @param {ApiMessage[]} messages - The conversation messages * @param {ApiHandler} apiHandler - The API handler to use for token counting. - * @returns {ApiMessage[]} - The input messages, potentially including a new summary message before the last message. + * @returns {SummarizeResponse} - The result of the summarization operation (see above) */ -export async function summarizeConversation(messages: ApiMessage[], apiHandler: ApiHandler): Promise<ApiMessage[]> { +export async function summarizeConversation( + messages: ApiMessage[], + apiHandler: ApiHandler, + systemPrompt?: string, +): Promise<SummarizeResponse> { + const response: SummarizeResponse = { messages, cost: 0, summary: "" } const messagesToSummarize = getMessagesSinceLastSummary(messages.slice(0, -N_MESSAGES_TO_KEEP)) if (messagesToSummarize.length <= 1) { - return messages // Not enough messages to warrant a summary + return response // Not enough messages to warrant a summary } const keepMessages = messages.slice(-N_MESSAGES_TO_KEEP) - for (const message of keepMessages) { - if (message.isSummary) { - return messages // We recently summarized these messages; it's too soon to summarize again. - } + // Check if there's a recent summary in the messages we're keeping + const recentSummaryExists = keepMessages.some((message) => message.isSummary) + if (recentSummaryExists) { + return response // We recently summarized these messages; it's too soon to summarize again. } const finalRequestMessage: Anthropic.MessageParam = { role: "user", @@ -73,16 +85,21 @@ export async function summarizeConversation(messages: ApiMessage[], apiHandler: // Note: this doesn't need to be a stream, consider using something like apiHandler.completePrompt const stream = apiHandler.createMessage(SUMMARY_PROMPT, requestMessages) let summary = "" - // TODO(canyon): compute usage and cost for this operation and update the global metrics. + let cost = 0 + let outputTokens = 0 for await (const chunk of stream) { if (chunk.type === "text") { summary += chunk.text + } else if (chunk.type === "usage") { + // Record final usage chunk only + cost = chunk.totalCost ?? 0 + outputTokens = chunk.outputTokens ?? 0 } } summary = summary.trim() if (summary.length === 0) { console.warn("Received empty summary from API") - return messages + return { ...response, cost } } const summaryMessage: ApiMessage = { role: "assistant", @@ -90,8 +107,19 @@ export async function summarizeConversation(messages: ApiMessage[], apiHandler: ts: keepMessages[0].ts, isSummary: true, } + const newMessages = [...messages.slice(0, -N_MESSAGES_TO_KEEP), summaryMessage, ...keepMessages] - return [...messages.slice(0, -N_MESSAGES_TO_KEEP), summaryMessage, ...keepMessages] + // Count the tokens in the context for the next API request + // We only estimate the tokens in summaryMessage if outputTokens is 0, otherwise we use outputTokens + const contextMessages = outputTokens ? [...keepMessages] : [summaryMessage, ...keepMessages] + if (systemPrompt) { + contextMessages.unshift({ role: "user", content: systemPrompt }) + } + const contextBlocks = contextMessages.flatMap((message) => typeof message.content === "string" ? 
[{ text: message.content, type: "text" as const }] : message.content, + ) + const newContextTokens = outputTokens + (await apiHandler.countTokens(contextBlocks)) + return { messages: newMessages, summary, cost, newContextTokens } } /* Returns the list of all messages since the last summary message, including the summary. Returns all messages if there is no summary. */ diff --git a/src/core/sliding-window/__tests__/sliding-window.test.ts b/src/core/sliding-window/__tests__/sliding-window.test.ts index 7890b55ec88..fe3b71f4eb9 100644 --- a/src/core/sliding-window/__tests__/sliding-window.test.ts +++ b/src/core/sliding-window/__tests__/sliding-window.test.ts @@ -11,6 +11,7 @@ import { truncateConversationIfNeeded, } from "../index" import { ApiMessage } from "../../task-persistence/apiMessages" +import * as condenseModule from "../../condense" // Create a mock ApiHandler for testing class MockApiHandler extends BaseProvider { @@ -248,7 +249,14 @@ describe("truncateConversationIfNeeded", () => { maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, }) - expect(result).toEqual(messagesWithSmallContent) // No truncation occurs + + // Check the new return type + expect(result).toEqual({ + messages: messagesWithSmallContent, + summary: "", + cost: 0, + prevContextTokens: totalTokens, + }) }) it("should truncate if tokens are above max tokens threshold", async () => { @@ -260,7 +268,7 @@ describe("truncateConversationIfNeeded", () => { // When truncating, always uses 0.5 fraction // With 4 messages after the first, 0.5 fraction means remove 2 messages - const expectedResult = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]] + const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]] const result = await truncateConversationIfNeeded({ messages: messagesWithSmallContent, @@ -269,7 +277,13 @@ describe("truncateConversationIfNeeded", () => { maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, }) - expect(result).toEqual(expectedResult) + + expect(result).toEqual({ + messages: expectedMessages, + summary: "", + cost: 0, + prevContextTokens: totalTokens, + }) }) it("should work with non-prompt caching models the same as prompt caching models", async () => { @@ -298,7 +312,10 @@ describe("truncateConversationIfNeeded", () => { apiHandler: mockApiHandler, }) - expect(result1).toEqual(result2) + expect(result1.messages).toEqual(result2.messages) + expect(result1.summary).toEqual(result2.summary) + expect(result1.cost).toEqual(result2.cost) + expect(result1.prevContextTokens).toEqual(result2.prevContextTokens) // Test above threshold const aboveThreshold = 70001 @@ -318,7 +335,10 @@ describe("truncateConversationIfNeeded", () => { apiHandler: mockApiHandler, }) - expect(result3).toEqual(result4) + expect(result3.messages).toEqual(result4.messages) + expect(result3.summary).toEqual(result4.summary) + expect(result3.cost).toEqual(result4.cost) + expect(result3.prevContextTokens).toEqual(result4.prevContextTokens) }) it("should consider incoming content when deciding to truncate", async () => { @@ -344,7 +364,12 @@ describe("truncateConversationIfNeeded", () => { maxTokens, apiHandler: mockApiHandler, }) - expect(resultWithSmall).toEqual(messagesWithSmallContent) // No truncation + expect(resultWithSmall).toEqual({ + messages: messagesWithSmallContent, + summary: "", + cost: 0, + prevContextTokens: baseTokensForSmall + smallContentTokens, + }) // No truncation // Test case 2: Large content that will push 
us over the threshold const largeContent = [ @@ -368,7 +393,10 @@ describe("truncateConversationIfNeeded", () => { maxTokens, apiHandler: mockApiHandler, }) - expect(resultWithLarge).not.toEqual(messagesWithLargeContent) // Should truncate + expect(resultWithLarge.messages).not.toEqual(messagesWithLargeContent) // Should truncate + expect(resultWithLarge.summary).toBe("") + expect(resultWithLarge.cost).toBe(0) + expect(resultWithLarge.prevContextTokens).toBe(baseTokensForLarge + largeContentTokens) // Test case 3: Very large content that will definitely exceed threshold const veryLargeContent = [{ type: "text" as const, text: "X".repeat(1000) }] @@ -387,7 +415,10 @@ describe("truncateConversationIfNeeded", () => { maxTokens, apiHandler: mockApiHandler, }) - expect(resultWithVeryLarge).not.toEqual(messagesWithVeryLargeContent) // Should truncate + expect(resultWithVeryLarge.messages).not.toEqual(messagesWithVeryLargeContent) // Should truncate + expect(resultWithVeryLarge.summary).toBe("") + expect(resultWithVeryLarge.cost).toBe(0) + expect(resultWithVeryLarge.prevContextTokens).toBe(baseTokensForVeryLarge + veryLargeContentTokens) }) it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", async () => { @@ -409,7 +440,140 @@ describe("truncateConversationIfNeeded", () => { maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, }) - expect(result).toEqual(expectedResult) + expect(result).toEqual({ + messages: expectedResult, + summary: "", + cost: 0, + prevContextTokens: totalTokens, + }) + }) + + it("should use summarizeConversation when autoCondenseContext is true and tokens exceed threshold", async () => { + // Mock the summarizeConversation function + const mockSummary = "This is a summary of the conversation" + const mockCost = 0.05 + const mockSummarizeResponse: condenseModule.SummarizeResponse = { + messages: [ + { role: "user", content: "First message" }, + { role: "assistant", content: mockSummary, isSummary: true }, + { role: "user", content: "Last message" }, + ], + summary: mockSummary, + cost: mockCost, + newContextTokens: 100, + } + + const summarizeSpy = jest + .spyOn(condenseModule, "summarizeConversation") + .mockResolvedValue(mockSummarizeResponse) + + const modelInfo = createModelInfo(100000, 30000) + const totalTokens = 70001 // Above threshold + const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] + + const result = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens, + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + autoCondenseContext: true, + systemPrompt: "System prompt", + }) + + // Verify summarizeConversation was called with the right parameters + expect(summarizeSpy).toHaveBeenCalledWith(messagesWithSmallContent, mockApiHandler, "System prompt") + + // Verify the result contains the summary information + expect(result).toMatchObject({ + messages: mockSummarizeResponse.messages, + summary: mockSummary, + cost: mockCost, + prevContextTokens: totalTokens, + }) + // newContextTokens might be present, but we don't need to verify its exact value + + // Clean up + summarizeSpy.mockRestore() + }) + + it("should fall back to truncateConversation when autoCondenseContext is true but summarization fails", async () => { + // Mock the summarizeConversation function to return empty summary + const mockSummarizeResponse: condenseModule.SummarizeResponse = { + messages: messages, // Original 
messages unchanged + summary: "", // Empty summary indicates failure + cost: 0.01, + } + + const summarizeSpy = jest + .spyOn(condenseModule, "summarizeConversation") + .mockResolvedValue(mockSummarizeResponse) + + const modelInfo = createModelInfo(100000, 30000) + const totalTokens = 70001 // Above threshold + const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] + + // When truncating, always uses 0.5 fraction + // With 4 messages after the first, 0.5 fraction means remove 2 messages + const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]] + + const result = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens, + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + autoCondenseContext: true, + }) + + // Verify summarizeConversation was called + expect(summarizeSpy).toHaveBeenCalled() + + // Verify it fell back to truncation + expect(result.messages).toEqual(expectedMessages) + expect(result.summary).toBe("") + expect(result.prevContextTokens).toBe(totalTokens) + // The cost might be different than expected, so we don't check it + + // Clean up + summarizeSpy.mockRestore() + }) + + it("should not call summarizeConversation when autoCondenseContext is false", async () => { + // Reset any previous mock calls + jest.clearAllMocks() + const summarizeSpy = jest.spyOn(condenseModule, "summarizeConversation") + + const modelInfo = createModelInfo(100000, 30000) + const totalTokens = 70001 // Above threshold + const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }] + + // When truncating, always uses 0.5 fraction + // With 4 messages after the first, 0.5 fraction means remove 2 messages + const expectedMessages = [messagesWithSmallContent[0], messagesWithSmallContent[3], messagesWithSmallContent[4]] + + const result = await truncateConversationIfNeeded({ + messages: messagesWithSmallContent, + totalTokens, + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + autoCondenseContext: false, + }) + + // Verify summarizeConversation was not called + expect(summarizeSpy).not.toHaveBeenCalled() + + // Verify it used truncation + expect(result).toEqual({ + messages: expectedMessages, + summary: "", + cost: 0, + prevContextTokens: totalTokens, + }) + + // Clean up + summarizeSpy.mockRestore() }) }) @@ -449,7 +613,12 @@ describe("getMaxTokens", () => { maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, }) - expect(result1).toEqual(messagesWithSmallContent) + expect(result1).toEqual({ + messages: messagesWithSmallContent, + summary: "", + cost: 0, + prevContextTokens: 39999, + }) // Above max tokens - truncate const result2 = await truncateConversationIfNeeded({ @@ -459,8 +628,11 @@ describe("getMaxTokens", () => { maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, }) - expect(result2).not.toEqual(messagesWithSmallContent) - expect(result2.length).toBe(3) // Truncated with 0.5 fraction + expect(result2.messages).not.toEqual(messagesWithSmallContent) + expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction + expect(result2.summary).toBe("") + expect(result2.cost).toBe(0) + expect(result2.prevContextTokens).toBe(50001) }) it("should use 20% of context window as buffer when maxTokens is undefined", async () => { @@ -479,7 +651,12 @@ describe("getMaxTokens", () => { 
maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, }) - expect(result1).toEqual(messagesWithSmallContent) + expect(result1).toEqual({ + messages: messagesWithSmallContent, + summary: "", + cost: 0, + prevContextTokens: 69999, + }) // Above max tokens - truncate const result2 = await truncateConversationIfNeeded({ @@ -489,8 +666,11 @@ describe("getMaxTokens", () => { maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, }) - expect(result2).not.toEqual(messagesWithSmallContent) - expect(result2.length).toBe(3) // Truncated with 0.5 fraction + expect(result2.messages).not.toEqual(messagesWithSmallContent) + expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction + expect(result2.summary).toBe("") + expect(result2.cost).toBe(0) + expect(result2.prevContextTokens).toBe(80001) }) it("should handle small context windows appropriately", async () => { @@ -508,7 +688,7 @@ describe("getMaxTokens", () => { maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, }) - expect(result1).toEqual(messagesWithSmallContent) + expect(result1.messages).toEqual(messagesWithSmallContent) // Above max tokens - truncate const result2 = await truncateConversationIfNeeded({ @@ -519,7 +699,7 @@ describe("getMaxTokens", () => { apiHandler: mockApiHandler, }) expect(result2).not.toEqual(messagesWithSmallContent) - expect(result2.length).toBe(3) // Truncated with 0.5 fraction + expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction }) it("should handle large context windows appropriately", async () => { @@ -538,7 +718,7 @@ describe("getMaxTokens", () => { maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, }) - expect(result1).toEqual(messagesWithSmallContent) + expect(result1.messages).toEqual(messagesWithSmallContent) // Above max tokens - truncate const result2 = await truncateConversationIfNeeded({ @@ -549,6 +729,6 @@ describe("getMaxTokens", () => { apiHandler: mockApiHandler, }) expect(result2).not.toEqual(messagesWithSmallContent) - expect(result2.length).toBe(3) // Truncated with 0.5 fraction + expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction }) }) diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts index d17bf7fc57d..6b42783c447 100644 --- a/src/core/sliding-window/index.ts +++ b/src/core/sliding-window/index.ts @@ -1,6 +1,6 @@ import { Anthropic } from "@anthropic-ai/sdk" import { ApiHandler } from "../../api" -import { summarizeConversation } from "../condense" +import { summarizeConversation, SummarizeResponse } from "../condense" import { ApiMessage } from "../task-persistence/apiMessages" /** @@ -53,6 +53,7 @@ export function truncateConversation(messages: ApiMessage[], fracToRemove: numbe * @param {number} maxTokens - The maximum number of tokens allowed. * @param {ApiHandler} apiHandler - The API handler to use for token counting. * @param {boolean} autoCondenseContext - Whether to use LLM summarization or sliding window implementation + * @param {string} systemPrompt - The system prompt, used for estimating the new context size after summarizing. * @returns {ApiMessage[]} The original or truncated conversation messages. */ @@ -63,8 +64,11 @@ type TruncateOptions = { maxTokens?: number | null apiHandler: ApiHandler autoCondenseContext?: boolean + systemPrompt?: string } +type TruncateResponse = SummarizeResponse & { prevContextTokens: number } + /** * Conditionally truncates the conversation messages if the total token count * exceeds the model's limit, considering the size of incoming content. 
@@ -79,7 +83,8 @@ export async function truncateConversationIfNeeded({ maxTokens, apiHandler, autoCondenseContext, -}: TruncateOptions): Promise<ApiMessage[]> { + systemPrompt, +}: TruncateOptions): Promise<TruncateResponse> { // Calculate the maximum tokens reserved for response const reservedTokens = maxTokens || contextWindow * 0.2 @@ -99,12 +104,13 @@ export async function truncateConversationIfNeeded({ // Determine if truncation is needed and apply if necessary if (effectiveTokens <= allowedTokens) { - return messages + return { messages, summary: "", cost: 0, prevContextTokens: effectiveTokens } } else if (autoCondenseContext) { - const summarizedMessages = await summarizeConversation(messages, apiHandler) - if (messages !== summarizedMessages) { - return summarizedMessages + const result = await summarizeConversation(messages, apiHandler, systemPrompt) + if (result.summary) { + return { ...result, prevContextTokens: effectiveTokens } } } - return truncateConversation(messages, 0.5) + const truncatedMessages = truncateConversation(messages, 0.5) + return { messages: truncatedMessages, prevContextTokens: effectiveTokens, summary: "", cost: 0 } } diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 9a23272d288..b301904f7ac 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -9,7 +9,7 @@ import pWaitFor from "p-wait-for" import { serializeError } from "serialize-error" // schemas -import { TokenUsage, ToolUsage, ToolName } from "../../schemas" +import { TokenUsage, ToolUsage, ToolName, ContextCondense } from "../../schemas" // api import { ApiHandler, buildApiHandler } from "../../api" @@ -490,6 +490,7 @@ export class Task extends EventEmitter<ClineEvents> { options: { isNonInteractive?: boolean } = {}, + contextCondense?: ContextCondense, ): Promise<undefined> { if (this.abort) { throw new Error(`[RooCode#say] task ${this.taskId}.${this.instanceId} aborted`) } @@ -517,7 +518,15 @@ this.lastMessageTs = sayTs } - await this.addToClineMessages({ ts: sayTs, type: "say", say: type, text, images, partial }) + await this.addToClineMessages({ + ts: sayTs, + type: "say", + say: type, + text, + images, + partial, + contextCondense, + }) } } else { // New now have a complete version of a previously partial message. @@ -547,7 +556,7 @@ this.lastMessageTs = sayTs } - await this.addToClineMessages({ ts: sayTs, type: "say", say: type, text, images }) + await this.addToClineMessages({ ts: sayTs, type: "say", say: type, text, images, contextCondense }) } } } else { @@ -562,7 +571,15 @@ this.lastMessageTs = sayTs } - await this.addToClineMessages({ ts: sayTs, type: "say", say: type, text, images, checkpoint }) + await this.addToClineMessages({ + ts: sayTs, + type: "say", + say: type, + text, + images, + checkpoint, + contextCondense, + }) } } @@ -985,10 +1002,6 @@ this.consecutiveMistakeCount = 0 } - // Get previous api req's index to check token usage and determine if we - // need to truncate conversation history. - const previousApiReqIndex = findLastIndex(this.clineMessages, (m) => m.say === "api_req_started") - // In this Cline request loop, we need to check if this task instance // has been asked to wait for a subtask to finish before continuing. 
const provider = this.providerRef.deref() @@ -1147,7 +1160,7 @@ export class Task extends EventEmitter { // Yields only if the first chunk is successful, otherwise will // allow the user to retry the request (most likely due to rate // limit error, which gets thrown on the first chunk). - const stream = this.attemptApiRequest(previousApiReqIndex) + const stream = this.attemptApiRequest() let assistantMessage = "" let reasoningMessage = "" this.isStreaming = true @@ -1354,7 +1367,7 @@ export class Task extends EventEmitter { } } - public async *attemptApiRequest(previousApiReqIndex: number, retryAttempt: number = 0): ApiStream { + public async *attemptApiRequest(retryAttempt: number = 0): ApiStream { let mcpHub: McpHub | undefined const { apiConfiguration, mcpEnabled, autoApprovalEnabled, alwaysApproveResubmit, requestDelaySeconds } = @@ -1444,25 +1457,8 @@ export class Task extends EventEmitter { ) })() - // If the previous API request's total token usage is close to the - // context window, truncate the conversation history to free up space - // for the new request. - if (previousApiReqIndex >= 0) { - const previousRequest = this.clineMessages[previousApiReqIndex]?.text - - if (!previousRequest) { - return - } - - const { - tokensIn = 0, - tokensOut = 0, - cacheWrites = 0, - cacheReads = 0, - }: ClineApiReqInfo = JSON.parse(previousRequest) - - const totalTokens = tokensIn + tokensOut + cacheWrites + cacheReads - + const { contextTokens } = this.getTokenUsage() + if (contextTokens) { // Default max tokens value for thinking models when no specific // value is set. const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384 @@ -1476,16 +1472,31 @@ export class Task extends EventEmitter { const contextWindow = modelInfo.contextWindow const autoCondenseContext = experiments?.autoCondenseContext ?? false - const trimmedMessages = await truncateConversationIfNeeded({ + const truncateResult = await truncateConversationIfNeeded({ messages: this.apiConversationHistory, - totalTokens, + totalTokens: contextTokens, maxTokens, contextWindow, apiHandler: this.api, autoCondenseContext, + systemPrompt, }) - if (trimmedMessages !== this.apiConversationHistory) { - await this.overwriteApiConversationHistory(trimmedMessages) + if (truncateResult.messages !== this.apiConversationHistory) { + await this.overwriteApiConversationHistory(truncateResult.messages) + } + if (truncateResult.summary) { + const { summary, cost, prevContextTokens, newContextTokens = 0 } = truncateResult + const contextCondense: ContextCondense = { summary, cost, newContextTokens, prevContextTokens } + await this.say( + "condense_context", + undefined /* text */, + undefined /* images */, + false /* partial */, + undefined /* checkpoint */, + undefined /* progressStatus */, + undefined /* options */, + contextCondense, + ) } } @@ -1556,7 +1567,7 @@ export class Task extends EventEmitter { // Delegate generator output from the recursive call with // incremented retry count. - yield* this.attemptApiRequest(previousApiReqIndex, retryAttempt + 1) + yield* this.attemptApiRequest(retryAttempt + 1) return } else { @@ -1574,7 +1585,7 @@ export class Task extends EventEmitter { await this.say("api_req_retried") // Delegate generator output from the recursive call. 
- yield* this.attemptApiRequest(previousApiReqIndex) + yield* this.attemptApiRequest() return } } @@ -1610,7 +1621,7 @@ export class Task extends EventEmitter { return combineApiRequests(combineCommandSequences(messages)) } - public getTokenUsage() { + public getTokenUsage(): TokenUsage { return getApiMetrics(this.combineMessages(this.clineMessages.slice(1))) } diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 2961f17489f..53d9673b730 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -405,6 +405,7 @@ type ClineMessage = { | "checkpoint_saved" | "rooignore_error" | "diff_error" + | "condense_context" ) | undefined text?: string | undefined @@ -423,6 +424,14 @@ type ClineMessage = { text?: string | undefined } | undefined + contextCondense?: + | { + cost: number + prevContextTokens: number + newContextTokens: number + summary: string + } + | undefined } type TokenUsage = { @@ -480,6 +489,7 @@ type RooCodeEvents = { | "checkpoint_saved" | "rooignore_error" | "diff_error" + | "condense_context" ) | undefined text?: string | undefined @@ -498,6 +508,14 @@ type RooCodeEvents = { text?: string | undefined } | undefined + contextCondense?: + | { + cost: number + prevContextTokens: number + newContextTokens: number + summary: string + } + | undefined } }, ] @@ -949,6 +967,7 @@ type IpcMessage = | "checkpoint_saved" | "rooignore_error" | "diff_error" + | "condense_context" ) | undefined text?: string | undefined @@ -967,6 +986,14 @@ type IpcMessage = text?: string | undefined } | undefined + contextCondense?: + | { + cost: number + prevContextTokens: number + newContextTokens: number + summary: string + } + | undefined } }, ] @@ -1408,6 +1435,7 @@ type TaskEvent = | "checkpoint_saved" | "rooignore_error" | "diff_error" + | "condense_context" ) | undefined text?: string | undefined @@ -1426,6 +1454,14 @@ type TaskEvent = text?: string | undefined } | undefined + contextCondense?: + | { + cost: number + prevContextTokens: number + newContextTokens: number + summary: string + } + | undefined } }, ] diff --git a/src/exports/types.ts b/src/exports/types.ts index 47cc16a7499..2bc487c2f93 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -413,6 +413,7 @@ type ClineMessage = { | "checkpoint_saved" | "rooignore_error" | "diff_error" + | "condense_context" ) | undefined text?: string | undefined @@ -431,6 +432,14 @@ type ClineMessage = { text?: string | undefined } | undefined + contextCondense?: + | { + cost: number + prevContextTokens: number + newContextTokens: number + summary: string + } + | undefined } export type { ClineMessage } @@ -492,6 +501,7 @@ type RooCodeEvents = { | "checkpoint_saved" | "rooignore_error" | "diff_error" + | "condense_context" ) | undefined text?: string | undefined @@ -510,6 +520,14 @@ type RooCodeEvents = { text?: string | undefined } | undefined + contextCondense?: + | { + cost: number + prevContextTokens: number + newContextTokens: number + summary: string + } + | undefined } }, ] @@ -963,6 +981,7 @@ type IpcMessage = | "checkpoint_saved" | "rooignore_error" | "diff_error" + | "condense_context" ) | undefined text?: string | undefined @@ -981,6 +1000,14 @@ type IpcMessage = text?: string | undefined } | undefined + contextCondense?: + | { + cost: number + prevContextTokens: number + newContextTokens: number + summary: string + } + | undefined } }, ] @@ -1426,6 +1453,7 @@ type TaskEvent = | "checkpoint_saved" | "rooignore_error" | "diff_error" + | "condense_context" ) | undefined text?: string | undefined @@ -1444,6 
+1472,14 @@ type TaskEvent = text?: string | undefined } | undefined + contextCondense?: + | { + cost: number + prevContextTokens: number + newContextTokens: number + summary: string + } + | undefined } }, ] diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 209bc67d2c5..0c0c21c62f2 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -929,6 +929,7 @@ export const clineSays = [ "checkpoint_saved", "rooignore_error", "diff_error", + "condense_context", ] as const export const clineSaySchema = z.enum(clineSays) @@ -946,6 +947,19 @@ export const toolProgressStatusSchema = z.object({ export type ToolProgressStatus = z.infer<typeof toolProgressStatusSchema> +/** + * ContextCondense + */ + +export const contextCondenseSchema = z.object({ + cost: z.number(), + prevContextTokens: z.number(), + newContextTokens: z.number(), + summary: z.string(), +}) + +export type ContextCondense = z.infer<typeof contextCondenseSchema> + /** * ClineMessage */ @@ -962,6 +976,7 @@ export const clineMessageSchema = z.object({ conversationHistoryIndex: z.number().optional(), checkpoint: z.record(z.string(), z.unknown()).optional(), progressStatus: toolProgressStatusSchema.optional(), + contextCondense: contextCondenseSchema.optional(), }) export type ClineMessage = z.infer<typeof clineMessageSchema> diff --git a/src/shared/__tests__/getApiMetrics.test.ts b/src/shared/__tests__/getApiMetrics.test.ts new file mode 100644 index 00000000000..4a884f5ea5c --- /dev/null +++ b/src/shared/__tests__/getApiMetrics.test.ts @@ -0,0 +1,328 @@ +// npx jest src/shared/__tests__/getApiMetrics.test.ts + +import { getApiMetrics } from "../getApiMetrics" +import { ClineMessage } from "../ExtensionMessage" + +describe("getApiMetrics", () => { + // Helper function to create a basic api_req_started message + const createApiReqStartedMessage = ( + text: string = '{"tokensIn":10,"tokensOut":20}', + ts: number = 1000, + ): ClineMessage => ({ + type: "say", + say: "api_req_started", + text, + ts, + }) + + // Helper function to create a condense_context message + const createCondenseContextMessage = ( + cost: number = 0.002, + newContextTokens: number = 500, + prevContextTokens: number = 1000, + ts: number = 2000, + ): ClineMessage => ({ + type: "say", + say: "condense_context", + contextCondense: { + cost, + newContextTokens, + prevContextTokens, + summary: "Context was condensed", + }, + ts, + }) + + // Helper function to create a non-API message + const createOtherMessage = ( + say: "text" | "error" | "reasoning" | "completion_result" = "text", + text: string = "Hello world", + ts: number = 999, + ): ClineMessage => ({ + type: "say", + say, + text, + ts, + }) + + describe("Basic functionality", () => { + it("should calculate metrics from a single api_req_started message", () => { + const messages: ClineMessage[] = [ + createApiReqStartedMessage( + '{"tokensIn":100,"tokensOut":200,"cacheWrites":5,"cacheReads":10,"cost":0.005}', + ), + ] + + const result = getApiMetrics(messages) + + expect(result.totalTokensIn).toBe(100) + expect(result.totalTokensOut).toBe(200) + expect(result.totalCacheWrites).toBe(5) + expect(result.totalCacheReads).toBe(10) + expect(result.totalCost).toBe(0.005) + expect(result.contextTokens).toBe(315) // 100 + 200 + 5 + 10 + }) + + it("should calculate metrics from multiple api_req_started messages", () => { + const messages: ClineMessage[] = [ + createApiReqStartedMessage( + '{"tokensIn":100,"tokensOut":200,"cacheWrites":5,"cacheReads":10,"cost":0.005}', + 1000, + ), + createApiReqStartedMessage( + '{"tokensIn":50,"tokensOut":150,"cacheWrites":3,"cacheReads":7,"cost":0.003}', + 2000, 
), + ] + + const result = getApiMetrics(messages) + + expect(result.totalTokensIn).toBe(150) // 100 + 50 + expect(result.totalTokensOut).toBe(350) // 200 + 150 + expect(result.totalCacheWrites).toBe(8) // 5 + 3 + expect(result.totalCacheReads).toBe(17) // 10 + 7 + expect(result.totalCost).toBe(0.008) // 0.005 + 0.003 + expect(result.contextTokens).toBe(210) // 50 + 150 + 3 + 7 (from the last message) + }) + + it("should calculate metrics from condense_context messages", () => { + const messages: ClineMessage[] = [ + createCondenseContextMessage(0.002, 500, 1000, 1000), + createCondenseContextMessage(0.003, 400, 800, 2000), + ] + + const result = getApiMetrics(messages) + + expect(result.totalTokensIn).toBe(0) + expect(result.totalTokensOut).toBe(0) + expect(result.totalCacheWrites).toBeUndefined() + expect(result.totalCacheReads).toBeUndefined() + expect(result.totalCost).toBe(0.005) // 0.002 + 0.003 + expect(result.contextTokens).toBe(400) // newContextTokens from the last condense_context message + }) + + it("should calculate metrics from mixed message types", () => { + const messages: ClineMessage[] = [ + createApiReqStartedMessage( + '{"tokensIn":100,"tokensOut":200,"cacheWrites":5,"cacheReads":10,"cost":0.005}', + 1000, + ), + createOtherMessage("text", "Some text", 1500), + createCondenseContextMessage(0.002, 500, 1000, 2000), + createApiReqStartedMessage( + '{"tokensIn":50,"tokensOut":150,"cacheWrites":3,"cacheReads":7,"cost":0.003}', + 3000, + ), + ] + + const result = getApiMetrics(messages) + + expect(result.totalTokensIn).toBe(150) // 100 + 50 + expect(result.totalTokensOut).toBe(350) // 200 + 150 + expect(result.totalCacheWrites).toBe(8) // 5 + 3 + expect(result.totalCacheReads).toBe(17) // 10 + 7 + expect(result.totalCost).toBe(0.01) // 0.005 + 0.002 + 0.003 + expect(result.contextTokens).toBe(210) // 50 + 150 + 3 + 7 (from the last api_req_started message) + }) + }) + + describe("Edge cases", () => { + it("should handle empty messages array", () => { + const result = getApiMetrics([]) + + expect(result.totalTokensIn).toBe(0) + expect(result.totalTokensOut).toBe(0) + expect(result.totalCacheWrites).toBeUndefined() + expect(result.totalCacheReads).toBeUndefined() + expect(result.totalCost).toBe(0) + expect(result.contextTokens).toBe(0) + }) + + it("should handle messages with no API metrics", () => { + const messages: ClineMessage[] = [ + createOtherMessage("text", "Message 1", 1000), + createOtherMessage("error", "Error message", 2000), + ] + + const result = getApiMetrics(messages) + + expect(result.totalTokensIn).toBe(0) + expect(result.totalTokensOut).toBe(0) + expect(result.totalCacheWrites).toBeUndefined() + expect(result.totalCacheReads).toBeUndefined() + expect(result.totalCost).toBe(0) + expect(result.contextTokens).toBe(0) + }) + + it("should handle invalid JSON in api_req_started message", () => { + // We need to mock console.error to avoid polluting test output + const originalConsoleError = console.error + console.error = jest.fn() + + const messages: ClineMessage[] = [ + { + type: "say", + say: "api_req_started", + text: "This is not valid JSON", + ts: 1000, + }, + ] + + const result = getApiMetrics(messages) + + // Should not throw and should return default values + expect(result.totalTokensIn).toBe(0) + expect(result.totalTokensOut).toBe(0) + expect(result.totalCacheWrites).toBeUndefined() + expect(result.totalCacheReads).toBeUndefined() + expect(result.totalCost).toBe(0) + expect(result.contextTokens).toBe(0) + + // Restore console.error + console.error = 
originalConsoleError + }) + + it("should handle missing text field in api_req_started message", () => { + const messages: ClineMessage[] = [ + { + type: "say", + say: "api_req_started", + ts: 1000, + // text field is missing + }, + ] + + const result = getApiMetrics(messages) + + // Should not throw and should return default values + expect(result.totalTokensIn).toBe(0) + expect(result.totalTokensOut).toBe(0) + expect(result.totalCacheWrites).toBeUndefined() + expect(result.totalCacheReads).toBeUndefined() + expect(result.totalCost).toBe(0) + expect(result.contextTokens).toBe(0) + }) + + it("should handle missing contextCondense field in condense_context message", () => { + const messages: ClineMessage[] = [ + { + type: "say", + say: "condense_context", + ts: 1000, + // contextCondense field is missing + }, + ] + + const result = getApiMetrics(messages) + + // Should not throw and should return default values + expect(result.totalTokensIn).toBe(0) + expect(result.totalTokensOut).toBe(0) + expect(result.totalCacheWrites).toBeUndefined() + expect(result.totalCacheReads).toBeUndefined() + expect(result.totalCost).toBe(0) + expect(result.contextTokens).toBe(0) + }) + + it("should handle partial metrics in api_req_started message", () => { + const messages: ClineMessage[] = [ + createApiReqStartedMessage('{"tokensIn":100}', 1000), // Only tokensIn + createApiReqStartedMessage('{"tokensOut":200}', 2000), // Only tokensOut + createApiReqStartedMessage('{"cacheWrites":5}', 3000), // Only cacheWrites + createApiReqStartedMessage('{"cacheReads":10}', 4000), // Only cacheReads + createApiReqStartedMessage('{"cost":0.005}', 5000), // Only cost + ] + + const result = getApiMetrics(messages) + + expect(result.totalTokensIn).toBe(100) + expect(result.totalTokensOut).toBe(200) + expect(result.totalCacheWrites).toBe(5) + expect(result.totalCacheReads).toBe(10) + expect(result.totalCost).toBe(0.005) + + // The implementation will use the last message with tokens for contextTokens + // In this case, it's the cacheReads message + expect(result.contextTokens).toBe(10) + }) + + it("should handle non-number values in api_req_started message", () => { + const messages: ClineMessage[] = [ + // Use string values that can be parsed as JSON but aren't valid numbers for the metrics + createApiReqStartedMessage( + '{"tokensIn":"not-a-number","tokensOut":"not-a-number","cacheWrites":"not-a-number","cacheReads":"not-a-number","cost":"not-a-number"}', + ), + ] + + const result = getApiMetrics(messages) + + // Non-number values should be ignored + expect(result.totalTokensIn).toBe(0) + expect(result.totalTokensOut).toBe(0) + expect(result.totalCacheWrites).toBeUndefined() + expect(result.totalCacheReads).toBeUndefined() + expect(result.totalCost).toBe(0) + + // The implementation concatenates string values for contextTokens + expect(result.contextTokens).toBe("not-a-numbernot-a-numbernot-a-numbernot-a-number") + }) + }) + + describe("Context tokens calculation", () => { + it("should calculate contextTokens from the last api_req_started message", () => { + const messages: ClineMessage[] = [ + createApiReqStartedMessage('{"tokensIn":100,"tokensOut":200,"cacheWrites":5,"cacheReads":10}', 1000), + createApiReqStartedMessage('{"tokensIn":50,"tokensOut":150,"cacheWrites":3,"cacheReads":7}', 2000), + ] + + const result = getApiMetrics(messages) + + // Should use the values from the last api_req_started message + expect(result.contextTokens).toBe(210) // 50 + 150 + 3 + 7 + }) + + it("should calculate contextTokens from the last 
condense_context message", () => { + const messages: ClineMessage[] = [ + createApiReqStartedMessage('{"tokensIn":100,"tokensOut":200,"cacheWrites":5,"cacheReads":10}', 1000), + createCondenseContextMessage(0.002, 500, 1000, 2000), + ] + + const result = getApiMetrics(messages) + + // Should use newContextTokens from the last condense_context message + expect(result.contextTokens).toBe(500) + }) + + it("should prioritize the last message for contextTokens calculation", () => { + const messages: ClineMessage[] = [ + createCondenseContextMessage(0.002, 500, 1000, 1000), + createApiReqStartedMessage('{"tokensIn":100,"tokensOut":200,"cacheWrites":5,"cacheReads":10}', 2000), + createCondenseContextMessage(0.003, 400, 800, 3000), + createApiReqStartedMessage('{"tokensIn":50,"tokensOut":150,"cacheWrites":3,"cacheReads":7}', 4000), + ] + + const result = getApiMetrics(messages) + + // Should use the values from the last api_req_started message + expect(result.contextTokens).toBe(210) // 50 + 150 + 3 + 7 + }) + + it("should handle missing values when calculating contextTokens", () => { + // We need to mock console.error to avoid polluting test output + const originalConsoleError = console.error + console.error = jest.fn() + + const messages: ClineMessage[] = [ + createApiReqStartedMessage('{"tokensIn":null,"cacheWrites":5,"cacheReads":10}', 1000), + ] + + const result = getApiMetrics(messages) + + // Should handle missing or invalid values + expect(result.contextTokens).toBe(15) // 0 + 0 + 5 + 10 + + // Restore console.error + console.error = originalConsoleError + }) + }) +}) diff --git a/src/shared/getApiMetrics.ts b/src/shared/getApiMetrics.ts index 55100d643a7..c728aa563bf 100644 --- a/src/shared/getApiMetrics.ts +++ b/src/shared/getApiMetrics.ts @@ -2,11 +2,19 @@ import { TokenUsage } from "../schemas" import { ClineMessage } from "./ExtensionMessage" +export type ParsedApiReqStartedTextType = { + tokensIn: number + tokensOut: number + cacheWrites: number + cacheReads: number + cost?: number // Only present if combineApiRequests has been called +} + /** * Calculates API metrics from an array of ClineMessages. * - * This function processes 'api_req_started' messages that have been combined with their - * corresponding 'api_req_finished' messages by the combineApiRequests function. + * This function processes 'condense_context' messages and 'api_req_started' messages that have been + * combined with their corresponding 'api_req_finished' messages by the combineApiRequests function. * It extracts and sums up the tokensIn, tokensOut, cacheWrites, cacheReads, and cost from these messages. * * @param messages - An array of ClineMessage objects to process. 
@@ -29,30 +37,12 @@ export function getApiMetrics(messages: ClineMessage[]) { contextTokens: 0, } - // Helper function to get total tokens from a message - const getTotalTokensFromMessage = (message: ClineMessage): number => { - if (!message.text) return 0 - try { - const { tokensIn, tokensOut, cacheWrites, cacheReads } = JSON.parse(message.text) - return (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0) - } catch { - return 0 - } - } - - // Find the last api_req_started message that has any tokens - const lastApiReq = [...messages].reverse().find((message) => { - if (message.type === "say" && message.say === "api_req_started") { - return getTotalTokensFromMessage(message) > 0 - } - return false - }) - // Calculate running totals messages.forEach((message) => { if (message.type === "say" && message.say === "api_req_started" && message.text) { try { - const { tokensIn, tokensOut, cacheWrites, cacheReads, cost } = JSON.parse(message.text) + const parsedText: ParsedApiReqStartedTextType = JSON.parse(message.text) + const { tokensIn, tokensOut, cacheWrites, cacheReads, cost } = parsedText if (typeof tokensIn === "number") { result.totalTokensIn += tokensIn @@ -69,16 +59,34 @@ export function getApiMetrics(messages: ClineMessage[]) { if (typeof cost === "number") { result.totalCost += cost } - - // If this is the last api request with tokens, use its total for context size - if (message === lastApiReq) { - result.contextTokens = getTotalTokensFromMessage(message) - } } catch (error) { console.error("Error parsing JSON:", error) } + } else if (message.type === "say" && message.say === "condense_context") { + result.totalCost += message.contextCondense?.cost ?? 0 } }) + // Calculate context tokens, from the last API request started or condense context message + result.contextTokens = 0 + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i] + if (message.type === "say" && message.say === "api_req_started" && message.text) { + try { + const parsedText: ParsedApiReqStartedTextType = JSON.parse(message.text) + const { tokensIn, tokensOut, cacheWrites, cacheReads } = parsedText + result.contextTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0) + } catch (error) { + console.error("Error parsing JSON:", error) + continue + } + } else if (message.type === "say" && message.say === "condense_context") { + result.contextTokens = message.contextCondense?.newContextTokens ?? 0 + } + if (result.contextTokens) { + break + } + } + return result } diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 8912a5d80e0..c6045087012 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -31,6 +31,7 @@ import { ProgressIndicator } from "./ProgressIndicator" import { Markdown } from "./Markdown" import { CommandExecution } from "./CommandExecution" import { CommandExecutionError } from "./CommandExecutionError" +import { ContextCondenseRow } from "./ContextCondenseRow" interface ChatRowProps { message: ClineMessage @@ -926,6 +927,8 @@ export const ChatRowContent = ({ checkpoint={message.checkpoint} /> ) + case "condense_context": + return message.contextCondense ? 
<ContextCondenseRow {...message.contextCondense} /> : null default: return ( <> diff --git a/webview-ui/src/components/chat/ContextCondenseRow.tsx b/webview-ui/src/components/chat/ContextCondenseRow.tsx new file mode 100644 index 00000000000..52045df7f7e --- /dev/null +++ b/webview-ui/src/components/chat/ContextCondenseRow.tsx @@ -0,0 +1,38 @@ +import { useState } from "react" +import { useTranslation } from "react-i18next" +import { VSCodeBadge } from "@vscode/webview-ui-toolkit/react" + +import { ContextCondense } from "@roo/schemas" +import { Markdown } from "./Markdown" + +export const ContextCondenseRow = ({ cost, prevContextTokens, newContextTokens, summary }: ContextCondense) => { + const { t } = useTranslation() + const [isExpanded, setIsExpanded] = useState(false) + + return ( + <div> + <div onClick={() => setIsExpanded(!isExpanded)}> + <div> + <span>{t("chat:contextCondense.title")}</span> + <span> + {prevContextTokens.toLocaleString()} → {newContextTokens.toLocaleString()} {t("chat:contextCondense.tokens")} + </span> + <VSCodeBadge className={cost > 0 ? "opacity-100" : "opacity-0"}>${cost.toFixed(2)}</VSCodeBadge> + </div> + <span className={isExpanded ? "codicon codicon-chevron-up" : "codicon codicon-chevron-down"} /> + </div> + + {isExpanded && ( + <div> + <span> + {t("chat:contextCondense.conversationSummary")} + </span> + <Markdown markdown={summary} /> + </div> + )} + </div> +
+ ) +} diff --git a/webview-ui/src/i18n/locales/ca/chat.json b/webview-ui/src/i18n/locales/ca/chat.json index 6e9e2fae50a..dd0d89ced5f 100644 --- a/webview-ui/src/i18n/locales/ca/chat.json +++ b/webview-ui/src/i18n/locales/ca/chat.json @@ -198,6 +198,11 @@ "thinking": "Pensant", "seconds": "{{count}}s" }, + "contextCondense": { + "title": "Context condensat", + "conversationSummary": "Resum de la conversa", + "tokens": "tokens" + }, "followUpSuggest": { "copyToInput": "Copiar a l'entrada (o Shift + clic)" }, diff --git a/webview-ui/src/i18n/locales/de/chat.json b/webview-ui/src/i18n/locales/de/chat.json index 7e450d9cd0b..f683ada7d3d 100644 --- a/webview-ui/src/i18n/locales/de/chat.json +++ b/webview-ui/src/i18n/locales/de/chat.json @@ -198,6 +198,11 @@ "thinking": "Denke nach", "seconds": "{{count}}s" }, + "contextCondense": { + "title": "Kontext komprimiert", + "conversationSummary": "Gesprächszusammenfassung", + "tokens": "Tokens" + }, "followUpSuggest": { "copyToInput": "In Eingabefeld kopieren (oder Shift + Klick)" }, diff --git a/webview-ui/src/i18n/locales/en/chat.json b/webview-ui/src/i18n/locales/en/chat.json index 22b3386b30a..624381c8b0b 100644 --- a/webview-ui/src/i18n/locales/en/chat.json +++ b/webview-ui/src/i18n/locales/en/chat.json @@ -129,6 +129,11 @@ }, "current": "Current" }, + "contextCondense": { + "title": "Context Condensed", + "conversationSummary": "Conversation Summary", + "tokens": "tokens" + }, "instructions": { "wantsToFetch": "Roo wants to fetch detailed instructions to assist with the current task" }, diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index d23272803cb..3a55588b111 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -400,7 +400,7 @@ "warning": "⚠️", "AUTO_CONDENSE_CONTEXT": { "name": "Intelligently condense the context window", - "description": "Uses an LLM call to summarize the past conversation when the task's context window is almost full, rather than dropping old messages. Disclaimer: the cost of summarizing is not currently included in the API costs shown in the UI." + "description": "Uses an LLM call to summarize the past conversation when the task's context window is almost full, rather than dropping old messages." 
}, "DIFF_STRATEGY_UNIFIED": { "name": "Use experimental unified diff strategy", diff --git a/webview-ui/src/i18n/locales/es/chat.json b/webview-ui/src/i18n/locales/es/chat.json index 9a1e5dcf04a..7de76382416 100644 --- a/webview-ui/src/i18n/locales/es/chat.json +++ b/webview-ui/src/i18n/locales/es/chat.json @@ -198,6 +198,11 @@ "thinking": "Pensando", "seconds": "{{count}}s" }, + "contextCondense": { + "title": "Contexto condensado", + "conversationSummary": "Resumen de la conversación", + "tokens": "tokens" + }, "followUpSuggest": { "copyToInput": "Copiar a la entrada (o Shift + clic)" }, diff --git a/webview-ui/src/i18n/locales/fr/chat.json b/webview-ui/src/i18n/locales/fr/chat.json index 2e39eb0a827..bb1219e7ac1 100644 --- a/webview-ui/src/i18n/locales/fr/chat.json +++ b/webview-ui/src/i18n/locales/fr/chat.json @@ -198,6 +198,11 @@ "thinking": "Réflexion", "seconds": "{{count}}s" }, + "contextCondense": { + "title": "Contexte condensé", + "conversationSummary": "Résumé de la conversation", + "tokens": "tokens" + }, "followUpSuggest": { "copyToInput": "Copier vers l'entrée (ou Shift + clic)" }, diff --git a/webview-ui/src/i18n/locales/hi/chat.json b/webview-ui/src/i18n/locales/hi/chat.json index 459216b92c9..58bbd96b037 100644 --- a/webview-ui/src/i18n/locales/hi/chat.json +++ b/webview-ui/src/i18n/locales/hi/chat.json @@ -198,6 +198,11 @@ "thinking": "विचार कर रहा है", "seconds": "{{count}} सेकंड" }, + "contextCondense": { + "title": "संदर्भ संक्षिप्त किया गया", + "conversationSummary": "वार्तालाप का सारांश", + "tokens": "टोकन" + }, "followUpSuggest": { "copyToInput": "इनपुट में कॉपी करें (या Shift + क्लिक)" }, diff --git a/webview-ui/src/i18n/locales/it/chat.json b/webview-ui/src/i18n/locales/it/chat.json index 3d282c3a276..40a80ed188c 100644 --- a/webview-ui/src/i18n/locales/it/chat.json +++ b/webview-ui/src/i18n/locales/it/chat.json @@ -198,6 +198,11 @@ "thinking": "Sto pensando", "seconds": "{{count}}s" }, + "contextCondense": { + "title": "Contesto condensato", + "conversationSummary": "Riepilogo della conversazione", + "tokens": "token" + }, "followUpSuggest": { "copyToInput": "Copia nell'input (o Shift + clic)" }, diff --git a/webview-ui/src/i18n/locales/ja/chat.json b/webview-ui/src/i18n/locales/ja/chat.json index 7b5fc2aebbd..8bb0b09a1ef 100644 --- a/webview-ui/src/i18n/locales/ja/chat.json +++ b/webview-ui/src/i18n/locales/ja/chat.json @@ -198,6 +198,11 @@ "thinking": "考え中", "seconds": "{{count}}秒" }, + "contextCondense": { + "title": "コンテキスト要約", + "conversationSummary": "会話の要約", + "tokens": "トークン" + }, "followUpSuggest": { "copyToInput": "入力欄にコピー(またはShift + クリック)" }, diff --git a/webview-ui/src/i18n/locales/ko/chat.json b/webview-ui/src/i18n/locales/ko/chat.json index a04ee7f0712..b79038367ba 100644 --- a/webview-ui/src/i18n/locales/ko/chat.json +++ b/webview-ui/src/i18n/locales/ko/chat.json @@ -198,6 +198,11 @@ "thinking": "생각 중", "seconds": "{{count}}초" }, + "contextCondense": { + "title": "컨텍스트 요약됨", + "conversationSummary": "대화 요약", + "tokens": "토큰" + }, "followUpSuggest": { "copyToInput": "입력창에 복사 (또는 Shift + 클릭)" }, diff --git a/webview-ui/src/i18n/locales/nl/chat.json b/webview-ui/src/i18n/locales/nl/chat.json index f13a153042d..f69ce8b69cd 100644 --- a/webview-ui/src/i18n/locales/nl/chat.json +++ b/webview-ui/src/i18n/locales/nl/chat.json @@ -208,6 +208,11 @@ "thinking": "Denkt na", "seconds": "{{count}}s" }, + "contextCondense": { + "title": "Context samengevat", + "conversationSummary": "Gespreksoverzicht", + "tokens": "tokens" + }, "followUpSuggest": { 
"copyToInput": "Kopiëren naar invoer (zelfde als shift + klik)" }, diff --git a/webview-ui/src/i18n/locales/pl/chat.json b/webview-ui/src/i18n/locales/pl/chat.json index 8e9ad4dc528..c21898d50a5 100644 --- a/webview-ui/src/i18n/locales/pl/chat.json +++ b/webview-ui/src/i18n/locales/pl/chat.json @@ -198,6 +198,11 @@ "thinking": "Myślenie", "seconds": "{{count}} s" }, + "contextCondense": { + "title": "Kontekst skondensowany", + "conversationSummary": "Podsumowanie rozmowy", + "tokens": "tokeny" + }, "followUpSuggest": { "copyToInput": "Kopiuj do pola wprowadzania (lub Shift + kliknięcie)" }, diff --git a/webview-ui/src/i18n/locales/pt-BR/chat.json b/webview-ui/src/i18n/locales/pt-BR/chat.json index d8aeb86f00d..d25cc9962fc 100644 --- a/webview-ui/src/i18n/locales/pt-BR/chat.json +++ b/webview-ui/src/i18n/locales/pt-BR/chat.json @@ -198,6 +198,11 @@ "thinking": "Pensando", "seconds": "{{count}}s" }, + "contextCondense": { + "title": "Contexto condensado", + "conversationSummary": "Resumo da conversa", + "tokens": "tokens" + }, "followUpSuggest": { "copyToInput": "Copiar para entrada (ou Shift + clique)" }, diff --git a/webview-ui/src/i18n/locales/ru/chat.json b/webview-ui/src/i18n/locales/ru/chat.json index 225d709a739..6e2345f8223 100644 --- a/webview-ui/src/i18n/locales/ru/chat.json +++ b/webview-ui/src/i18n/locales/ru/chat.json @@ -208,6 +208,11 @@ "thinking": "Обдумывание", "seconds": "{{count}}с" }, + "contextCondense": { + "title": "Контекст сжат", + "conversationSummary": "Сводка разговора", + "tokens": "токены" + }, "followUpSuggest": { "copyToInput": "Скопировать во ввод (то же, что shift + клик)" }, diff --git a/webview-ui/src/i18n/locales/tr/chat.json b/webview-ui/src/i18n/locales/tr/chat.json index fa8184899e4..d0171862581 100644 --- a/webview-ui/src/i18n/locales/tr/chat.json +++ b/webview-ui/src/i18n/locales/tr/chat.json @@ -198,6 +198,11 @@ "thinking": "Düşünüyor", "seconds": "{{count}}sn" }, + "contextCondense": { + "title": "Bağlam Özetlendi", + "conversationSummary": "Konuşma Özeti", + "tokens": "token" + }, "followUpSuggest": { "copyToInput": "Giriş alanına kopyala (veya Shift + tıklama)" }, diff --git a/webview-ui/src/i18n/locales/vi/chat.json b/webview-ui/src/i18n/locales/vi/chat.json index 388fc284a6a..62bed81fc43 100644 --- a/webview-ui/src/i18n/locales/vi/chat.json +++ b/webview-ui/src/i18n/locales/vi/chat.json @@ -198,6 +198,11 @@ "thinking": "Đang suy nghĩ", "seconds": "{{count}} giây" }, + "contextCondense": { + "title": "Ngữ cảnh đã tóm tắt", + "conversationSummary": "Tóm tắt cuộc hội thoại", + "tokens": "token" + }, "followUpSuggest": { "copyToInput": "Sao chép vào ô nhập liệu (hoặc Shift + nhấp chuột)" }, diff --git a/webview-ui/src/i18n/locales/zh-CN/chat.json b/webview-ui/src/i18n/locales/zh-CN/chat.json index e97a40aeaca..31993895ac6 100644 --- a/webview-ui/src/i18n/locales/zh-CN/chat.json +++ b/webview-ui/src/i18n/locales/zh-CN/chat.json @@ -198,6 +198,11 @@ "thinking": "思考中", "seconds": "{{count}}秒" }, + "contextCondense": { + "title": "上下文已压缩", + "conversationSummary": "对话摘要", + "tokens": "tokens" + }, "followUpSuggest": { "copyToInput": "复制到输入框(或按住Shift点击)" }, diff --git a/webview-ui/src/i18n/locales/zh-TW/chat.json b/webview-ui/src/i18n/locales/zh-TW/chat.json index 8d142da6c06..b3f94b9d819 100644 --- a/webview-ui/src/i18n/locales/zh-TW/chat.json +++ b/webview-ui/src/i18n/locales/zh-TW/chat.json @@ -198,6 +198,11 @@ "thinking": "思考中", "seconds": "{{count}}秒" }, + "contextCondense": { + "title": "上下文已壓縮", + "conversationSummary": "對話摘要", + "tokens": 
"tokens" + }, "followUpSuggest": { "copyToInput": "複製到輸入框(或按住 Shift 並點選)" },