diff --git a/src/core/sliding-window/__tests__/sliding-window.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts
similarity index 95%
rename from src/core/sliding-window/__tests__/sliding-window.spec.ts
rename to src/core/context-management/__tests__/context-management.spec.ts
index 0f2c70c81bc..a2c6daec728
--- a/src/core/sliding-window/__tests__/sliding-window.spec.ts
+++ b/src/core/context-management/__tests__/context-management.spec.ts
@@ -1,4 +1,4 @@
-// npx vitest src/core/sliding-window/__tests__/sliding-window.spec.ts
+// cd src && npx vitest run core/context-management/__tests__/context-management.spec.ts

 import { Anthropic } from "@anthropic-ai/sdk"

@@ -9,12 +9,7 @@
 import { BaseProvider } from "../../../api/providers/base-provider"
 import { ApiMessage } from "../../task-persistence/apiMessages"
 import * as condenseModule from "../../condense"
-import {
-	TOKEN_BUFFER_PERCENTAGE,
-	estimateTokenCount,
-	truncateConversation,
-	truncateConversationIfNeeded,
-} from "../index"
+import { TOKEN_BUFFER_PERCENTAGE, estimateTokenCount, truncateConversation, manageContext } from "../index"

 // Create a mock ApiHandler for testing
 class MockApiHandler extends BaseProvider {
@@ -49,7 +44,7 @@ class MockApiHandler extends BaseProvider {
 const mockApiHandler = new MockApiHandler()
 const taskId = "test-task-id"

-describe("Sliding Window", () => {
+describe("Context Management", () => {
 	beforeEach(() => {
 		if (!TelemetryService.hasInstance()) {
 			TelemetryService.createInstance([])
@@ -234,9 +229,9 @@ describe("Sliding Window", () => {
 	})

 	/**
-	 * Tests for the truncateConversationIfNeeded function
+	 * Tests for the manageContext function
 	 */
-	describe("truncateConversationIfNeeded", () => {
+	describe("manageContext", () => {
 		const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
 			contextWindow,
 			supportsPromptCache: true,
@@ -261,7 +256,7 @@
 				{ ...messages[messages.length - 1], content: "" },
 			]

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow: modelInfo.contextWindow,
@@ -302,7 +297,7 @@
 				messagesWithSmallContent[4],
 			]

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow: modelInfo.contextWindow,
@@ -337,7 +332,7 @@

 			// Test below threshold
 			const belowThreshold = 69999
-			const result1 = await truncateConversationIfNeeded({
+			const result1 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: belowThreshold,
 				contextWindow: modelInfo1.contextWindow,
@@ -351,7 +346,7 @@
 				currentProfileId: "default",
 			})

-			const result2 = await truncateConversationIfNeeded({
+			const result2 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: belowThreshold,
 				contextWindow: modelInfo2.contextWindow,
@@ -372,7 +367,7 @@

 			// Test above threshold
 			const aboveThreshold = 70001
-			const result3 = await truncateConversationIfNeeded({
+			const result3 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: aboveThreshold,
 				contextWindow: modelInfo1.contextWindow,
@@ -386,7 +381,7 @@
 				currentProfileId: "default",
 			})

-			const result4 = await truncateConversationIfNeeded({
+			const result4 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: aboveThreshold,
 				contextWindow: modelInfo2.contextWindow,
@@ -422,7 +417,7 @@
 			// Set base tokens so total is well below threshold + buffer even with small content added
 			const dynamicBuffer = modelInfo.contextWindow * TOKEN_BUFFER_PERCENTAGE
 			const baseTokensForSmall = availableTokens - smallContentTokens - dynamicBuffer - 10
-			const resultWithSmall = await truncateConversationIfNeeded({
+			const resultWithSmall = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: baseTokensForSmall,
 				contextWindow: modelInfo.contextWindow,
@@ -457,7 +452,7 @@

 			// Set base tokens so we're just below threshold without content, but over with content
 			const baseTokensForLarge = availableTokens - Math.floor(largeContentTokens / 2)
-			const resultWithLarge = await truncateConversationIfNeeded({
+			const resultWithLarge = await manageContext({
 				messages: messagesWithLargeContent,
 				totalTokens: baseTokensForLarge,
 				contextWindow: modelInfo.contextWindow,
@@ -485,7 +480,7 @@

 			// Set base tokens so we're just below threshold without content
 			const baseTokensForVeryLarge = availableTokens - Math.floor(veryLargeContentTokens / 2)
-			const resultWithVeryLarge = await truncateConversationIfNeeded({
+			const resultWithVeryLarge = await manageContext({
 				messages: messagesWithVeryLargeContent,
 				totalTokens: baseTokensForVeryLarge,
 				contextWindow: modelInfo.contextWindow,
@@ -523,7 +518,7 @@
 				messagesWithSmallContent[4],
 			]

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow: modelInfo.contextWindow,
@@ -570,7 +565,7 @@
 				{ ...messages[messages.length - 1], content: "" },
 			]

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow: modelInfo.contextWindow,
@@ -637,7 +632,7 @@
 				messagesWithSmallContent[4],
 			]

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow: modelInfo.contextWindow,
@@ -684,7 +679,7 @@
 				messagesWithSmallContent[4],
 			]

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow: modelInfo.contextWindow,
@@ -741,7 +736,7 @@
 				{ ...messages[messages.length - 1], content: "" },
 			]

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow,
@@ -793,7 +788,7 @@
 				{ ...messages[messages.length - 1], content: "" },
 			]

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow,
@@ -880,7 +875,7 @@
 				.spyOn(condenseModule, "summarizeConversation")
 				.mockResolvedValue(mockSummarizeResponse)

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow,
@@ -946,7 +941,7 @@
 				.spyOn(condenseModule, "summarizeConversation")
 				.mockResolvedValue(mockSummarizeResponse)

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow,
@@ -1000,7 +995,7 @@
 			vi.clearAllMocks()
 			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")

-			const result = await truncateConversationIfNeeded({
+			const result = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens,
 				contextWindow,
@@ -1030,10 +1025,10 @@
 	})

 	/**
-	 * Tests for the getMaxTokens function (private but tested through truncateConversationIfNeeded)
+	 * Tests for the getMaxTokens function (private but tested through manageContext)
 	 */
 	describe("getMaxTokens", () => {
-		// We'll test this indirectly through truncateConversationIfNeeded
+		// We'll test this indirectly through manageContext
 		const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
 			contextWindow,
 			supportsPromptCache: true, // Not relevant for getMaxTokens
@@ -1061,7 +1056,7 @@

 			// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
 			// Below max tokens and buffer - no truncation
-			const result1 = await truncateConversationIfNeeded({
+			const result1 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: 39999, // Well below threshold + dynamic buffer
 				contextWindow: modelInfo.contextWindow,
@@ -1082,7 +1077,7 @@
 			})

 			// Above max tokens - truncate
-			const result2 = await truncateConversationIfNeeded({
+			const result2 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: 50001, // Above threshold
 				contextWindow: modelInfo.contextWindow,
@@ -1114,7 +1109,7 @@

 			// Account for the dynamic buffer which is 10% of context window (10,000 tokens)
 			// Below max tokens and buffer - no truncation
-			const result1 = await truncateConversationIfNeeded({
+			const result1 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: 81807, // Well below threshold + dynamic buffer (91808 - 10000 = 81808)
 				contextWindow: modelInfo.contextWindow,
@@ -1135,7 +1130,7 @@
 			})

 			// Above max tokens - truncate
-			const result2 = await truncateConversationIfNeeded({
+			const result2 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: 81809, // Above threshold (81808)
 				contextWindow: modelInfo.contextWindow,
@@ -1166,7 +1161,7 @@
 			]

 			// Below max tokens and buffer - no truncation
-			const result1 = await truncateConversationIfNeeded({
+			const result1 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: 34999, // Well below threshold + buffer
 				contextWindow: modelInfo.contextWindow,
@@ -1182,7 +1177,7 @@
 			expect(result1.messages).toEqual(messagesWithSmallContent)

 			// Above max tokens - truncate
-			const result2 = await truncateConversationIfNeeded({
+			const result2 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: 40001, // Above threshold
 				contextWindow: modelInfo.contextWindow,
@@ -1211,7 +1206,7 @@

 			// Account for the dynamic buffer which is 10% of context window (20,000 tokens for this test)
 			// Below max tokens and buffer - no truncation
-			const result1 = await truncateConversationIfNeeded({
+			const result1 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: 149999, // Well below threshold + dynamic buffer
 				contextWindow: modelInfo.contextWindow,
@@ -1227,7 +1222,7 @@
 			expect(result1.messages).toEqual(messagesWithSmallContent)

 			// Above max tokens - truncate
-			const result2 = await truncateConversationIfNeeded({
+			const result2 = await manageContext({
 				messages: messagesWithSmallContent,
 				totalTokens: 170001, // Above threshold
 				contextWindow: modelInfo.contextWindow,
diff --git a/src/core/sliding-window/index.ts b/src/core/context-management/index.ts
similarity index 78%
rename from src/core/sliding-window/index.ts
rename to src/core/context-management/index.ts
index 1e518c9a56d..fa91fc0c9d3
--- a/src/core/sliding-window/index.ts
+++ b/src/core/context-management/index.ts
@@ -8,7 +8,18 @@
 import { ApiMessage } from "../task-persistence/apiMessages"
 import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types"

 /**
- * Default percentage of the context window to use as a buffer when deciding when to truncate
+ * Context Management
+ *
+ * This module provides Context Management for conversations, combining:
+ * - Intelligent condensation of prior messages when approaching configured thresholds
+ * - Sliding window truncation as a fallback when necessary
+ *
+ * Behavior and exports are preserved exactly from the previous sliding-window implementation.
+ */
+
+/**
+ * Default percentage of the context window to use as a buffer when deciding when to truncate.
+ * Used by Context Management to determine when to trigger condensation or (fallback) sliding window truncation.
  */
 export const TOKEN_BUFFER_PERCENTAGE = 0.1
@@ -33,6 +44,8 @@ export async function estimateTokenCount(
  * The first message is always retained, and a specified fraction (rounded to an even number)
  * of messages from the beginning (excluding the first) is removed.
  *
+ * This implements the sliding window truncation behavior.
+ *
  * @param {ApiMessage[]} messages - The conversation messages.
  * @param {number} fracToRemove - The fraction (between 0 and 1) of messages (excluding the first) to remove.
  * @param {string} taskId - The task ID for the conversation, used for telemetry
@@ -50,20 +63,16 @@ export function truncateConversation(messages: ApiMessage[], fracToRemove: numbe
 }

 /**
- * Conditionally truncates the conversation messages if the total token count
- * exceeds the model's limit, considering the size of incoming content.
+ * Context Management: Conditionally manages the conversation context when approaching limits.
  *
- * @param {ApiMessage[]} messages - The conversation messages.
- * @param {number} totalTokens - The total number of tokens in the conversation (excluding the last user message).
- * @param {number} contextWindow - The context window size.
- * @param {number} maxTokens - The maximum number of tokens allowed.
- * @param {ApiHandler} apiHandler - The API handler to use for token counting.
- * @param {boolean} autoCondenseContext - Whether to use LLM summarization or sliding window implementation
- * @param {string} systemPrompt - The system prompt, used for estimating the new context size after summarizing.
- * @returns {ApiMessage[]} The original or truncated conversation messages.
+ * Attempts intelligent condensation of prior messages when thresholds are reached.
+ * Falls back to sliding window truncation if condensation is unavailable or fails.
+ *
+ * @param {ContextManagementOptions} options - The options for truncation/condensation
+ * @returns {Promise<ContextManagementResult>} The original, condensed, or truncated conversation messages.
  */
-type TruncateOptions = {
+export type ContextManagementOptions = {
 	messages: ApiMessage[]
 	totalTokens: number
 	contextWindow: number
@@ -79,16 +88,15 @@
 	currentProfileId: string
 }

-type TruncateResponse = SummarizeResponse & { prevContextTokens: number }
+export type ContextManagementResult = SummarizeResponse & { prevContextTokens: number }

 /**
- * Conditionally truncates the conversation messages if the total token count
- * exceeds the model's limit, considering the size of incoming content.
+ * Conditionally manages conversation context (condense and fallback truncation).
  *
- * @param {TruncateOptions} options - The options for truncation
- * @returns {Promise<TruncateResponse>} The original or truncated conversation messages.
+ * @param {ContextManagementOptions} options - The options for truncation/condensation
+ * @returns {Promise<ContextManagementResult>} The original, condensed, or truncated conversation messages.
  */
-export async function truncateConversationIfNeeded({
+export async function manageContext({
 	messages,
 	totalTokens,
 	contextWindow,
@@ -102,7 +110,7 @@
 	condensingApiHandler,
 	profileThresholds,
 	currentProfileId,
-}: TruncateOptions): Promise<TruncateResponse> {
+}: ContextManagementOptions): Promise<ContextManagementResult> {
 	let error: string | undefined
 	let cost = 0
 	// Calculate the maximum tokens reserved for response
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 5d63189e3d8..b4454dc547d 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -90,7 +90,7 @@ import { RooIgnoreController } from "../ignore/RooIgnoreController"
 import { RooProtectedController } from "../protect/RooProtectedController"
 import { type AssistantMessageContent, presentAssistantMessage } from "../assistant-message"
 import { AssistantMessageParser } from "../assistant-message/AssistantMessageParser"
-import { truncateConversationIfNeeded } from "../sliding-window"
+import { manageContext } from "../context-management"
 import { ClineProvider } from "../webview/ClineProvider"
 import { MultiSearchReplaceDiffStrategy } from "../diff/strategies/multi-search-replace"
 import { MultiFileSearchReplaceDiffStrategy } from "../diff/strategies/multi-file-search-replace"
@@ -2664,7 +2664,7 @@ export class Task extends EventEmitter implements TaskLike {
 		)

 		// Force aggressive truncation by keeping only 75% of the conversation history
-		const truncateResult = await truncateConversationIfNeeded({
+		const truncateResult = await manageContext({
 			messages: this.apiConversationHistory,
 			totalTokens: contextTokens || 0,
 			maxTokens,
@@ -2779,7 +2779,7 @@
 		// Get the current profile ID using the helper method
 		const currentProfileId = this.getCurrentProfileId(state)

-		const truncateResult = await truncateConversationIfNeeded({
+		const truncateResult = await manageContext({
 			messages: this.apiConversationHistory,
 			totalTokens: contextTokens,
 			maxTokens,
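
For context, the sketch below shows what a call site of the renamed entry point might look like after this patch. It is a minimal, hedged example, not code from the diff: the option names (messages, totalTokens, contextWindow, maxTokens, apiHandler, autoCondenseContext, autoCondenseContextPercent, systemPrompt, taskId, profileThresholds, currentProfileId) come from ContextManagementOptions and the call sites above or from the wider Roo-Code codebase, while the helper name compactContext, the concrete values, and the import paths are illustrative assumptions.

// Illustrative sketch only: exercises the renamed manageContext() API.
// Option names mirror ContextManagementOptions in src/core/context-management/index.ts;
// values and the surrounding helper are assumptions for demonstration.
import { manageContext, type ContextManagementResult } from "../context-management"
import type { ApiMessage } from "../task-persistence/apiMessages"
import type { ApiHandler } from "../../api"

async function compactContext(
	messages: ApiMessage[],
	totalTokens: number,
	apiHandler: ApiHandler,
	systemPrompt: string,
): Promise<ContextManagementResult> {
	const result = await manageContext({
		messages,
		totalTokens,
		contextWindow: 200_000, // example context window size
		maxTokens: 8_192, // tokens reserved for the model's response
		apiHandler,
		autoCondenseContext: true, // try condensation before sliding-window truncation
		autoCondenseContextPercent: 100, // condensation threshold (assumed field, from the wider codebase)
		systemPrompt,
		taskId: "example-task-id",
		profileThresholds: {},
		currentProfileId: "default",
	})
	// The result extends SummarizeResponse with prevContextTokens, so callers can
	// report how large the context was before condensation or truncation applied.
	return result
}

As the module docstring added in this patch explains, the behavior is unchanged from the sliding-window implementation; only the entry point and types are renamed, so call sites like the two in Task.ts need nothing beyond the import and function-name swap.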