diff --git a/src/core/condense/__tests__/index.test.ts b/src/core/condense/__tests__/index.test.ts index 86afe2a8c4c..ac7ec3899d7 100644 --- a/src/core/condense/__tests__/index.test.ts +++ b/src/core/condense/__tests__/index.test.ts @@ -9,6 +9,8 @@ jest.mock("../../../api/transform/image-cleaning", () => ({ maybeRemoveImageBlocks: jest.fn((messages: ApiMessage[], _apiHandler: ApiHandler) => [...messages]), })) +const taskId = "test-task-id" + describe("getMessagesSinceLastSummary", () => { it("should return all messages when there is no summary", () => { const messages: ApiMessage[] = [ @@ -106,7 +108,7 @@ describe("summarizeConversation", () => { { role: "assistant", content: "Hi there", ts: 2 }, ] - const result = await summarizeConversation(messages, mockApiHandler, defaultSystemPrompt) + const result = await summarizeConversation(messages, mockApiHandler, defaultSystemPrompt, taskId) expect(result.messages).toEqual(messages) expect(result.cost).toBe(0) expect(result.summary).toBe("") @@ -125,7 +127,7 @@ describe("summarizeConversation", () => { { role: "user", content: "Tell me more", ts: 7 }, ] - const result = await summarizeConversation(messages, mockApiHandler, defaultSystemPrompt) + const result = await summarizeConversation(messages, mockApiHandler, defaultSystemPrompt, taskId) expect(result.messages).toEqual(messages) expect(result.cost).toBe(0) expect(result.summary).toBe("") @@ -144,7 +146,7 @@ describe("summarizeConversation", () => { { role: "user", content: "Tell me more", ts: 7 }, ] - const result = await summarizeConversation(messages, mockApiHandler, defaultSystemPrompt) + const result = await summarizeConversation(messages, mockApiHandler, defaultSystemPrompt, taskId) // Check that the API was called correctly expect(mockApiHandler.createMessage).toHaveBeenCalled() @@ -202,7 +204,7 @@ describe("summarizeConversation", () => { return messages.map(({ role, content }: { role: string; content: any }) => ({ role, content })) }) - const result = await 
summarizeConversation(messages, mockApiHandler, defaultSystemPrompt) + const result = await summarizeConversation(messages, mockApiHandler, defaultSystemPrompt, taskId) // Should return original messages when summary is empty expect(result.messages).toEqual(messages) @@ -225,7 +227,7 @@ describe("summarizeConversation", () => { { role: "user", content: "Tell me more", ts: 7 }, ] - await summarizeConversation(messages, mockApiHandler, defaultSystemPrompt) + await summarizeConversation(messages, mockApiHandler, defaultSystemPrompt, taskId) // Verify the final request message const expectedFinalMessage = { @@ -266,7 +268,7 @@ describe("summarizeConversation", () => { // Override the mock for this test mockApiHandler.createMessage = jest.fn().mockReturnValue(streamWithUsage) as any - const result = await summarizeConversation(messages, mockApiHandler, systemPrompt) + const result = await summarizeConversation(messages, mockApiHandler, systemPrompt, taskId) // Verify that countTokens was called with the correct messages including system prompt expect(mockApiHandler.countTokens).toHaveBeenCalled() diff --git a/src/core/condense/index.ts b/src/core/condense/index.ts index c5ab9310694..7c299432ff6 100644 --- a/src/core/condense/index.ts +++ b/src/core/condense/index.ts @@ -2,6 +2,7 @@ import Anthropic from "@anthropic-ai/sdk" import { ApiHandler } from "../../api" import { ApiMessage } from "../task-persistence/apiMessages" import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning" +import { telemetryService } from "../../services/telemetry/TelemetryService" export const N_MESSAGES_TO_KEEP = 3 @@ -58,13 +59,16 @@ export type SummarizeResponse = { * @param {ApiMessage[]} messages - The conversation messages * @param {ApiHandler} apiHandler - The API handler to use for token counting. 
* @param {string} systemPrompt - The system prompt for API requests, which should be considered in the context token count + * @param {string} taskId - The task ID for the conversation, used for telemetry * @returns {SummarizeResponse} - The result of the summarization operation (see above) */ export async function summarizeConversation( messages: ApiMessage[], apiHandler: ApiHandler, systemPrompt: string, + taskId: string, ): Promise<SummarizeResponse> { + telemetryService.captureContextCondensed(taskId) const response: SummarizeResponse = { messages, cost: 0, summary: "" } const messagesToSummarize = getMessagesSinceLastSummary(messages.slice(0, -N_MESSAGES_TO_KEEP)) if (messagesToSummarize.length <= 1) { diff --git a/src/core/sliding-window/__tests__/sliding-window.test.ts b/src/core/sliding-window/__tests__/sliding-window.test.ts index b2c3bb8f1bf..69e69479153 100644 --- a/src/core/sliding-window/__tests__/sliding-window.test.ts +++ b/src/core/sliding-window/__tests__/sliding-window.test.ts @@ -37,6 +37,7 @@ class MockApiHandler extends BaseProvider { // Create a singleton instance for tests const mockApiHandler = new MockApiHandler() +const taskId = "test-task-id" /** * Tests for the truncateConversation function @@ -49,7 +50,7 @@ describe("truncateConversation", () => { { role: "user", content: "Third message" }, ] - const result = truncateConversation(messages, 0.5) + const result = truncateConversation(messages, 0.5, taskId) // With 2 messages after the first, 0.5 fraction means remove 1 message // But 1 is odd, so it rounds down to 0 (to make it even) @@ -70,7 +71,7 @@ describe("truncateConversation", () => { // 4 messages excluding first, 0.5 fraction = 2 messages to remove // 2 is already even, so no rounding needed - const result = truncateConversation(messages, 0.5) + const result = truncateConversation(messages, 0.5, taskId) expect(result.length).toBe(3) expect(result[0]).toEqual(messages[0]) @@ -91,7 +92,7 @@ describe("truncateConversation", () => { // 6 messages
excluding first, 0.3 fraction = 1.8 messages to remove // 1.8 rounds down to 1, then to 0 to make it even - const result = truncateConversation(messages, 0.3) + const result = truncateConversation(messages, 0.3, taskId) expect(result.length).toBe(7) // No messages removed expect(result).toEqual(messages) @@ -104,7 +105,7 @@ describe("truncateConversation", () => { { role: "user", content: "Third message" }, ] - const result = truncateConversation(messages, 0) + const result = truncateConversation(messages, 0, taskId) expect(result).toEqual(messages) }) @@ -119,7 +120,7 @@ describe("truncateConversation", () => { // 3 messages excluding first, 1.0 fraction = 3 messages to remove // But 3 is odd, so it rounds down to 2 to make it even - const result = truncateConversation(messages, 1) + const result = truncateConversation(messages, 1, taskId) expect(result.length).toBe(2) expect(result[0]).toEqual(messages[0]) @@ -251,6 +252,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) // Check the new return type @@ -282,6 +284,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result).toEqual({ @@ -311,6 +314,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) const result2 = await truncateConversationIfNeeded({ @@ -322,6 +326,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result1.messages).toEqual(result2.messages) @@ -340,6 +345,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) const result4 = await 
truncateConversationIfNeeded({ @@ -351,6 +357,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result3.messages).toEqual(result4.messages) @@ -384,6 +391,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(resultWithSmall).toEqual({ messages: messagesWithSmallContent, @@ -416,6 +424,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(resultWithLarge.messages).not.toEqual(messagesWithLargeContent) // Should truncate expect(resultWithLarge.summary).toBe("") @@ -441,6 +450,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(resultWithVeryLarge.messages).not.toEqual(messagesWithVeryLargeContent) // Should truncate expect(resultWithVeryLarge.summary).toBe("") @@ -469,6 +479,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result).toEqual({ messages: expectedResult, @@ -510,10 +521,11 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: true, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) // Verify summarizeConversation was called with the right parameters - expect(summarizeSpy).toHaveBeenCalledWith(messagesWithSmallContent, mockApiHandler, "System prompt") + expect(summarizeSpy).toHaveBeenCalledWith(messagesWithSmallContent, mockApiHandler, "System prompt", taskId) // Verify the result contains the summary information expect(result).toMatchObject({ @@ -557,6 +569,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: true, 
autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) // Verify summarizeConversation was called @@ -594,6 +607,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: false, autoCondenseContextPercent: 50, // This shouldn't matter since autoCondenseContext is false systemPrompt: "System prompt", + taskId, }) // Verify summarizeConversation was not called @@ -645,10 +659,11 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: true, autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 60% systemPrompt: "System prompt", + taskId, }) // Verify summarizeConversation was called with the right parameters - expect(summarizeSpy).toHaveBeenCalledWith(messagesWithSmallContent, mockApiHandler, "System prompt") + expect(summarizeSpy).toHaveBeenCalledWith(messagesWithSmallContent, mockApiHandler, "System prompt", taskId) // Verify the result contains the summary information expect(result).toMatchObject({ @@ -682,6 +697,7 @@ describe("truncateConversationIfNeeded", () => { autoCondenseContext: true, autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 40% systemPrompt: "System prompt", + taskId, }) // Verify summarizeConversation was not called @@ -738,6 +754,7 @@ describe("getMaxTokens", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result1).toEqual({ messages: messagesWithSmallContent, @@ -756,6 +773,7 @@ describe("getMaxTokens", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result2.messages).not.toEqual(messagesWithSmallContent) expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction @@ -782,6 +800,7 @@ describe("getMaxTokens", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result1).toEqual({ messages: messagesWithSmallContent, @@ -800,6 
+819,7 @@ describe("getMaxTokens", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result2.messages).not.toEqual(messagesWithSmallContent) expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction @@ -825,6 +845,7 @@ describe("getMaxTokens", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result1.messages).toEqual(messagesWithSmallContent) @@ -838,6 +859,7 @@ describe("getMaxTokens", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result2).not.toEqual(messagesWithSmallContent) expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction @@ -861,6 +883,7 @@ describe("getMaxTokens", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result1.messages).toEqual(messagesWithSmallContent) @@ -874,6 +897,7 @@ describe("getMaxTokens", () => { autoCondenseContext: false, autoCondenseContextPercent: 100, systemPrompt: "System prompt", + taskId, }) expect(result2).not.toEqual(messagesWithSmallContent) expect(result2.messages.length).toBe(3) // Truncated with 0.5 fraction diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts index 087f60feb2a..875f5a704ba 100644 --- a/src/core/sliding-window/index.ts +++ b/src/core/sliding-window/index.ts @@ -2,6 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import { ApiHandler } from "../../api" import { summarizeConversation, SummarizeResponse } from "../condense" import { ApiMessage } from "../task-persistence/apiMessages" +import { telemetryService } from "../../services/telemetry/TelemetryService" /** * Default percentage of the context window to use as a buffer when deciding when to truncate @@ -31,9 +32,11 @@ export async function estimateTokenCount( * * @param {ApiMessage[]} messages - The 
conversation messages. * @param {number} fracToRemove - The fraction (between 0 and 1) of messages (excluding the first) to remove. + * @param {string} taskId - The task ID for the conversation, used for telemetry * @returns {ApiMessage[]} The truncated conversation messages. */ -export function truncateConversation(messages: ApiMessage[], fracToRemove: number): ApiMessage[] { +export function truncateConversation(messages: ApiMessage[], fracToRemove: number, taskId: string): ApiMessage[] { + telemetryService.captureSlidingWindowTruncation(taskId) const truncatedMessages = [messages[0]] const rawMessagesToRemove = Math.floor((messages.length - 1) * fracToRemove) const messagesToRemove = rawMessagesToRemove - (rawMessagesToRemove % 2) @@ -66,6 +69,7 @@ type TruncateOptions = { autoCondenseContext: boolean autoCondenseContextPercent: number systemPrompt: string + taskId: string } type TruncateResponse = SummarizeResponse & { prevContextTokens: number } @@ -86,6 +90,7 @@ export async function truncateConversationIfNeeded({ autoCondenseContext, autoCondenseContextPercent, systemPrompt, + taskId, }: TruncateOptions): Promise<TruncateResponse> { // Calculate the maximum tokens reserved for response const reservedTokens = maxTokens || contextWindow * 0.2 @@ -108,7 +113,7 @@ const contextPercent = (100 * prevContextTokens) / contextWindow if (contextPercent >= autoCondenseContextPercent || prevContextTokens > allowedTokens) { // Attempt to intelligently condense the context - const result = await summarizeConversation(messages, apiHandler, systemPrompt) + const result = await summarizeConversation(messages, apiHandler, systemPrompt, taskId) if (result.summary) { return { ...result, prevContextTokens } } @@ -117,7 +122,7 @@ // Fall back to sliding window truncation if needed if (prevContextTokens > allowedTokens) { - const
truncatedMessages = truncateConversation(messages, 0.5, taskId) return { messages: truncatedMessages, prevContextTokens, summary: "", cost: 0 } } // No truncation or condensation needed diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index d66fc6996a5..f315e6bd897 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -488,7 +488,7 @@ export class Task extends EventEmitter { summary, cost, newContextTokens = 0, - } = await summarizeConversation(this.apiConversationHistory, this.api, systemPrompt) + } = await summarizeConversation(this.apiConversationHistory, this.api, systemPrompt, this.taskId) if (!summary) { return } @@ -1518,6 +1518,7 @@ export class Task extends EventEmitter { autoCondenseContext, autoCondenseContextPercent, systemPrompt, + taskId: this.taskId, }) if (truncateResult.messages !== this.apiConversationHistory) { await this.overwriteApiConversationHistory(truncateResult.messages) diff --git a/src/services/telemetry/PostHogClient.ts b/src/services/telemetry/PostHogClient.ts index 784c9476e8b..22fce0beb3c 100644 --- a/src/services/telemetry/PostHogClient.ts +++ b/src/services/telemetry/PostHogClient.ts @@ -28,6 +28,8 @@ export class PostHogClient { CHECKPOINT_DIFFED: "Checkpoint Diffed", CODE_ACTION_USED: "Code Action Used", PROMPT_ENHANCED: "Prompt Enhanced", + CONTEXT_CONDENSED: "Context Condensed", + SLIDING_WINDOW_TRUNCATION: "Sliding Window Truncation", }, ERRORS: { SCHEMA_VALIDATION_ERROR: "Schema Validation Error", diff --git a/src/services/telemetry/TelemetryService.ts b/src/services/telemetry/TelemetryService.ts index 37423542b09..17aef19755a 100644 --- a/src/services/telemetry/TelemetryService.ts +++ b/src/services/telemetry/TelemetryService.ts @@ -120,6 +120,14 @@ class TelemetryService { this.captureEvent(PostHogClient.EVENTS.TASK.CHECKPOINT_RESTORED, { taskId }) } + public captureContextCondensed(taskId: string): void { + this.captureEvent(PostHogClient.EVENTS.TASK.CONTEXT_CONDENSED, { taskId }) + } + + public 
captureSlidingWindowTruncation(taskId: string): void { + this.captureEvent(PostHogClient.EVENTS.TASK.SLIDING_WINDOW_TRUNCATION, { taskId }) + } + public captureCodeActionUsed(actionType: string): void { this.captureEvent(PostHogClient.EVENTS.TASK.CODE_ACTION_USED, { actionType }) }