diff --git a/src/core/tools/ReadFileTool.ts b/src/core/tools/ReadFileTool.ts
index d21c8cd247a..17282df7744 100644
--- a/src/core/tools/ReadFileTool.ts
+++ b/src/core/tools/ReadFileTool.ts
@@ -25,7 +25,7 @@ import {
processImageFile,
ImageMemoryTracker,
} from "./helpers/imageHelpers"
-import { validateFileTokenBudget, truncateFileContent } from "./helpers/fileTokenBudget"
+import { FILE_READ_BUDGET_PERCENT, readFileWithTokenBudget } from "./helpers/fileTokenBudget"
import { truncateDefinitionsToLineLimit } from "./helpers/truncateDefinitions"
import { BaseTool, ToolCallbacks } from "./BaseTool"
import type { ToolUse } from "../../shared/tools"
@@ -386,7 +386,38 @@ export class ReadFileTool extends BaseTool<"read_file"> {
}
if (supportedBinaryFormats && supportedBinaryFormats.includes(fileExtension)) {
- // Fall through to extractTextFromFile
+ // Use extractTextFromFile for supported binary formats (PDF, DOCX, etc.)
+ try {
+ const content = await extractTextFromFile(fullPath)
+ const numberedContent = addLineNumbers(content)
+ const lines = content.split("\n")
+ const lineCount = lines.length
+ const lineRangeAttr = lineCount > 0 ? ` lines="1-${lineCount}"` : ""
+
+ await task.fileContextTracker.trackFileContext(relPath, "read_tool" as RecordSource)
+
+ updateFileResult(relPath, {
+ xmlContent:
+ lineCount > 0
+							? `<file><path>${relPath}</path>\n<content${lineRangeAttr}>\n${numberedContent}</content>\n</file>`
+							: `<file><path>${relPath}</path>\n<notice>File is empty</notice>\n</file>`,
+ nativeContent:
+ lineCount > 0
+ ? `File: ${relPath}\nLines 1-${lineCount}:\n${numberedContent}`
+ : `File: ${relPath}\nNote: File is empty`,
+ })
+ continue
+ } catch (error) {
+ const errorMsg = error instanceof Error ? error.message : String(error)
+ updateFileResult(relPath, {
+ status: "error",
+ error: `Error extracting text: ${errorMsg}`,
+					xmlContent: `<file><path>${relPath}</path><error>Error extracting text: ${errorMsg}</error></file>`,
+ nativeContent: `File: ${relPath}\nError: Error extracting text: ${errorMsg}`,
+ })
+ await task.say("error", `Error extracting text from ${relPath}: ${errorMsg}`)
+ continue
+ }
} else {
const fileFormat = fileExtension.slice(1) || "bin"
updateFileResult(relPath, {
@@ -492,48 +523,54 @@ export class ReadFileTool extends BaseTool<"read_file"> {
settings: task.apiConfiguration,
}) ?? ANTHROPIC_DEFAULT_MAX_TOKENS
- const budgetResult = await validateFileTokenBudget(
- fullPath,
- contextWindow - maxOutputTokens,
- contextTokens || 0,
- )
+ // Calculate available token budget (60% of remaining context)
+ const remainingTokens = contextWindow - maxOutputTokens - (contextTokens || 0)
+ const safeReadBudget = Math.floor(remainingTokens * FILE_READ_BUDGET_PERCENT)
- let content = await extractTextFromFile(fullPath)
+ let content: string
let xmlInfo = ""
-
let nativeInfo = ""
- if (budgetResult.shouldTruncate && budgetResult.maxChars !== undefined) {
- const truncateResult = truncateFileContent(
- content,
- budgetResult.maxChars,
- content.length,
- budgetResult.isPreview,
- )
- content = truncateResult.content
-
- let displayedLines = content.length === 0 ? 0 : content.split(/\r?\n/).length
- if (displayedLines > 0 && content.endsWith("\n")) {
- displayedLines--
- }
- const lineRangeAttr = displayedLines > 0 ? ` lines="1-${displayedLines}"` : ""
- xmlInfo =
-					content.length > 0 ? `<content${lineRangeAttr}>\n${content}</content>\n` : ``
-				xmlInfo += `<notice>${truncateResult.notice}</notice>\n`
-
- nativeInfo =
- content.length > 0
- ? `Lines 1-${displayedLines}:\n${content}\n\nNote: ${truncateResult.notice}`
- : `Note: ${truncateResult.notice}`
+ if (safeReadBudget <= 0) {
+ // No budget available
+ content = ""
+ const notice = "No available context budget for file reading"
+					xmlInfo = `<notice>${notice}</notice>\n`
+ nativeInfo = `Note: ${notice}`
} else {
- const lineRangeAttr = ` lines="1-${totalLines}"`
-				xmlInfo = totalLines > 0 ? `<content${lineRangeAttr}>\n${content}</content>\n` : ``
+ // Read file with incremental token counting
+ const result = await readFileWithTokenBudget(fullPath, {
+ budgetTokens: safeReadBudget,
+ })
- if (totalLines === 0) {
-					xmlInfo += `<notice>File is empty</notice>\n`
- nativeInfo = "Note: File is empty"
+ content = addLineNumbers(result.content)
+
+ if (!result.complete) {
+ // File was truncated
+ const notice = `File truncated: showing ${result.lineCount} lines (${result.tokenCount} tokens) due to context budget. Use line_range to read specific sections.`
+ const lineRangeAttr = result.lineCount > 0 ? ` lines="1-${result.lineCount}"` : ""
+ xmlInfo =
+ result.lineCount > 0
+							? `<content${lineRangeAttr}>\n${content}</content>\n<notice>${notice}</notice>\n`
+							: `<notice>${notice}</notice>\n`
+ nativeInfo =
+ result.lineCount > 0
+ ? `Lines 1-${result.lineCount}:\n${content}\n\nNote: ${notice}`
+ : `Note: ${notice}`
} else {
- nativeInfo = `Lines 1-${totalLines}:\n${content}`
+ // Full file read
+ const lineRangeAttr = ` lines="1-${result.lineCount}"`
+ xmlInfo =
+ result.lineCount > 0
+							? `<content${lineRangeAttr}>\n${content}</content>\n`
+ : ``
+
+ if (result.lineCount === 0) {
+						xmlInfo += `<notice>File is empty</notice>\n`
+ nativeInfo = "Note: File is empty"
+ } else {
+ nativeInfo = `Lines 1-${result.lineCount}:\n${content}`
+ }
}
}
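
For reference, a rough sketch of the budget arithmetic this hunk introduces. The constant mirrors `FILE_READ_BUDGET_PERCENT` from `fileTokenBudget.ts`; the context numbers are hypothetical, chosen only to make the math concrete:

```ts
// Illustrative only: hypothetical context values, not numbers from this PR.
const FILE_READ_BUDGET_PERCENT = 0.6

const contextWindow = 200_000 // model context size (tokens)
const maxOutputTokens = 8_192 // reserved for the model's response
const contextTokens = 50_000 // tokens already used by the conversation

const remainingTokens = contextWindow - maxOutputTokens - contextTokens // 141_808
const safeReadBudget = Math.floor(remainingTokens * FILE_READ_BUDGET_PERCENT) // 85_084

console.log(safeReadBudget) // at most ~60% of the remaining window is spent on the file
```
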
diff --git a/src/core/tools/__tests__/readFileTool.spec.ts b/src/core/tools/__tests__/readFileTool.spec.ts
index d109a6d430a..d22c163636f 100644
--- a/src/core/tools/__tests__/readFileTool.spec.ts
+++ b/src/core/tools/__tests__/readFileTool.spec.ts
@@ -36,20 +36,29 @@ vi.mock("fs/promises", () => fsPromises)
// Mock input content for tests
let mockInputContent = ""
+// Create hoisted mocks that can be used in vi.mock factories
+const { addLineNumbersMock, mockReadFileWithTokenBudget } = vi.hoisted(() => {
+ const addLineNumbersMock = vi.fn().mockImplementation((text: string, startLine = 1) => {
+ if (!text) return ""
+ const lines = typeof text === "string" ? text.split("\n") : [text]
+ return lines.map((line: string, i: number) => `${startLine + i} | ${line}`).join("\n")
+ })
+ const mockReadFileWithTokenBudget = vi.fn()
+ return { addLineNumbersMock, mockReadFileWithTokenBudget }
+})
+
// First create all the mocks
vi.mock("../../../integrations/misc/extract-text", () => ({
extractTextFromFile: vi.fn(),
- addLineNumbers: vi.fn(),
+ addLineNumbers: addLineNumbersMock,
getSupportedBinaryFormats: vi.fn(() => [".pdf", ".docx", ".ipynb"]),
}))
vi.mock("../../../services/tree-sitter")
-// Then create the mock functions
-const addLineNumbersMock = vi.fn().mockImplementation((text, startLine = 1) => {
- if (!text) return ""
- const lines = typeof text === "string" ? text.split("\n") : [text]
- return lines.map((line, i) => `${startLine + i} | ${line}`).join("\n")
-})
+// Mock readFileWithTokenBudget to prevent actual file system access
+vi.mock("../../../integrations/misc/read-file-with-budget", () => ({
+ readFileWithTokenBudget: (...args: any[]) => mockReadFileWithTokenBudget(...args),
+}))
const extractTextFromFileMock = vi.fn()
const getSupportedBinaryFormatsMock = vi.fn(() => [".pdf", ".docx", ".ipynb"])
@@ -145,6 +154,27 @@ beforeEach(() => {
})
: []
})
+
+ // Reset addLineNumbers mock to its default implementation (prevents cross-test pollution)
+ addLineNumbersMock.mockReset()
+ addLineNumbersMock.mockImplementation((text: string, startLine = 1) => {
+ if (!text) return ""
+ const lines = typeof text === "string" ? text.split("\n") : [text]
+ return lines.map((line: string, i: number) => `${startLine + i} | ${line}`).join("\n")
+ })
+
+ // Reset readFileWithTokenBudget mock with default implementation
+ mockReadFileWithTokenBudget.mockClear()
+ mockReadFileWithTokenBudget.mockImplementation(async (_filePath: string, _options: any) => {
+		// Default: return mockInputContent, split into lines
+ const lines = mockInputContent ? mockInputContent.split("\n") : []
+ return {
+ content: mockInputContent,
+ tokenCount: mockInputContent.length / 4, // rough estimate
+ lineCount: lines.length,
+ complete: true,
+ }
+ })
})
// Mock i18n translation function
@@ -496,7 +526,16 @@ describe("read_file tool with maxReadFileLine setting", () => {
it("should read with extractTextFromFile when file has few lines", async () => {
// Setup
mockedCountFileLines.mockResolvedValue(3) // File shorter than maxReadFileLine
- mockInputContent = fileContent
+ const threeLineContent = "Line 1\nLine 2\nLine 3"
+ mockInputContent = threeLineContent
+
+ // Configure the mock to return the correct content for this test
+ mockReadFileWithTokenBudget.mockResolvedValueOnce({
+ content: threeLineContent,
+ tokenCount: threeLineContent.length / 4,
+ lineCount: 3,
+ complete: true,
+ })
// Execute
const result = await executeReadFileTool({}, { maxReadFileLine: 5, totalLines: 3 })
@@ -656,11 +695,15 @@ describe("read_file tool XML output structure", () => {
it("should produce XML output with no unnecessary indentation", async () => {
// Setup
const numberedContent = "1 | Line 1\n2 | Line 2\n3 | Line 3\n4 | Line 4\n5 | Line 5"
- // For XML structure test
- mockedExtractTextFromFile.mockImplementation(() => {
- addLineNumbersMock(mockInputContent)
- return Promise.resolve(numberedContent)
+
+ // Configure mockReadFileWithTokenBudget to return the 5-line content
+ mockReadFileWithTokenBudget.mockResolvedValueOnce({
+ content: fileContent, // "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
+ tokenCount: fileContent.length / 4,
+ lineCount: 5,
+ complete: true,
})
+
mockProvider.getState.mockResolvedValue({
maxReadFileLine: -1,
maxImageFileSize: 20,
@@ -693,7 +736,15 @@ describe("read_file tool XML output structure", () => {
it("should handle empty files correctly", async () => {
// Setup
mockedCountFileLines.mockResolvedValue(0)
- mockedExtractTextFromFile.mockResolvedValue("")
+
+ // Configure mockReadFileWithTokenBudget to return empty content
+ mockReadFileWithTokenBudget.mockResolvedValueOnce({
+ content: "",
+ tokenCount: 0,
+ lineCount: 0,
+ complete: true,
+ })
+
mockProvider.getState.mockResolvedValue({
maxReadFileLine: -1,
maxImageFileSize: 20,
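
A sketch of how a further test could drive the truncation branch through the same hoisted mock; `executeReadFileTool` is the helper already used in this spec, and the literal values are arbitrary:

```ts
it("includes a truncation notice when the budget is exhausted (sketch)", async () => {
	// complete: false makes ReadFileTool emit the "File truncated" notice
	mockReadFileWithTokenBudget.mockResolvedValueOnce({
		content: "Line 1\nLine 2",
		tokenCount: 50,
		lineCount: 2,
		complete: false,
	})

	const result = await executeReadFileTool({}, { maxReadFileLine: -1, totalLines: 1000 })

	expect(result).toContain("File truncated: showing 2 lines")
})
```
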
diff --git a/src/core/tools/helpers/__tests__/fileTokenBudget.spec.ts b/src/core/tools/helpers/__tests__/fileTokenBudget.spec.ts
deleted file mode 100644
index 4eea6435a89..00000000000
--- a/src/core/tools/helpers/__tests__/fileTokenBudget.spec.ts
+++ /dev/null
@@ -1,357 +0,0 @@
-import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"
-import {
- validateFileTokenBudget,
- truncateFileContent,
- FILE_SIZE_THRESHOLD,
- MAX_FILE_SIZE_FOR_TOKENIZATION,
- PREVIEW_SIZE_FOR_LARGE_FILES,
-} from "../fileTokenBudget"
-
-// Mock dependencies
-vi.mock("fs/promises", () => ({
- stat: vi.fn(),
- readFile: vi.fn(),
- open: vi.fn(),
-}))
-
-vi.mock("../../../../utils/countTokens", () => ({
- countTokens: vi.fn(),
-}))
-
-// Import after mocking
-const fs = await import("fs/promises")
-const { countTokens } = await import("../../../../utils/countTokens")
-
-const mockStat = vi.mocked(fs.stat)
-const mockReadFile = vi.mocked(fs.readFile)
-const mockOpen = vi.mocked(fs.open)
-const mockCountTokens = vi.mocked(countTokens)
-
-describe("fileTokenBudget", () => {
- beforeEach(() => {
- vi.clearAllMocks()
- mockOpen.mockReset()
- })
-
- afterEach(() => {
- vi.restoreAllMocks()
- })
-
- describe("validateFileTokenBudget", () => {
- it("should not truncate files smaller than FILE_SIZE_THRESHOLD", async () => {
- const filePath = "/test/small-file.txt"
- const contextWindow = 200000
- const currentTokens = 10000
-
- // Mock file stats - small file (50KB)
- mockStat.mockResolvedValue({
- size: 50000,
- } as any)
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- expect(result.shouldTruncate).toBe(false)
- expect(mockReadFile).not.toHaveBeenCalled()
- expect(mockCountTokens).not.toHaveBeenCalled()
- })
-
- it("should validate and not truncate large files that fit within budget", async () => {
- const filePath = "/test/large-file.txt"
- const contextWindow = 200000
- const currentTokens = 10000
- const fileContent = "x".repeat(150000) // 150KB file
-
- // Mock file stats - large file (150KB)
- mockStat.mockResolvedValue({
- size: 150000,
- } as any)
-
- // Mock file read
- mockReadFile.mockResolvedValue(fileContent)
-
- // Mock token counting - file uses 30k tokens (within 60% of 190k remaining = 114k budget)
- mockCountTokens.mockResolvedValue(30000)
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- expect(result.shouldTruncate).toBe(false)
- expect(mockReadFile).toHaveBeenCalledWith(filePath, "utf-8")
- expect(mockCountTokens).toHaveBeenCalled()
- })
-
- it("should truncate large files that exceed token budget", async () => {
- const filePath = "/test/huge-file.txt"
- const contextWindow = 200000
- const currentTokens = 10000
- const fileContent = "x".repeat(500000) // 500KB file
-
- // Mock file stats - huge file (500KB)
- mockStat.mockResolvedValue({
- size: 500000,
- } as any)
-
- // Mock file read
- mockReadFile.mockResolvedValue(fileContent)
-
- // Mock token counting - file uses 150k tokens (exceeds 60% of 190k remaining = 114k budget)
- mockCountTokens.mockResolvedValue(150000)
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- expect(result.shouldTruncate).toBe(true)
- expect(result.maxChars).toBeDefined()
- expect(result.maxChars).toBeGreaterThan(0)
- expect(result.reason).toContain("150000 tokens")
- expect(result.reason).toContain("114000 tokens available")
- })
-
- it("should handle case where no budget is available", async () => {
- const filePath = "/test/file.txt"
- const contextWindow = 200000
- const currentTokens = 200000 // Context is full
-
- // Mock file stats - large file
- mockStat.mockResolvedValue({
- size: 150000,
- } as any)
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- expect(result.shouldTruncate).toBe(true)
- expect(result.maxChars).toBe(0)
- expect(result.reason).toContain("No available context budget")
- })
-
- it("should handle errors gracefully and not truncate", async () => {
- const filePath = "/test/error-file.txt"
- const contextWindow = 200000
- const currentTokens = 10000
-
- // Mock file stats to throw an error
- mockStat.mockRejectedValue(new Error("File not found"))
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- expect(result.shouldTruncate).toBe(false)
- })
-
- it("should calculate correct token budget with 60/40 split", async () => {
- const filePath = "/test/file.txt"
- const contextWindow = 100000
- const currentTokens = 20000 // 80k remaining
- const fileContent = "test content"
-
- mockStat.mockResolvedValue({ size: 150000 } as any)
- mockReadFile.mockResolvedValue(fileContent)
-
- // Available budget should be: (100000 - 20000) * 0.6 = 48000
- // File uses 50k tokens, should be truncated
- mockCountTokens.mockResolvedValue(50000)
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- expect(result.shouldTruncate).toBe(true)
- // maxChars should be approximately 48000 * 3 = 144000
- expect(result.maxChars).toBe(144000)
- })
-
- it("should validate files at the FILE_SIZE_THRESHOLD boundary", async () => {
- const filePath = "/test/boundary-file.txt"
- const contextWindow = 200000
- const currentTokens = 10000
- const fileContent = "x".repeat(1000)
-
- // Mock file stats - exactly at threshold (should trigger validation)
- mockStat.mockResolvedValue({
- size: FILE_SIZE_THRESHOLD,
- } as any)
-
- mockReadFile.mockResolvedValue(fileContent)
- mockCountTokens.mockResolvedValue(30000) // Within budget
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- // At exactly the threshold, it should validate
- expect(mockReadFile).toHaveBeenCalled()
- expect(mockCountTokens).toHaveBeenCalled()
- expect(result.shouldTruncate).toBe(false)
- })
-
- it("should provide preview for files exceeding MAX_FILE_SIZE_FOR_TOKENIZATION", async () => {
- const filePath = "/test/huge-file.txt"
- const contextWindow = 200000
- const currentTokens = 10000
- const previewContent = "x".repeat(PREVIEW_SIZE_FOR_LARGE_FILES)
-
- // Mock file stats - file exceeds max tokenization size (e.g., 10MB when max is 5MB)
- mockStat.mockResolvedValue({
- size: MAX_FILE_SIZE_FOR_TOKENIZATION + 1000000, // 1MB over the limit
- } as any)
-
- // Mock file.open and read for preview
- const mockRead = vi.fn().mockResolvedValue({
- bytesRead: PREVIEW_SIZE_FOR_LARGE_FILES,
- })
- const mockClose = vi.fn().mockResolvedValue(undefined)
- mockOpen.mockResolvedValue({
- read: mockRead,
- close: mockClose,
- } as any)
-
- // Mock token counting for the preview
- mockCountTokens.mockResolvedValue(30000) // Preview fits within budget
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- expect(result.shouldTruncate).toBe(true)
- expect(result.isPreview).toBe(true)
- expect(result.reason).toContain("too large")
- expect(result.reason).toContain("preview")
- // Should read preview and count tokens
- expect(mockOpen).toHaveBeenCalled()
- expect(mockCountTokens).toHaveBeenCalled()
- })
-
- it("should handle files exactly at MAX_FILE_SIZE_FOR_TOKENIZATION boundary", async () => {
- const filePath = "/test/boundary-file.txt"
- const contextWindow = 200000
- const currentTokens = 10000
- const fileContent = "x".repeat(1000)
-
- // Mock file stats - exactly at max size
- mockStat.mockResolvedValue({
- size: MAX_FILE_SIZE_FOR_TOKENIZATION,
- } as any)
-
- mockReadFile.mockResolvedValue(fileContent)
- mockCountTokens.mockResolvedValue(30000) // Within budget
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- // At exactly the limit, should still attempt to tokenize
- expect(mockReadFile).toHaveBeenCalled()
- expect(mockCountTokens).toHaveBeenCalled()
- })
-
- it("should handle tokenizer unreachable errors gracefully", async () => {
- const filePath = "/test/problematic-file.txt"
- const contextWindow = 200000
- const currentTokens = 10000
- const fileContent = "x".repeat(200000) // Content that might cause issues
-
- // Mock file stats - within size limits but content causes tokenizer crash
- mockStat.mockResolvedValue({
- size: 200000,
- } as any)
-
- mockReadFile.mockResolvedValue(fileContent)
- // Simulate tokenizer "unreachable" error
- mockCountTokens.mockRejectedValue(new Error("unreachable"))
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- // Should fallback with conservative estimation
- const remainingTokens = contextWindow - currentTokens
- const safeReadBudget = Math.floor(remainingTokens * 0.6) // 114000
-
- expect(result.shouldTruncate).toBe(true)
- expect(result.isPreview).toBe(true)
- expect(result.reason).toContain("tokenizer error")
-
- // The actual maxChars depends on conservative estimation
- // content.length (200000) is used as estimate since tokenizer failed
- expect(result.maxChars).toBeDefined()
- expect(typeof result.maxChars).toBe("number")
- })
-
- it("should handle other tokenizer errors conservatively", async () => {
- const filePath = "/test/error-file.txt"
- const contextWindow = 200000
- const currentTokens = 10000
- const fileContent = "test content"
-
- mockStat.mockResolvedValue({ size: 150000 } as any)
- mockReadFile.mockResolvedValue(fileContent)
- // Simulate a different error
- mockCountTokens.mockRejectedValue(new Error("Network error"))
-
- const result = await validateFileTokenBudget(filePath, contextWindow, currentTokens)
-
- // Should return safe fallback (don't truncate, let normal error handling take over)
- expect(result.shouldTruncate).toBe(false)
- })
- })
-
- describe("truncateFileContent", () => {
- it("should truncate content to specified character limit", () => {
- const content = "a".repeat(1000)
- const maxChars = 500
- const totalChars = 1000
-
- const result = truncateFileContent(content, maxChars, totalChars, false)
-
- expect(result.content).toHaveLength(500)
- expect(result.content).toBe("a".repeat(500))
- expect(result.notice).toContain("500 of 1000 characters")
- expect(result.notice).toContain("context limitations")
- })
-
- it("should show preview message for large files", () => {
- const content = "x".repeat(10000000) // ~10MB (9.54MB in binary)
- const maxChars = 100000 // 100KB preview
- const totalChars = 10000000
-
- const result = truncateFileContent(content, maxChars, totalChars, true)
-
- expect(result.content).toHaveLength(maxChars)
- expect(result.notice).toContain("Preview")
- expect(result.notice).toContain("0.1MB") // 100KB = 0.1MB
- expect(result.notice).toContain("9.54MB") // Binary MB calculation
- expect(result.notice).toContain("line_range")
- })
-
- it("should include helpful notice about using line_range", () => {
- const content = "test content that is very long"
- const maxChars = 10
- const totalChars = 31
-
- const result = truncateFileContent(content, maxChars, totalChars)
-
- expect(result.notice).toContain("line_range")
- expect(result.notice).toContain("specific sections")
- })
-
- it("should handle empty content", () => {
- const content = ""
- const maxChars = 100
- const totalChars = 0
-
- const result = truncateFileContent(content, maxChars, totalChars)
-
- expect(result.content).toBe("")
- expect(result.notice).toContain("0 of 0 characters")
- })
-
- it("should truncate multi-line content correctly", () => {
- const content = "line1\nline2\nline3\nline4\nline5"
- const maxChars = 15
- const totalChars = content.length
-
- const result = truncateFileContent(content, maxChars, totalChars)
-
- expect(result.content).toBe("line1\nline2\nlin")
- expect(result.content).toHaveLength(15)
- })
-
- it("should work with unicode characters", () => {
-			const content = "Hello 🌍 World 🚀 Test ✨"
- const maxChars = 10
- const totalChars = content.length
-
- const result = truncateFileContent(content, maxChars, totalChars)
-
- expect(result.content).toHaveLength(10)
- expect(result.notice).toBeDefined()
- })
- })
-})
diff --git a/src/core/tools/helpers/fileTokenBudget.ts b/src/core/tools/helpers/fileTokenBudget.ts
index ad82f8fb410..4023802680f 100644
--- a/src/core/tools/helpers/fileTokenBudget.ts
+++ b/src/core/tools/helpers/fileTokenBudget.ts
@@ -1,228 +1,9 @@
-import * as fs from "fs/promises"
-import { countTokens } from "../../../utils/countTokens"
-import { Anthropic } from "@anthropic-ai/sdk"
-import { countFileLinesAndTokens } from "../../../integrations/misc/line-counter"
-
-/**
- * File size threshold (in bytes) above which token validation is triggered.
- * Files smaller than this are read without token counting overhead.
- */
-export const FILE_SIZE_THRESHOLD = 100_000 // 100KB
-
-/**
- * Absolute maximum file size (in bytes) that will be read for token validation.
- * Files larger than this cannot be tokenized due to tokenizer limitations.
- * This prevents WASM "unreachable" errors in tiktoken.
- */
-export const MAX_FILE_SIZE_FOR_TOKENIZATION = 5_000_000 // 5MB
-
-/**
- * Size of preview to read from files that exceed MAX_FILE_SIZE_FOR_TOKENIZATION.
- * This allows the agent to see the beginning of large files without crashing.
- */
-export const PREVIEW_SIZE_FOR_LARGE_FILES = 100_000 // 100KB
+// Re-export the new incremental token-based file reader
+export { readFileWithTokenBudget } from "../../../integrations/misc/read-file-with-budget"
+export type { ReadWithBudgetResult, ReadWithBudgetOptions } from "../../../integrations/misc/read-file-with-budget"
/**
* Percentage of available context to reserve for file reading.
* The remaining percentage is reserved for the model's response and overhead.
*/
export const FILE_READ_BUDGET_PERCENT = 0.6 // 60% for file, 40% for response
-
-/**
- * Result of token budget validation for a file.
- */
-export interface TokenBudgetResult {
- /** Whether the file content should be truncated */
- shouldTruncate: boolean
- /** The maximum number of characters allowed (only relevant if shouldTruncate is true) */
- maxChars?: number
- /** Human-readable reason for truncation */
- reason?: string
- /** Whether this is a preview of a larger file (only showing beginning) */
- isPreview?: boolean
-}
-
-/**
- * Validates whether a file's content fits within the available token budget.
- *
- * Strategy:
- * 1. Files < 100KB: Skip validation (fast path)
- * 2. Files >= 100KB: Count tokens and check against budget
- * 3. Budget = (contextWindow - currentTokens) * 0.6
- *
- * @param filePath - Path to the file to validate
- * @param contextWindow - Total context window size in tokens
- * @param currentTokens - Current token usage
- * @returns TokenBudgetResult indicating whether to truncate and at what character limit
- */
-export async function validateFileTokenBudget(
- filePath: string,
- contextWindow: number,
- currentTokens: number,
-): Promise<TokenBudgetResult> {
- try {
- // Check file size first (fast path)
- const stats = await fs.stat(filePath)
- const fileSizeBytes = stats.size
-
- // Fast path: small files always pass
- if (fileSizeBytes < FILE_SIZE_THRESHOLD) {
- return { shouldTruncate: false }
- }
-
- // Calculate available token budget
- const remainingTokens = contextWindow - currentTokens
- const safeReadBudget = Math.floor(remainingTokens * FILE_READ_BUDGET_PERCENT)
-
- // If we don't have enough budget, truncate immediately without reading
- if (safeReadBudget <= 0) {
- return {
- shouldTruncate: true,
- maxChars: 0,
- reason: "No available context budget for file reading",
- }
- }
-
- // For files too large to tokenize entirely, read a preview instead
- // The tokenizer (tiktoken WASM) crashes with "unreachable" errors on very large files
- const isPreviewMode = fileSizeBytes > MAX_FILE_SIZE_FOR_TOKENIZATION
-
- // Use streaming token counter for normal-sized files to avoid double read
- // For previews, still use direct read since we're only reading a portion
- let tokenCount = 0
- let streamingSucceeded = false
-
- if (!isPreviewMode) {
- // Try streaming token estimation first (single pass, early exit capability)
- try {
- const result = await countFileLinesAndTokens(filePath, {
- budgetTokens: safeReadBudget,
- chunkLines: 256,
- })
- tokenCount = result.tokenEstimate
- streamingSucceeded = true
-
- // If streaming indicated we exceeded budget during scan
- if (!result.complete) {
- // Early exit - we know file exceeds budget without reading it all
- const maxChars = Math.floor(safeReadBudget * 3)
- return {
- shouldTruncate: true,
- maxChars,
- reason: `File requires ${tokenCount}+ tokens but only ${safeReadBudget} tokens available in context budget`,
- }
- }
- } catch (error) {
- // Streaming failed - will fallback to full read below
- streamingSucceeded = false
- }
- }
-
- // Fallback to full read + token count (for preview mode or if streaming failed)
- if (!streamingSucceeded) {
- let content: string
-
- if (isPreviewMode) {
- // Read only the preview portion to avoid tokenizer crashes
- const fileHandle = await fs.open(filePath, "r")
- try {
- const buffer = Buffer.alloc(PREVIEW_SIZE_FOR_LARGE_FILES)
- const { bytesRead } = await fileHandle.read(buffer, 0, PREVIEW_SIZE_FOR_LARGE_FILES, 0)
- content = buffer.slice(0, bytesRead).toString("utf-8")
- } finally {
- await fileHandle.close()
- }
- } else {
- // Read the entire file for normal-sized files
- content = await fs.readFile(filePath, "utf-8")
- }
-
- // Count tokens with error handling
- try {
- const contentBlocks: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: content }]
- tokenCount = await countTokens(contentBlocks)
- } catch (error) {
- // Catch tokenizer "unreachable" errors
- const errorMessage = error instanceof Error ? error.message : String(error)
- if (errorMessage.includes("unreachable")) {
- // Use conservative estimation: 2 chars = 1 token
- const estimatedTokens = Math.ceil(content.length / 2)
- if (estimatedTokens > safeReadBudget) {
- return {
- shouldTruncate: true,
- maxChars: safeReadBudget,
- isPreview: true,
- reason: `File content caused tokenizer error. Showing truncated preview to fit context budget. Use line_range to read specific sections.`,
- }
- }
- return {
- shouldTruncate: true,
- maxChars: content.length,
- isPreview: true,
- reason: `File content caused tokenizer error but fits in context. Use line_range for specific sections.`,
- }
- }
- throw error
- }
- }
-
- // Check if content exceeds budget
- if (tokenCount > safeReadBudget) {
- const maxChars = Math.floor(safeReadBudget * 3)
- return {
- shouldTruncate: true,
- maxChars,
- isPreview: isPreviewMode,
- reason: isPreviewMode
- ? `Preview of large file (${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB) truncated to fit context budget. Use line_range to read specific sections.`
- : `File requires ${tokenCount} tokens but only ${safeReadBudget} tokens available in context budget`,
- }
- }
-
- // Content fits within budget
- if (isPreviewMode) {
- return {
- shouldTruncate: true,
- maxChars: PREVIEW_SIZE_FOR_LARGE_FILES,
- isPreview: true,
- reason: `File is too large (${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB) to read entirely. Showing preview of first ${(PREVIEW_SIZE_FOR_LARGE_FILES / 1024 / 1024).toFixed(1)}MB. Use line_range to read specific sections.`,
- }
- }
-
- // File fits within budget
- return { shouldTruncate: false }
- } catch (error) {
- // On error, be conservative and don't truncate
- // This allows the existing error handling to take over
- console.warn(`[fileTokenBudget] Error validating file ${filePath}:`, error)
- return { shouldTruncate: false }
- }
-}
-
-/**
- * Truncates file content to fit within the specified character limit.
- * Adds a notice message at the end to inform the user about truncation.
- *
- * @param content - The full file content
- * @param maxChars - Maximum number of characters to keep
- * @param totalChars - Total number of characters in the original file
- * @param isPreview - Whether this is a preview of a larger file (not token-budget limited)
- * @returns Object containing truncated content and a notice message
- */
-export function truncateFileContent(
- content: string,
- maxChars: number,
- totalChars: number,
- isPreview: boolean = false,
-): { content: string; notice: string } {
- const truncatedContent = content.slice(0, maxChars)
-
- const notice = isPreview
- ? `Preview: Showing first ${(maxChars / 1024 / 1024).toFixed(1)}MB of ${(totalChars / 1024 / 1024).toFixed(2)}MB file. Use line_range to read specific sections.`
- : `File truncated to ${maxChars} of ${totalChars} characters due to context limitations. Use line_range to read specific sections if needed.`
-
- return {
- content: truncatedContent,
- notice,
- }
-}
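
Callers that previously paired `validateFileTokenBudget` with `extractTextFromFile` now combine the two remaining exports roughly like this. A minimal sketch; the wrapper name is hypothetical and `availableTokens` stands in for the caller's own context math:

```ts
import { FILE_READ_BUDGET_PERCENT, readFileWithTokenBudget } from "./fileTokenBudget"

// Hypothetical wrapper: availableTokens = contextWindow - maxOutputTokens - usedTokens.
async function readWithinBudget(filePath: string, availableTokens: number): Promise<string> {
	const budgetTokens = Math.floor(availableTokens * FILE_READ_BUDGET_PERCENT)
	const result = await readFileWithTokenBudget(filePath, { budgetTokens })
	if (!result.complete) {
		console.warn(`Truncated ${filePath} after ${result.lineCount} lines (${result.tokenCount} tokens)`)
	}
	return result.content
}
```
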
diff --git a/src/integrations/misc/__tests__/read-file-with-budget.spec.ts b/src/integrations/misc/__tests__/read-file-with-budget.spec.ts
new file mode 100644
index 00000000000..7a4e99ce694
--- /dev/null
+++ b/src/integrations/misc/__tests__/read-file-with-budget.spec.ts
@@ -0,0 +1,321 @@
+import fs from "fs/promises"
+import path from "path"
+import os from "os"
+import { readFileWithTokenBudget } from "../read-file-with-budget"
+
+describe("readFileWithTokenBudget", () => {
+ let tempDir: string
+
+ beforeEach(async () => {
+ // Create a temporary directory for test files
+ tempDir = path.join(os.tmpdir(), `read-file-budget-test-${Date.now()}`)
+ await fs.mkdir(tempDir, { recursive: true })
+ })
+
+ afterEach(async () => {
+ // Clean up temporary directory
+ await fs.rm(tempDir, { recursive: true, force: true })
+ })
+
+ describe("Basic functionality", () => {
+ test("reads entire small file when within budget", async () => {
+ const filePath = path.join(tempDir, "small.txt")
+ const content = "Line 1\nLine 2\nLine 3"
+ await fs.writeFile(filePath, content)
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 1000, // Large budget
+ })
+
+ expect(result.content).toBe(content)
+ expect(result.lineCount).toBe(3)
+ expect(result.complete).toBe(true)
+ expect(result.tokenCount).toBeGreaterThan(0)
+ expect(result.tokenCount).toBeLessThan(1000)
+ })
+
+ test("returns correct token count", async () => {
+ const filePath = path.join(tempDir, "token-test.txt")
+ const content = "This is a test file with some content."
+ await fs.writeFile(filePath, content)
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 1000,
+ })
+
+ // Token count should be reasonable (rough estimate: 1 token per 3-4 chars)
+ expect(result.tokenCount).toBeGreaterThan(5)
+ expect(result.tokenCount).toBeLessThan(20)
+ })
+
+ test("returns complete: true for files within budget", async () => {
+ const filePath = path.join(tempDir, "within-budget.txt")
+ const lines = Array.from({ length: 10 }, (_, i) => `Line ${i + 1}`)
+ await fs.writeFile(filePath, lines.join("\n"))
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 1000,
+ })
+
+ expect(result.complete).toBe(true)
+ expect(result.lineCount).toBe(10)
+ })
+ })
+
+ describe("Truncation behavior", () => {
+ test("stops reading when token budget reached", async () => {
+ const filePath = path.join(tempDir, "large.txt")
+ // Create a file with many lines
+ const lines = Array.from({ length: 1000 }, (_, i) => `This is line number ${i + 1} with some content`)
+ await fs.writeFile(filePath, lines.join("\n"))
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 50, // Small budget
+ })
+
+ expect(result.complete).toBe(false)
+ expect(result.lineCount).toBeLessThan(1000)
+ expect(result.lineCount).toBeGreaterThan(0)
+ expect(result.tokenCount).toBeLessThanOrEqual(50)
+ })
+
+ test("returns complete: false when truncated", async () => {
+ const filePath = path.join(tempDir, "truncated.txt")
+ const lines = Array.from({ length: 500 }, (_, i) => `Line ${i + 1}`)
+ await fs.writeFile(filePath, lines.join("\n"))
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 20,
+ })
+
+ expect(result.complete).toBe(false)
+ expect(result.tokenCount).toBeLessThanOrEqual(20)
+ })
+
+ test("content ends at line boundary (no partial lines)", async () => {
+ const filePath = path.join(tempDir, "line-boundary.txt")
+ const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`)
+ await fs.writeFile(filePath, lines.join("\n"))
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 30,
+ })
+
+ // Content should not end mid-line
+ const contentLines = result.content.split("\n")
+ expect(contentLines.length).toBe(result.lineCount)
+ // Last line should be complete (not cut off)
+ expect(contentLines[contentLines.length - 1]).toMatch(/^Line \d+$/)
+ })
+
+ test("works with different chunk sizes", async () => {
+ const filePath = path.join(tempDir, "chunks.txt")
+ const lines = Array.from({ length: 1000 }, (_, i) => `Line ${i + 1}`)
+ await fs.writeFile(filePath, lines.join("\n"))
+
+ // Test with small chunk size
+ const result1 = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 50,
+ chunkLines: 10,
+ })
+
+ // Test with large chunk size
+ const result2 = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 50,
+ chunkLines: 500,
+ })
+
+ // Both should truncate, but may differ slightly in exact line count
+ expect(result1.complete).toBe(false)
+ expect(result2.complete).toBe(false)
+ expect(result1.tokenCount).toBeLessThanOrEqual(50)
+ expect(result2.tokenCount).toBeLessThanOrEqual(50)
+ })
+ })
+
+ describe("Edge cases", () => {
+ test("handles empty file", async () => {
+ const filePath = path.join(tempDir, "empty.txt")
+ await fs.writeFile(filePath, "")
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 100,
+ })
+
+ expect(result.content).toBe("")
+ expect(result.lineCount).toBe(0)
+ expect(result.tokenCount).toBe(0)
+ expect(result.complete).toBe(true)
+ })
+
+ test("handles single line file", async () => {
+ const filePath = path.join(tempDir, "single-line.txt")
+ await fs.writeFile(filePath, "Single line content")
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 100,
+ })
+
+ expect(result.content).toBe("Single line content")
+ expect(result.lineCount).toBe(1)
+ expect(result.complete).toBe(true)
+ })
+
+ test("handles budget of 0 tokens", async () => {
+ const filePath = path.join(tempDir, "zero-budget.txt")
+ await fs.writeFile(filePath, "Line 1\nLine 2\nLine 3")
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 0,
+ })
+
+ expect(result.content).toBe("")
+ expect(result.lineCount).toBe(0)
+ expect(result.tokenCount).toBe(0)
+ expect(result.complete).toBe(false)
+ })
+
+ test("handles very small budget (fewer tokens than first line)", async () => {
+ const filePath = path.join(tempDir, "tiny-budget.txt")
+ const longLine = "This is a very long line with lots of content that will exceed a tiny token budget"
+ await fs.writeFile(filePath, `${longLine}\nLine 2\nLine 3`)
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 2, // Very small budget
+ })
+
+ // Should return empty since first line exceeds budget
+ expect(result.content).toBe("")
+ expect(result.lineCount).toBe(0)
+ expect(result.complete).toBe(false)
+ })
+
+ test("throws error for non-existent file", async () => {
+ const filePath = path.join(tempDir, "does-not-exist.txt")
+
+ await expect(
+ readFileWithTokenBudget(filePath, {
+ budgetTokens: 100,
+ }),
+ ).rejects.toThrow("File not found")
+ })
+
+ test("handles file with no trailing newline", async () => {
+ const filePath = path.join(tempDir, "no-trailing-newline.txt")
+ await fs.writeFile(filePath, "Line 1\nLine 2\nLine 3")
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 1000,
+ })
+
+ expect(result.content).toBe("Line 1\nLine 2\nLine 3")
+ expect(result.lineCount).toBe(3)
+ expect(result.complete).toBe(true)
+ })
+
+ test("handles file with trailing newline", async () => {
+ const filePath = path.join(tempDir, "trailing-newline.txt")
+ await fs.writeFile(filePath, "Line 1\nLine 2\nLine 3\n")
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 1000,
+ })
+
+ expect(result.content).toBe("Line 1\nLine 2\nLine 3")
+ expect(result.lineCount).toBe(3)
+ expect(result.complete).toBe(true)
+ })
+ })
+
+ describe("Token counting accuracy", () => {
+ test("returned tokenCount matches actual tokens in content", async () => {
+ const filePath = path.join(tempDir, "accuracy.txt")
+ const content = "Hello world\nThis is a test\nWith some content"
+ await fs.writeFile(filePath, content)
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 1000,
+ })
+
+ // Verify the token count is reasonable
+ // Rough estimate: 1 token per 3-4 characters
+ const minExpected = Math.floor(content.length / 5)
+ const maxExpected = Math.ceil(content.length / 2)
+
+ expect(result.tokenCount).toBeGreaterThanOrEqual(minExpected)
+ expect(result.tokenCount).toBeLessThanOrEqual(maxExpected)
+ })
+
+ test("handles special characters correctly", async () => {
+ const filePath = path.join(tempDir, "special-chars.txt")
+			const content = "Special chars: @#$%^&*()\nUnicode: 你好世界\nEmoji: 🎉🚀"
+ await fs.writeFile(filePath, content)
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 1000,
+ })
+
+ expect(result.content).toBe(content)
+ expect(result.tokenCount).toBeGreaterThan(0)
+ expect(result.complete).toBe(true)
+ })
+
+ test("handles code content", async () => {
+ const filePath = path.join(tempDir, "code.ts")
+ const code = `function hello(name: string): string {\n return \`Hello, \${name}!\`\n}`
+ await fs.writeFile(filePath, code)
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 1000,
+ })
+
+ expect(result.content).toBe(code)
+ expect(result.tokenCount).toBeGreaterThan(0)
+ expect(result.complete).toBe(true)
+ })
+ })
+
+ describe("Performance", () => {
+ test("handles large files efficiently", async () => {
+ const filePath = path.join(tempDir, "large-file.txt")
+ // Create a 1MB file
+ const lines = Array.from({ length: 10000 }, (_, i) => `Line ${i + 1} with some additional content`)
+ await fs.writeFile(filePath, lines.join("\n"))
+
+ const startTime = Date.now()
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 100,
+ })
+
+ const endTime = Date.now()
+ const duration = endTime - startTime
+
+ // Should complete in reasonable time (less than 5 seconds)
+ expect(duration).toBeLessThan(5000)
+ expect(result.complete).toBe(false)
+ expect(result.tokenCount).toBeLessThanOrEqual(100)
+ })
+
+ test("early exits when budget is reached", async () => {
+ const filePath = path.join(tempDir, "early-exit.txt")
+ // Create a very large file
+ const lines = Array.from({ length: 50000 }, (_, i) => `Line ${i + 1}`)
+ await fs.writeFile(filePath, lines.join("\n"))
+
+ const startTime = Date.now()
+
+ const result = await readFileWithTokenBudget(filePath, {
+ budgetTokens: 50, // Small budget should trigger early exit
+ })
+
+ const endTime = Date.now()
+ const duration = endTime - startTime
+
+ // Should be much faster than reading entire file (less than 2 seconds)
+ expect(duration).toBeLessThan(2000)
+ expect(result.complete).toBe(false)
+ expect(result.lineCount).toBeLessThan(50000)
+ })
+ })
+})
diff --git a/src/integrations/misc/read-file-with-budget.ts b/src/integrations/misc/read-file-with-budget.ts
new file mode 100644
index 00000000000..15aa4f1144f
--- /dev/null
+++ b/src/integrations/misc/read-file-with-budget.ts
@@ -0,0 +1,182 @@
+import { createReadStream } from "fs"
+import fs from "fs/promises"
+import { createInterface } from "readline"
+import { countTokens } from "../../utils/countTokens"
+import { Anthropic } from "@anthropic-ai/sdk"
+
+export interface ReadWithBudgetResult {
+ /** The content read up to the token budget */
+ content: string
+ /** Actual token count of returned content */
+ tokenCount: number
+ /** Total lines in the returned content */
+ lineCount: number
+ /** Whether the entire file was read (false if truncated) */
+ complete: boolean
+}
+
+export interface ReadWithBudgetOptions {
+ /** Maximum tokens allowed. Required. */
+ budgetTokens: number
+ /** Number of lines to buffer before token counting (default: 256) */
+ chunkLines?: number
+}
+
+/**
+ * Reads a file while incrementally counting tokens, stopping when budget is reached.
+ *
+ * Unlike validateFileTokenBudget + extractTextFromFile, this is a single-pass
+ * operation that returns the actual content up to the token limit.
+ *
+ * @param filePath - Path to the file to read
+ * @param options - Budget and chunking options
+ * @returns Content read, token count, and completion status
+ */
+export async function readFileWithTokenBudget(
+ filePath: string,
+ options: ReadWithBudgetOptions,
+): Promise<ReadWithBudgetResult> {
+ const { budgetTokens, chunkLines = 256 } = options
+
+ // Verify file exists
+ try {
+ await fs.access(filePath)
+ } catch {
+ throw new Error(`File not found: ${filePath}`)
+ }
+
+ return new Promise((resolve, reject) => {
+ let content = ""
+ let lineCount = 0
+ let tokenCount = 0
+ let lineBuffer: string[] = []
+ let complete = true
+ let isProcessing = false
+ let shouldClose = false
+
+ const readStream = createReadStream(filePath)
+ const rl = createInterface({
+ input: readStream,
+ crlfDelay: Infinity,
+ })
+
+		const processBuffer = async (): Promise<boolean> => {
+ if (lineBuffer.length === 0) return true
+
+ const bufferText = lineBuffer.join("\n")
+ const currentBuffer = [...lineBuffer]
+ lineBuffer = []
+
+ // Count tokens for this chunk
+ let chunkTokens: number
+ try {
+ const contentBlocks: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: bufferText }]
+ chunkTokens = await countTokens(contentBlocks)
+ } catch {
+ // Fallback: conservative estimate (2 chars per token)
+ chunkTokens = Math.ceil(bufferText.length / 2)
+ }
+
+ // Check if adding this chunk would exceed budget
+ if (tokenCount + chunkTokens > budgetTokens) {
+ // Need to find cutoff within this chunk using binary search
+ let low = 0
+ let high = currentBuffer.length
+ let bestFit = 0
+ let bestTokens = 0
+
+ while (low < high) {
+ const mid = Math.floor((low + high + 1) / 2)
+ const testContent = currentBuffer.slice(0, mid).join("\n")
+ let testTokens: number
+ try {
+ const blocks: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: testContent }]
+ testTokens = await countTokens(blocks)
+ } catch {
+ testTokens = Math.ceil(testContent.length / 2)
+ }
+
+ if (tokenCount + testTokens <= budgetTokens) {
+ bestFit = mid
+ bestTokens = testTokens
+ low = mid
+ } else {
+ high = mid - 1
+ }
+ }
+
+ // Add best fit lines
+ if (bestFit > 0) {
+ const fitContent = currentBuffer.slice(0, bestFit).join("\n")
+ content += (content.length > 0 ? "\n" : "") + fitContent
+ tokenCount += bestTokens
+ lineCount += bestFit
+ }
+ complete = false
+ return false
+ }
+
+ // Entire chunk fits - add it all
+ content += (content.length > 0 ? "\n" : "") + bufferText
+ tokenCount += chunkTokens
+ lineCount += currentBuffer.length
+ return true
+ }
+
+ rl.on("line", (line) => {
+ lineBuffer.push(line)
+
+ if (lineBuffer.length >= chunkLines && !isProcessing) {
+ isProcessing = true
+ rl.pause()
+
+ processBuffer()
+ .then((continueReading) => {
+ isProcessing = false
+ if (!continueReading) {
+ shouldClose = true
+ rl.close()
+ readStream.destroy()
+ } else if (!shouldClose) {
+ rl.resume()
+ }
+ })
+ .catch((err) => {
+ isProcessing = false
+ shouldClose = true
+ rl.close()
+ readStream.destroy()
+ reject(err)
+ })
+ }
+ })
+
+ rl.on("close", async () => {
+ // Wait for any ongoing processing with timeout
+ const maxWaitTime = 30000 // 30 seconds
+ const startWait = Date.now()
+ while (isProcessing) {
+ if (Date.now() - startWait > maxWaitTime) {
+ reject(new Error("Timeout waiting for buffer processing to complete"))
+ return
+ }
+ await new Promise((r) => setTimeout(r, 10))
+ }
+
+ // Process remaining buffer
+ if (!shouldClose) {
+ try {
+ await processBuffer()
+ } catch (err) {
+ reject(err)
+ return
+ }
+ }
+
+ resolve({ content, tokenCount, lineCount, complete })
+ })
+
+ rl.on("error", reject)
+ readStream.on("error", reject)
+ })
+}
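
To make the control flow concrete: the reader buffers `chunkLines` lines, counts tokens once per chunk, and binary-searches for a line-boundary cutoff only inside the chunk that overflows the budget. A minimal usage sketch (file path and numbers are illustrative):

```ts
import { readFileWithTokenBudget } from "./read-file-with-budget"

async function demo() {
	// Smaller chunks mean more countTokens calls but a tighter cutoff search window.
	const result = await readFileWithTokenBudget("/tmp/example.log", {
		budgetTokens: 4_096,
		chunkLines: 128,
	})

	if (result.complete) {
		console.log(`Read whole file: ${result.lineCount} lines, ${result.tokenCount} tokens`)
	} else {
		console.log(`Stopped at line ${result.lineCount}; content always ends on a line boundary`)
	}
}
```
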