Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 74 additions & 37 deletions src/core/tools/ReadFileTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import {
processImageFile,
ImageMemoryTracker,
} from "./helpers/imageHelpers"
import { validateFileTokenBudget, truncateFileContent } from "./helpers/fileTokenBudget"
import { FILE_READ_BUDGET_PERCENT, readFileWithTokenBudget } from "./helpers/fileTokenBudget"
import { truncateDefinitionsToLineLimit } from "./helpers/truncateDefinitions"
import { BaseTool, ToolCallbacks } from "./BaseTool"
import type { ToolUse } from "../../shared/tools"
Expand Down Expand Up @@ -386,7 +386,38 @@ export class ReadFileTool extends BaseTool<"read_file"> {
}

if (supportedBinaryFormats && supportedBinaryFormats.includes(fileExtension)) {
// Fall through to extractTextFromFile
// Use extractTextFromFile for supported binary formats (PDF, DOCX, etc.)
try {
const content = await extractTextFromFile(fullPath)
const numberedContent = addLineNumbers(content)
const lines = content.split("\n")
const lineCount = lines.length
const lineRangeAttr = lineCount > 0 ? ` lines="1-${lineCount}"` : ""

await task.fileContextTracker.trackFileContext(relPath, "read_tool" as RecordSource)

updateFileResult(relPath, {
xmlContent:
lineCount > 0
? `<file><path>${relPath}</path>\n<content${lineRangeAttr}>\n${numberedContent}</content>\n</file>`
: `<file><path>${relPath}</path>\n<content/><notice>File is empty</notice>\n</file>`,
nativeContent:
lineCount > 0
? `File: ${relPath}\nLines 1-${lineCount}:\n${numberedContent}`
: `File: ${relPath}\nNote: File is empty`,
})
continue
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error)
updateFileResult(relPath, {
status: "error",
error: `Error extracting text: ${errorMsg}`,
xmlContent: `<file><path>${relPath}</path><error>Error extracting text: ${errorMsg}</error></file>`,
nativeContent: `File: ${relPath}\nError: Error extracting text: ${errorMsg}`,
})
await task.say("error", `Error extracting text from ${relPath}: ${errorMsg}`)
continue
}
} else {
const fileFormat = fileExtension.slice(1) || "bin"
updateFileResult(relPath, {
Expand Down Expand Up @@ -492,48 +523,54 @@ export class ReadFileTool extends BaseTool<"read_file"> {
settings: task.apiConfiguration,
}) ?? ANTHROPIC_DEFAULT_MAX_TOKENS

const budgetResult = await validateFileTokenBudget(
fullPath,
contextWindow - maxOutputTokens,
contextTokens || 0,
)
// Calculate available token budget (60% of remaining context)
const remainingTokens = contextWindow - maxOutputTokens - (contextTokens || 0)
const safeReadBudget = Math.floor(remainingTokens * FILE_READ_BUDGET_PERCENT)

let content = await extractTextFromFile(fullPath)
let content: string
let xmlInfo = ""

let nativeInfo = ""

if (budgetResult.shouldTruncate && budgetResult.maxChars !== undefined) {
const truncateResult = truncateFileContent(
content,
budgetResult.maxChars,
content.length,
budgetResult.isPreview,
)
content = truncateResult.content

let displayedLines = content.length === 0 ? 0 : content.split(/\r?\n/).length
if (displayedLines > 0 && content.endsWith("\n")) {
displayedLines--
}
const lineRangeAttr = displayedLines > 0 ? ` lines="1-${displayedLines}"` : ""
xmlInfo =
content.length > 0 ? `<content${lineRangeAttr}>\n${content}</content>\n` : `<content/>`
xmlInfo += `<notice>${truncateResult.notice}</notice>\n`

nativeInfo =
content.length > 0
? `Lines 1-${displayedLines}:\n${content}\n\nNote: ${truncateResult.notice}`
: `Note: ${truncateResult.notice}`
if (safeReadBudget <= 0) {
// No budget available
content = ""
const notice = "No available context budget for file reading"
xmlInfo = `<content/>\n<notice>${notice}</notice>\n`
nativeInfo = `Note: ${notice}`
} else {
const lineRangeAttr = ` lines="1-${totalLines}"`
xmlInfo = totalLines > 0 ? `<content${lineRangeAttr}>\n${content}</content>\n` : `<content/>`
// Read file with incremental token counting
const result = await readFileWithTokenBudget(fullPath, {
budgetTokens: safeReadBudget,
})

if (totalLines === 0) {
xmlInfo += `<notice>File is empty</notice>\n`
nativeInfo = "Note: File is empty"
content = addLineNumbers(result.content)

if (!result.complete) {
// File was truncated
const notice = `File truncated: showing ${result.lineCount} lines (${result.tokenCount} tokens) due to context budget. Use line_range to read specific sections.`
const lineRangeAttr = result.lineCount > 0 ? ` lines="1-${result.lineCount}"` : ""
xmlInfo =
result.lineCount > 0
? `<content${lineRangeAttr}>\n${content}</content>\n<notice>${notice}</notice>\n`
: `<content/>\n<notice>${notice}</notice>\n`
nativeInfo =
result.lineCount > 0
? `Lines 1-${result.lineCount}:\n${content}\n\nNote: ${notice}`
: `Note: ${notice}`
} else {
nativeInfo = `Lines 1-${totalLines}:\n${content}`
// Full file read
const lineRangeAttr = ` lines="1-${result.lineCount}"`
xmlInfo =
result.lineCount > 0
? `<content${lineRangeAttr}>\n${content}</content>\n`
: `<content/>`

if (result.lineCount === 0) {
xmlInfo += `<notice>File is empty</notice>\n`
nativeInfo = "Note: File is empty"
} else {
nativeInfo = `Lines 1-${result.lineCount}:\n${content}`
}
}
}

Expand Down
77 changes: 64 additions & 13 deletions src/core/tools/__tests__/readFileTool.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,29 @@ vi.mock("fs/promises", () => fsPromises)
// Mock input content for tests
let mockInputContent = ""

// Mocks are built inside vi.hoisted so they already exist when the vi.mock factories run.
const { addLineNumbersMock, mockReadFileWithTokenBudget } = vi.hoisted(() => {
	// Prefixes every line with "<n> | ", counting up from startLine; falsy input yields "".
	const numberLines = (text: string, startLine = 1) => {
		if (!text) {
			return ""
		}
		const rows = typeof text === "string" ? text.split("\n") : [text]
		return rows.map((row: string, offset: number) => `${startLine + offset} | ${row}`).join("\n")
	}

	return {
		addLineNumbersMock: vi.fn().mockImplementation(numberLines),
		// Left without an implementation here; individual tests or setup hooks configure it.
		mockReadFileWithTokenBudget: vi.fn(),
	}
})

// First create all the mocks
vi.mock("../../../integrations/misc/extract-text", () => ({
extractTextFromFile: vi.fn(),
addLineNumbers: vi.fn(),
addLineNumbers: addLineNumbersMock,
getSupportedBinaryFormats: vi.fn(() => [".pdf", ".docx", ".ipynb"]),
}))
vi.mock("../../../services/tree-sitter")

// Then create the mock functions
const addLineNumbersMock = vi.fn().mockImplementation((text, startLine = 1) => {
if (!text) return ""
const lines = typeof text === "string" ? text.split("\n") : [text]
return lines.map((line, i) => `${startLine + i} | ${line}`).join("\n")
})
// Mock readFileWithTokenBudget - must be mocked to prevent actual file system access.
// The exported function is a thin wrapper that forwards every argument to
// mockReadFileWithTokenBudget at call time, so tests steer its behavior through that mock.
// NOTE(review): ReadFileTool.ts imports readFileWithTokenBudget from "./helpers/fileTokenBudget";
// confirm that path resolves to (or re-exports) this module so the mock actually intercepts the call.
vi.mock("../../../integrations/misc/read-file-with-budget", () => ({
readFileWithTokenBudget: (...args: any[]) => mockReadFileWithTokenBudget(...args),
}))

const extractTextFromFileMock = vi.fn()
const getSupportedBinaryFormatsMock = vi.fn(() => [".pdf", ".docx", ".ipynb"])
Expand Down Expand Up @@ -145,6 +154,27 @@ beforeEach(() => {
})
: []
})

// Reset addLineNumbers mock to its default implementation (prevents cross-test pollution)
addLineNumbersMock.mockReset()
addLineNumbersMock.mockImplementation((text: string, startLine = 1) => {
if (!text) return ""
const lines = typeof text === "string" ? text.split("\n") : [text]
return lines.map((line: string, i: number) => `${startLine + i} | ${line}`).join("\n")
})

// Reset readFileWithTokenBudget mock with default implementation
mockReadFileWithTokenBudget.mockClear()
mockReadFileWithTokenBudget.mockImplementation(async (_filePath: string, _options: any) => {
// Default: return the current mockInputContent as a complete read; lineCount is derived from its content
const lines = mockInputContent ? mockInputContent.split("\n") : []
return {
content: mockInputContent,
tokenCount: mockInputContent.length / 4, // rough estimate
lineCount: lines.length,
complete: true,
}
})
})

// Mock i18n translation function
Expand Down Expand Up @@ -496,7 +526,16 @@ describe("read_file tool with maxReadFileLine setting", () => {
it("should read with extractTextFromFile when file has few lines", async () => {
// Setup
mockedCountFileLines.mockResolvedValue(3) // File shorter than maxReadFileLine
mockInputContent = fileContent
const threeLineContent = "Line 1\nLine 2\nLine 3"
mockInputContent = threeLineContent

// Configure the mock to return the correct content for this test
mockReadFileWithTokenBudget.mockResolvedValueOnce({
content: threeLineContent,
tokenCount: threeLineContent.length / 4,
lineCount: 3,
complete: true,
})

// Execute
const result = await executeReadFileTool({}, { maxReadFileLine: 5, totalLines: 3 })
Expand Down Expand Up @@ -656,11 +695,15 @@ describe("read_file tool XML output structure", () => {
it("should produce XML output with no unnecessary indentation", async () => {
// Setup
const numberedContent = "1 | Line 1\n2 | Line 2\n3 | Line 3\n4 | Line 4\n5 | Line 5"
// For XML structure test
mockedExtractTextFromFile.mockImplementation(() => {
addLineNumbersMock(mockInputContent)
return Promise.resolve(numberedContent)

// Configure mockReadFileWithTokenBudget to return the 5-line content
mockReadFileWithTokenBudget.mockResolvedValueOnce({
content: fileContent, // "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
tokenCount: fileContent.length / 4,
lineCount: 5,
complete: true,
})

mockProvider.getState.mockResolvedValue({
maxReadFileLine: -1,
maxImageFileSize: 20,
Expand Down Expand Up @@ -693,7 +736,15 @@ describe("read_file tool XML output structure", () => {
it("should handle empty files correctly", async () => {
// Setup
mockedCountFileLines.mockResolvedValue(0)
mockedExtractTextFromFile.mockResolvedValue("")

// Configure mockReadFileWithTokenBudget to return empty content
mockReadFileWithTokenBudget.mockResolvedValueOnce({
content: "",
tokenCount: 0,
lineCount: 0,
complete: true,
})

mockProvider.getState.mockResolvedValue({
maxReadFileLine: -1,
maxImageFileSize: 20,
Expand Down
Loading
Loading