diff --git a/packages/opencode/script/glob-perf.ts b/packages/opencode/script/glob-perf.ts
new file mode 100644
index 00000000000..3f672f60107
--- /dev/null
+++ b/packages/opencode/script/glob-perf.ts
@@ -0,0 +1,79 @@
+#!/usr/bin/env bun
+// Benchmark for Ripgrep.files: builds a synthetic tree plus a symlink cycle in a
+// temp directory and times a listing with and without following symlinks.
+// GLOB_PERF_SCALE (a number >= 1) multiplies the directory counts.
+import fs from "fs/promises"
+import os from "os"
+import path from "path"
+import { performance } from "perf_hooks"
+import { Ripgrep } from "../src/file/ripgrep"
+
+const root = await fs.mkdtemp(path.join(os.tmpdir(), "glob-perf-"))
+const parsedScale = Number(process.env.GLOB_PERF_SCALE ?? "1")
+// NaN and +/-Infinity fall back to 1 so the Array.from lengths below stay valid.
+const scale = Number.isFinite(parsedScale) ? Math.max(1, parsedScale) : 1
+
+/** Creates `40 * scale` directories, each holding `12 * scale` subdirectories of 40 small files. */
+async function makeTree() {
+  const branches = 40 * scale
+  const subs = 12 * scale
+  const filesPerDir = 40
+  const content = "x".repeat(256)
+
+  // Branches are populated concurrently; everything inside one branch is written sequentially.
+  const tasks: Promise<void>[] = []
+  for (const b of Array.from({ length: branches }, (_, i) => i)) {
+    const base = path.join(root, `dir${b}`)
+    tasks.push(
+      fs.mkdir(base, { recursive: true }).then(async () => {
+        for (const s of Array.from({ length: subs }, (_, j) => j)) {
+          const sub = path.join(base, `sub${s}`)
+          await fs.mkdir(sub, { recursive: true })
+          for (const f of Array.from({ length: filesPerDir }, (_, k) => k)) {
+            await Bun.write(path.join(sub, `file${f}.txt`), content)
+          }
+        }
+      }),
+    )
+  }
+  await Promise.all(tasks)
+}
+
+/** Adds `loop/` containing one file and a `cycle` symlink that points back at `loop/` itself. */
+async function makeSymlinkLoop() {
+  const loopRoot = path.join(root, "loop")
+  await fs.mkdir(loopRoot, { recursive: true })
+  await Bun.write(path.join(loopRoot, "loop-file.txt"), "loop")
+  await fs.symlink(loopRoot, path.join(loopRoot, "cycle"))
+}
+
+/**
+ * Lists every file under `root` and logs the count and elapsed wall time.
+ * A failed scan (including the 5s timeout) is logged and reported as zero files.
+ */
+async function runScan(label: string, follow: boolean) {
+  const start = performance.now()
+  const files = await Array.fromAsync(
+    Ripgrep.files({
+      cwd: root,
+      follow,
+      timeoutMs: 5000,
+    }),
+  ).catch((e) => {
+    console.log(`${label}: error ${e}`)
+    return [] as string[]
+  })
+  const ms = Math.round(performance.now() - start)
+  console.log(`${label}: files=${files.length} time=${ms}ms follow=${follow}`)
+}
+
+try {
+  await makeTree()
+  await makeSymlinkLoop()
+
+  await runScan("no-follow", false)
+  await runScan("with-follow", true)
+} finally {
+  // Remove the temp tree even when setup or a scan throws.
+  await fs.rm(root, { recursive: true, force: true })
+}
diff --git a/packages/opencode/src/file/ripgrep.ts b/packages/opencode/src/file/ripgrep.ts
index 00d9e8c3867..7462f257648 100644
--- a/packages/opencode/src/file/ripgrep.ts
+++ b/packages/opencode/src/file/ripgrep.ts
@@ -205,13 +205,45 @@ export namespace Ripgrep {
     return filepath
   }
 
-  export async function* files(input: { cwd: string; glob?: string[] }) {
-    const args = [await filepath(), "--files", "--follow", "--hidden", "--glob=!.git/*"]
+  /**
+   * Streams the paths printed by `rg --files`. Hidden files are included and
+   * `.git/*` is always excluded.
+   *
+   * - `glob`: patterns passed as `--glob=<pattern>`.
+   * - `ignore`: patterns passed negated, as `--glob=!<pattern>`.
+   * - `maxDepth` / `maxFileSize`: forwarded as `--max-depth` / `--max-filesize`.
+   * - `follow`: adds `--follow`; omitted unless set.
+   * - `timeoutMs`: when set and non-zero, ripgrep is killed after this many milliseconds.
+   *
+   * @throws Error `"ripgrep timed out"` once the remaining output is drained after a timeout.
+   */
+  export async function* files(input: {
+    cwd: string
+    glob?: string[]
+    ignore?: string[]
+    maxDepth?: number
+    maxFileSize?: string
+    timeoutMs?: number
+    follow?: boolean
+  }) {
+    const args = [await filepath(), "--files", "--hidden", "--glob=!.git/*"]
+    if (input.follow) args.push("--follow")
     if (input.glob) {
       for (const g of input.glob) {
         args.push(`--glob=${g}`)
       }
     }
+    if (input.ignore) {
+      for (const g of input.ignore) {
+        args.push(`--glob=!${g}`)
+      }
+    }
+    if (input.maxDepth !== undefined) {
+      args.push(`--max-depth=${input.maxDepth}`)
+    }
+    if (input.maxFileSize) {
+      args.push(`--max-filesize=${input.maxFileSize}`)
+    }
 
     // Bun.spawn should throw this, but it incorrectly reports that the executable does not exist.
     // See https://github.com/oven-sh/bun/issues/24012
@@ -234,10 +266,23 @@ export namespace Ripgrep {
     const decoder = new TextDecoder()
     let buffer = ""
 
+    // Stays true unless the stream is read to its end, so an early exit by the
+    // consumer or a read error still gets the child process killed in `finally`.
+    let interrupted = true
+    let timedOut = false
+    const timeout = input.timeoutMs
+      ? setTimeout(() => {
+          timedOut = true
+          proc.kill()
+        }, input.timeoutMs)
+      : undefined
     try {
       while (true) {
         const { done, value } = await reader.read()
-        if (done) break
+        if (done) {
+          interrupted = false
+          break
+        }
 
         buffer += decoder.decode(value, { stream: true })
         const lines = buffer.split("\n")
@@ -251,7 +296,12 @@ export namespace Ripgrep {
       if (buffer) yield buffer
+      // Thrown from the `try` body, not from `finally`: a throw in `finally` would replace
+      // any error already propagating and would also fire when the consumer exits early.
+      if (timedOut) throw new Error("ripgrep timed out")
     } finally {
       reader.releaseLock()
+      if (timeout) clearTimeout(timeout)
+      if ((interrupted || timedOut) && proc.exitCode === null) proc.kill()
       await proc.exited
     }
   }
 
diff --git a/packages/opencode/src/tool/glob.ts b/packages/opencode/src/tool/glob.ts
index 11c12f19ac4..aca4ef797b4 100644
--- a/packages/opencode/src/tool/glob.ts
+++ b/packages/opencode/src/tool/glob.ts
@@ -5,6 +5,20 @@ import DESCRIPTION from "./glob.txt"
 import { Ripgrep } from "../file/ripgrep"
 import { Instance } from "../project/instance"
 
+// Directory globs excluded from every glob search, passed to Ripgrep.files as `ignore`.
+const DEFAULT_IGNORE = [
+  "**/.venv/**",
+  "**/.direnv/**",
+  "**/.cache/**",
+  "**/tmp/**",
+  "**/temp/**",
+  "**/__pycache__/**",
+]
+
+// Appended to the tool output whenever the caller sets `follow`.
+const FOLLOW_WARNING =
+  "Following symlinks can scan large or cyclical directories and may spike CPU. Only enable if you need it."
+
 export const GlobTool = Tool.define("glob", {
   description: DESCRIPTION,
   parameters: z.object({
@@ -15,33 +29,69 @@ export const GlobTool = Tool.define("glob", {
       .describe(
         `The directory to search in. If not specified, the current working directory will be used. IMPORTANT: Omit this field to use the default directory. DO NOT enter "undefined" or "null" - simply omit it for the default behavior. Must be a valid directory path if provided.`,
       ),
+    follow: z.boolean().optional().describe("Follow symlinks (can be expensive); defaults to false"),
   }),
   async execute(params) {
-    let search = params.path ?? Instance.directory
-    search = path.isAbsolute(search) ? search : path.resolve(Instance.directory, search)
+    const root = Instance.worktree
+    let search = params.path ?? root
+    search = path.isAbsolute(search) ? search : path.resolve(root, search)
+    // Keep the search inside the worktree. Whole path segments are compared so an
+    // in-tree name such as "..cache" is not mistaken for an escape, and an absolute
+    // result (path.relative across Windows drives) counts as outside.
+    const rel = path.relative(root, search)
+    if (rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)) {
+      search = root
+    }
 
-    const limit = 100
-    const files = []
-    let truncated = false
-    for await (const file of Ripgrep.files({
-      cwd: search,
-      glob: [params.pattern],
-    })) {
-      if (files.length >= limit) {
-        truncated = true
-        break
+    const limit = 100
+
+    // One ripgrep pass with the given bounds: collects at most `limit` files with
+    // their mtimes (0 when stat fails) and returns them newest first.
+    async function searchOnce(options: { maxDepth?: number; timeoutMs?: number; maxFileSize?: string }) {
+      const files: { path: string; mtime: number }[] = []
+      let truncated = false
+      for await (const file of Ripgrep.files({
+        cwd: search,
+        glob: [params.pattern],
+        ignore: DEFAULT_IGNORE,
+        maxDepth: options.maxDepth,
+        maxFileSize: options.maxFileSize,
+        timeoutMs: options.timeoutMs,
+        follow: params.follow ?? false,
+      })) {
+        if (files.length >= limit) {
+          truncated = true
+          break
+        }
+        const full = path.resolve(search, file)
+        const stats = await Bun.file(full)
+          .stat()
+          .then((x) => x.mtime.getTime())
+          .catch(() => 0)
+        files.push({ path: full, mtime: stats })
       }
-      const full = path.resolve(search, file)
-      const stats = await Bun.file(full)
-        .stat()
-        .then((x) => x.mtime.getTime())
-        .catch(() => 0)
-      files.push({
-        path: full,
-        mtime: stats,
-      })
+      files.sort((a, b) => b.mtime - a.mtime)
+      return { files, truncated }
     }
-    files.sort((a, b) => b.mtime - a.mtime)
+
+    // Stage 1 is bounded by depth, file size and time. Stage 2 repeats the listing
+    // without those bounds, and only runs when stage 1 ended below the limit.
+    const stage1 = await searchOnce({ maxDepth: 8, maxFileSize: "10M", timeoutMs: 4000 })
+    const needMore = stage1.files.length < limit && !stage1.truncated
+    const stage2 = needMore ? await searchOnce({}) : { files: [], truncated: false }
+
+    // Drop paths returned by both stages, then sort again: each stage is sorted on
+    // its own, so the concatenation is not ordered by mtime as a whole.
+    const seen = new Set<string>()
+    const merged: { path: string; mtime: number }[] = []
+    for (const f of [...stage1.files, ...stage2.files]) {
+      if (seen.has(f.path)) continue
+      seen.add(f.path)
+      merged.push(f)
+    }
+    merged.sort((a, b) => b.mtime - a.mtime)
+    const truncated = stage1.truncated || stage2.truncated || merged.length > limit
+    const files = merged.slice(0, limit)
 
     const output = []
     if (files.length === 0) output.push("No files found")
@@ -52,6 +102,10 @@ export const GlobTool = Tool.define("glob", {
         output.push("(Results are truncated. Consider using a more specific path or pattern.)")
       }
     }
+    if (params.follow) {
+      output.push("")
+      output.push(FOLLOW_WARNING)
+    }
 
     return {
       title: path.relative(Instance.worktree, search),
diff --git a/packages/opencode/test/file/ripgrep-files.test.ts b/packages/opencode/test/file/ripgrep-files.test.ts
new file mode 100644
index 00000000000..c346678388e
--- /dev/null
+++ b/packages/opencode/test/file/ripgrep-files.test.ts
@@ -0,0 +1,46 @@
+import { describe, expect, test } from "bun:test"
+import fs from "fs/promises"
+import os from "os"
+import path from "path"
+import { Ripgrep } from "../../src/file/ripgrep"
+
+describe("Ripgrep.files", () => {
+  test("honors ignore globs", async () => {
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rg-files-"))
+    try {
+      const keep = path.join(dir, "keep.txt")
+      const skipDir = path.join(dir, "node_modules")
+      const skipFile = path.join(skipDir, "ignore.txt")
+
+      await fs.writeFile(keep, "ok")
+      await fs.mkdir(skipDir)
+      await fs.writeFile(skipFile, "skip")
+
+      const files = await Array.fromAsync(Ripgrep.files({ cwd: dir, ignore: ["**/node_modules/**"] }))
+
+      expect(files.some((f) => f.endsWith("keep.txt"))).toBe(true)
+      expect(files.some((f) => f.includes("node_modules"))).toBe(false)
+    } finally {
+      // Remove the fixture even when an expectation above fails.
+      await fs.rm(dir, { recursive: true, force: true })
+    }
+  })
+
+  test("respects max depth", async () => {
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rg-depth-"))
+    try {
+      // deep.txt sits three directories down, past the maxDepth of 2 used below.
+      const deepDir = path.join(dir, "a", "b", "c")
+      await fs.mkdir(deepDir, { recursive: true })
+      await fs.writeFile(path.join(dir, "root.txt"), "root")
+      await fs.writeFile(path.join(deepDir, "deep.txt"), "deep")
+
+      const files = await Array.fromAsync(Ripgrep.files({ cwd: dir, maxDepth: 2 }))
+
+      expect(files.some((f) => f.endsWith("root.txt"))).toBe(true)
+      expect(files.some((f) => f.endsWith("deep.txt"))).toBe(false)
+    } finally {
+      await fs.rm(dir, { recursive: true, force: true })
+    }
+  })
+})
diff --git a/packages/opencode/test/file/ripgrep-symlink.test.ts b/packages/opencode/test/file/ripgrep-symlink.test.ts
new file mode 100644
index 00000000000..c345b21fec0
--- /dev/null
+++ b/packages/opencode/test/file/ripgrep-symlink.test.ts
@@ -0,0 +1,26 @@
+import { describe, expect, test } from "bun:test"
+import fs from "fs/promises"
+import os from "os"
+import path from "path"
+import { Ripgrep } from "../../src/file/ripgrep"
+
+// Ensure ripgrep listing does not hang on symlink loops when follow=false
+describe("Ripgrep.files symlink safety", () => {
+  test("skips symlink loop by default", async () => {
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rg-loop-"))
+    try {
+      const target = path.join(dir, "target")
+      await fs.mkdir(target)
+      await fs.writeFile(path.join(target, "file.txt"), "ok")
+      // target/loop -> target, so following it would recurse as target/loop/loop/...
+      await fs.symlink(target, path.join(target, "loop"))
+
+      const files = await Array.fromAsync(Ripgrep.files({ cwd: dir }))
+
+      expect(files.some((f) => f.endsWith("file.txt"))).toBe(true)
+      expect(files.some((f) => f.includes("loop"))).toBe(false)
+    } finally {
+      await fs.rm(dir, { recursive: true, force: true })
+    }
+  })
+})