Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions packages/opencode/script/glob-perf.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env bun
import fs from "fs/promises"
import os from "os"
import path from "path"
import { performance } from "perf_hooks"
import { Ripgrep } from "../src/file/ripgrep"

// Scratch directory (under the OS temp dir) that holds the generated fixture tree.
const root = await fs.mkdtemp(path.join(os.tmpdir(), "glob-perf-"))
/**
 * Parses the tree-size multiplier.
 *
 * @param raw - Raw text of the multiplier (e.g. the `GLOB_PERF_SCALE` value).
 * @returns The parsed number clamped to a minimum of 1, or 1 when `raw` is not
 *   a finite number ("abc", "Infinity", ...). Rejecting non-finite values keeps
 *   `Array.from({ length })` in the tree builder from being asked for an
 *   effectively unbounded number of directories.
 */
function parseScale(raw: string): number {
  const parsed = Number(raw)
  return Number.isFinite(parsed) ? Math.max(1, parsed) : 1
}

// Size multiplier for the generated tree; override with GLOB_PERF_SCALE (defaults to 1).
const scaleInput = process.env.GLOB_PERF_SCALE ?? "1"
const scale = parseScale(scaleInput)

/**
 * Populates the scratch root with `dir<b>/sub<s>/file<f>.txt` fixtures.
 *
 * Branch directories are filled concurrently; inside one branch the
 * sub-directories and their files are created one after another.
 * Every file contains 256 "x" characters.
 */
async function makeTree() {
  const filesPerDir = 40
  const payload = "x".repeat(256)

  // Array.from normalises the requested length, so fractional scales yield
  // the same index lists as before.
  const indices = (count: number) => Array.from({ length: count }, (_, n) => n)
  const branchIds = indices(40 * scale)
  const subIds = indices(12 * scale)
  const fileIds = indices(filesPerDir)

  const fillBranch = async (branch: number) => {
    const branchDir = path.join(root, `dir${branch}`)
    await fs.mkdir(branchDir, { recursive: true })
    for (const sub of subIds) {
      const subDir = path.join(branchDir, `sub${sub}`)
      await fs.mkdir(subDir, { recursive: true })
      for (const file of fileIds) {
        await Bun.write(path.join(subDir, `file${file}.txt`), payload)
      }
    }
  }

  await Promise.all(branchIds.map((branch) => fillBranch(branch)))
}

/**
 * Creates `<root>/loop` holding one regular file plus a `cycle` symlink that
 * points back at the `loop` directory itself, producing a symlink cycle.
 */
async function makeSymlinkLoop() {
  const dir = path.join(root, "loop")
  const marker = path.join(dir, "loop-file.txt")
  const link = path.join(dir, "cycle")

  await fs.mkdir(dir, { recursive: true })
  await Bun.write(marker, "loop")
  await fs.symlink(dir, link)
}

/**
 * Collects everything `Ripgrep.files` yields for the scratch root and prints
 * the file count and elapsed wall-clock time.
 *
 * @param label - Prefix used on the printed lines.
 * @param follow - Forwarded to `Ripgrep.files` as its `follow` option.
 *
 * A scan that rejects is logged and then reported as zero files.
 */
async function runScan(label: string, follow: boolean) {
  const startedAt = performance.now()

  let found: string[]
  try {
    found = await Array.fromAsync(Ripgrep.files({ cwd: root, follow, timeoutMs: 5000 }))
  } catch (e) {
    console.log(`${label}: error ${e}`)
    found = []
  }

  const elapsed = Math.round(performance.now() - startedAt)
  console.log(`${label}: files=${found.length} time=${elapsed}ms follow=${follow}`)
}

// Build the fixtures and time both scan modes. Cleanup lives in `finally` so
// the scratch directory is removed even when fixture creation or a scan throws.
try {
  await makeTree()
  await makeSymlinkLoop()

  await runScan("no-follow", false)
  await runScan("with-follow", true)
} finally {
  await fs.rm(root, { recursive: true, force: true })
}
40 changes: 37 additions & 3 deletions packages/opencode/src/file/ripgrep.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,33 @@ export namespace Ripgrep {
return filepath
}

export async function* files(input: { cwd: string; glob?: string[] }) {
const args = [await filepath(), "--files", "--follow", "--hidden", "--glob=!.git/*"]
export async function* files(input: {
cwd: string
glob?: string[]
ignore?: string[]
maxDepth?: number
maxFileSize?: string
timeoutMs?: number
follow?: boolean
}) {
const args = [await filepath(), "--files", "--hidden", "--glob=!.git/*"]
if (input.follow) args.push("--follow")
if (input.glob) {
for (const g of input.glob) {
args.push(`--glob=${g}`)
}
}
if (input.ignore) {
for (const g of input.ignore) {
args.push(`--glob=!${g}`)
}
}
if (input.maxDepth !== undefined) {
args.push(`--max-depth=${input.maxDepth}`)
}
if (input.maxFileSize) {
args.push(`--max-filesize=${input.maxFileSize}`)
}

// Bun.spawn should throw this, but it incorrectly reports that the executable does not exist.
// See https://github.com/oven-sh/bun/issues/24012
Expand All @@ -234,10 +254,21 @@ export namespace Ripgrep {
const decoder = new TextDecoder()
let buffer = ""

let interrupted = true
let timedOut = false
const timeout = input.timeoutMs
? setTimeout(() => {
timedOut = true
proc.kill()
}, input.timeoutMs)
: undefined
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
if (done) {
interrupted = false
break
}

buffer += decoder.decode(value, { stream: true })
const lines = buffer.split("\n")
Expand All @@ -251,7 +282,10 @@ export namespace Ripgrep {
if (buffer) yield buffer
} finally {
reader.releaseLock()
if (timeout) clearTimeout(timeout)
if ((interrupted || timedOut) && proc.exitCode === null) proc.kill()
await proc.exited
if (timedOut) throw new Error("ripgrep timed out")
}
}

Expand Down
86 changes: 64 additions & 22 deletions packages/opencode/src/tool/glob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ import DESCRIPTION from "./glob.txt"
import { Ripgrep } from "../file/ripgrep"
import { Instance } from "../project/instance"

// Directory names excluded from glob results wherever they appear in the tree;
// each one is expanded to a `**/<name>/**` pattern.
const DEFAULT_IGNORE = [".venv", ".direnv", ".cache", "tmp", "temp", "__pycache__"].map(
  (name) => `**/${name}/**`,
)

// Notice appended to the tool output when the caller enables symlink following.
const FOLLOW_WARNING = [
  "Following symlinks can scan large or cyclical directories and may spike CPU.",
  "Only enable if you need it.",
].join(" ")

export const GlobTool = Tool.define("glob", {
description: DESCRIPTION,
parameters: z.object({
Expand All @@ -15,33 +27,59 @@ export const GlobTool = Tool.define("glob", {
.describe(
`The directory to search in. If not specified, the current working directory will be used. IMPORTANT: Omit this field to use the default directory. DO NOT enter "undefined" or "null" - simply omit it for the default behavior. Must be a valid directory path if provided.`,
),
follow: z.boolean().optional().describe("Follow symlinks (can be expensive); defaults to false"),
}),
async execute(params) {
let search = params.path ?? Instance.directory
search = path.isAbsolute(search) ? search : path.resolve(Instance.directory, search)
const root = Instance.worktree
let search = params.path ?? root
search = path.isAbsolute(search) ? search : path.resolve(root, search)
const rel = path.relative(root, search)
if (rel.startsWith("..")) {
search = root
}

const limit = 100
const files = []
let truncated = false
for await (const file of Ripgrep.files({
cwd: search,
glob: [params.pattern],
})) {
if (files.length >= limit) {
truncated = true
break
async function searchOnce(options: { maxDepth?: number; timeoutMs?: number; maxFileSize?: string }) {
const files: { path: string; mtime: number }[] = []
let truncated = false
for await (const file of Ripgrep.files({
cwd: search,
glob: [params.pattern],
ignore: DEFAULT_IGNORE,
maxDepth: options.maxDepth,
maxFileSize: options.maxFileSize,
timeoutMs: options.timeoutMs,
follow: params.follow ?? false,
})) {
if (files.length >= limit) {
truncated = true
break
}
const full = path.resolve(search, file)
const stats = await Bun.file(full)
.stat()
.then((x) => x.mtime.getTime())
.catch(() => 0)
files.push({ path: full, mtime: stats })
}
const full = path.resolve(search, file)
const stats = await Bun.file(full)
.stat()
.then((x) => x.mtime.getTime())
.catch(() => 0)
files.push({
path: full,
mtime: stats,
})
files.sort((a, b) => b.mtime - a.mtime)
return { files, truncated }
}
files.sort((a, b) => b.mtime - a.mtime)

const limit = 100
const stage1 = await searchOnce({ maxDepth: 8, maxFileSize: "10M", timeoutMs: 4000 })
const needMore = stage1.files.length < limit && !stage1.truncated
const stage2 = needMore ? await searchOnce({}) : { files: [], truncated: false }

const combined = [...stage1.files, ...stage2.files]
const seen = new Set<string>()
const deduped = []
for (const f of combined) {
if (seen.has(f.path)) continue
seen.add(f.path)
deduped.push(f)
}
const truncated = stage1.truncated || stage2.truncated
const files = deduped.slice(0, limit)

const output = []
if (files.length === 0) output.push("No files found")
Expand All @@ -52,6 +90,10 @@ export const GlobTool = Tool.define("glob", {
output.push("(Results are truncated. Consider using a more specific path or pattern.)")
}
}
if (params.follow) {
output.push("")
output.push(FOLLOW_WARNING)
}

return {
title: path.relative(Instance.worktree, search),
Expand Down
42 changes: 42 additions & 0 deletions packages/opencode/test/file/ripgrep-files.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { describe, expect, test } from "bun:test"
import fs from "fs/promises"
import os from "os"
import path from "path"
import { Ripgrep } from "../../src/file/ripgrep"

// Exercises the optional `ignore` and `maxDepth` inputs of Ripgrep.files against
// throwaway directories. Each test removes its directory in `finally` so a
// failing expectation does not leave fixtures behind in the OS temp dir.
describe("Ripgrep.files", () => {
  test("honors ignore globs", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rg-files-"))
    try {
      const keep = path.join(dir, "keep.txt")
      const skipDir = path.join(dir, "node_modules")
      const skipFile = path.join(skipDir, "ignore.txt")

      await fs.writeFile(keep, "ok")
      await fs.mkdir(skipDir)
      await fs.writeFile(skipFile, "skip")

      const files = await Array.fromAsync(
        Ripgrep.files({ cwd: dir, ignore: ["**/node_modules/**"] }),
      )

      expect(files.some((f) => f.endsWith("keep.txt"))).toBe(true)
      expect(files.some((f) => f.includes("node_modules"))).toBe(false)
    } finally {
      await fs.rm(dir, { recursive: true, force: true })
    }
  })

  test("respects max depth", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rg-depth-"))
    try {
      // deep.txt sits three directories below the root, beyond maxDepth: 2.
      const deepDir = path.join(dir, "a", "b", "c")
      await fs.mkdir(deepDir, { recursive: true })
      await fs.writeFile(path.join(dir, "root.txt"), "root")
      await fs.writeFile(path.join(deepDir, "deep.txt"), "deep")

      const files = await Array.fromAsync(Ripgrep.files({ cwd: dir, maxDepth: 2 }))

      expect(files.some((f) => f.endsWith("root.txt"))).toBe(true)
      expect(files.some((f) => f.endsWith("deep.txt"))).toBe(false)
    } finally {
      await fs.rm(dir, { recursive: true, force: true })
    }
  })
})
24 changes: 24 additions & 0 deletions packages/opencode/test/file/ripgrep-symlink.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { describe, expect, test } from "bun:test"
import fs from "fs/promises"
import os from "os"
import path from "path"
import { Ripgrep } from "../../src/file/ripgrep"

// Ensure ripgrep listing does not hang on symlink loops when follow=false
describe("Ripgrep.files symlink safety", () => {
  test("skips symlink loop by default", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rg-loop-"))
    try {
      const target = path.join(dir, "target")
      await fs.mkdir(target)
      await fs.writeFile(path.join(target, "file.txt"), "ok")
      // loop -> its own containing directory (cycle: target/loop/loop/...)
      await fs.symlink(target, path.join(target, "loop"))

      const files = await Array.fromAsync(Ripgrep.files({ cwd: dir }))

      expect(files.some((f) => f.endsWith("file.txt"))).toBe(true)
      // Compare whole path segments: the temp directory name itself contains
      // "loop" ("rg-loop-..."), so a substring check could match unrelated paths.
      expect(files.some((f) => f.split(/[\\/]/).includes("loop"))).toBe(false)
    } finally {
      // Always remove the fixture, even when an expectation above fails.
      await fs.rm(dir, { recursive: true, force: true })
    }
  })
})
Loading