Skip to content

Commit aceca87

Browse files
committed
Buffbench: allow installing binaries for the evals
1 parent 2d222c0 commit aceca87

File tree

3 files changed

+83
-1
lines changed

3 files changed

+83
-1
lines changed

evals/buffbench/eval-codebuff.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
{
22
"repoUrl": "https://github.com/CodebuffAI/codebuff",
33
"generationDate": "2025-10-12T05:55:40.855Z",
4+
"binInstalls": [
5+
{
6+
"name": "bun",
7+
"installScript": "curl -fsSL https://bun.sh/install | BUN_INSTALL=$INSTALL_DIR bash -s 'bun-v1.2.23'",
8+
"binPath": "bin/bun"
9+
}
10+
],
411
"initCommand": "bun install",
512
"env": {
613
"ANTHROPIC_API_KEY": "test-key",

evals/buffbench/run-buffbench.ts

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import fs from 'fs'
22
import path from 'path'
3+
import os from 'os'
4+
import { execSync } from 'child_process'
35

46
import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants'
57
import { getUserCredentials } from '@codebuff/npm-app/credentials'
@@ -198,6 +200,54 @@ async function runTask(options: {
198200
return { commit, agentResults, commitTraces }
199201
}
200202

203+
/**
 * Installs every binary listed in `binInstalls` into one fresh temporary
 * directory and builds a PATH value that puts the installed binaries first.
 *
 * Each `installScript` is run through the shell with the temporary directory
 * as its working directory; the same directory is also exposed to the script
 * as the `INSTALL_DIR` environment variable.
 *
 * @param binInstalls - Binaries to install. `undefined` or an empty list is a
 *   no-op.
 * @returns `tempDir`: the directory holding the installs, or `null` when there
 *   was nothing to install (the caller is responsible for removing a non-null
 *   directory). `env`: `{ PATH }` with the directories of all binaries that
 *   were found prepended to the current PATH, or `{}` when there was nothing
 *   to install.
 * @throws Rethrows whatever a failing install raised, after removing the
 *   temporary directory so a failed run does not leak it.
 */
function installBinaries(binInstalls: EvalDataV2['binInstalls']): {
  tempDir: string | null
  env: Record<string, string>
} {
  if (!binInstalls || binInstalls.length === 0) {
    return { tempDir: null, env: {} }
  }

  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codebuff-bins-'))

  // A Set keeps insertion order while dropping duplicates, so several binaries
  // living in the same directory add only one PATH entry.
  const binDirs = new Set<string>()

  for (const bin of binInstalls) {
    try {
      execSync(bin.installScript, {
        cwd: tempDir,
        stdio: 'ignore',
        env: { ...process.env, INSTALL_DIR: tempDir },
      })

      const fullBinPath = path.join(tempDir, bin.binPath)
      if (fs.existsSync(fullBinPath)) {
        binDirs.add(path.dirname(fullBinPath))
        console.log(`✓ ${bin.name} installed at ${fullBinPath}`)
      } else {
        // Best effort: a missing binary is reported but does not abort the run.
        console.warn(
          `Warning: Expected binary not found at ${fullBinPath} after installing ${bin.name}`,
        )
      }
    } catch (error) {
      console.error(`Error installing ${bin.name}:`, error)
      // The caller never receives tempDir when we throw, so it cannot clean
      // up; remove the partial installation here before propagating.
      try {
        fs.rmSync(tempDir, { recursive: true, force: true })
      } catch (cleanupError) {
        console.warn(
          `Warning: Failed to cleanup binaries directory:`,
          cleanupError,
        )
      }
      throw error
    }
  }

  // Prepend all bin paths to PATH, using the platform's own separator
  // (':' on POSIX, ';' on Windows).
  const updatedPath = [...binDirs, process.env.PATH]
    .filter(Boolean)
    .join(path.delimiter)

  return {
    tempDir,
    env: { PATH: updatedPath },
  }
}
250+
201251
export async function runBuffBench(options: {
202252
evalDataPath: string
203253
agents: string[]
@@ -218,6 +268,14 @@ export async function runBuffBench(options: {
218268
fs.readFileSync(evalDataPath, 'utf-8'),
219269
)
220270

271+
// Install binaries once at the beginning
272+
const { tempDir: binsTempDir, env: binsEnv } = installBinaries(
273+
evalData.binInstalls,
274+
)
275+
276+
// Merge binaries env with eval data env
277+
const mergedEnv = { ...binsEnv, ...evalData.env }
278+
221279
let commitsToRun: EvalDataV2['evalCommits']
222280
if (taskIds && taskIds.length > 0) {
223281
const foundCommits: EvalDataV2['evalCommits'] = []
@@ -303,7 +361,7 @@ export async function runBuffBench(options: {
303361
agents,
304362
repoUrl: evalData.repoUrl,
305363
initCommand: evalData.initCommand,
306-
env: evalData.env,
364+
env: mergedEnv,
307365
logsDir,
308366
index,
309367
totalTasks: commitsToRun.length,
@@ -414,6 +472,16 @@ export async function runBuffBench(options: {
414472
const finalResultsPath = path.join(logsDir, 'FINAL_RESULTS.json')
415473
fs.writeFileSync(finalResultsPath, JSON.stringify(finalResults, null, 2))
416474

475+
// Cleanup binaries installation
476+
if (binsTempDir) {
477+
try {
478+
fs.rmSync(binsTempDir, { recursive: true, force: true })
479+
console.log(`✓ Cleaned up binaries installation at ${binsTempDir}.`)
480+
} catch (error) {
481+
console.warn(`Warning: Failed to cleanup binaries directory:`, error)
482+
}
483+
}
484+
417485
console.log(`Traces saved to ${logsDir}`)
418486
if (commitShasWithErrors.size > 0) {
419487
console.log(

evals/buffbench/types.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,18 @@ export interface EvalCommitV2 {
3838
fileDiffs: FileDiff[]
3939
}
4040

41+
/**
 * Describes one binary that has to be installed before the evals run.
 */
export interface BinInstall {
  /** Label for the binary; used in the install log and error messages. */
  name: string

  /**
   * Shell command that performs the installation. It is executed with the
   * install directory as its working directory and can also read that
   * directory from the `INSTALL_DIR` environment variable.
   */
  installScript: string

  /**
   * Where the executable is expected after installation, relative to the
   * install directory (for example `bin/bun`).
   */
  binPath: string
}
46+
4147
/**
 * Shape of an eval data file as parsed from JSON by the benchmark runner.
 */
export interface EvalDataV2 {
  /** URL of the repository the eval commits belong to. */
  repoUrl: string

  /** Timestamp string recording when this eval data was generated. */
  generationDate: string

  /** The commits that make up the individual eval tasks. */
  evalCommits: EvalCommitV2[]

  /** NOTE(review): presumably an alternate name for the test repository; confirm against callers. */
  testRepoName?: string

  /** Command forwarded to each task as its init step (for example `bun install`). */
  initCommand?: string

  /** Binaries to install once before any task runs. */
  binInstalls?: BinInstall[]

  /** Extra environment variables forwarded to each task. */
  env?: Record<string, string>
}

0 commit comments

Comments
 (0)