Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions evals/apps/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,15 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
const workspacePath = path.resolve(exercisesPath, language, exercise)
const taskSocketPath = path.resolve(dirname, `${dirname}/task-${task.id}.sock`)

// Inject the "foot gun" system prompt if one was supplied through the
// FOOTGUN_SYSTEM_PROMPT environment variable: the prompt text is written to
// <workspacePath>/.roo/system-prompt-code.
// NOTE(review): this block only ever writes the file. When the variable is
// unset, a file written by an earlier run is left in place — confirm that
// workspaces are reset between runs, otherwise a stale prompt may leak in.
if (process.env.FOOTGUN_SYSTEM_PROMPT) {
	const rooDir = path.join(workspacePath, ".roo")
	// `recursive: true` makes mkdirSync succeed when the directory already
	// exists, so no separate existsSync check (a check-then-create race) is needed.
	fs.mkdirSync(rooDir, { recursive: true })
	fs.writeFileSync(path.join(rooDir, "system-prompt-code"), process.env.FOOTGUN_SYSTEM_PROMPT)
}

// If debugging:
// Use --wait --log trace or --verbose.
// Don't await execa and store result as subprocess.
Expand Down
28 changes: 26 additions & 2 deletions evals/apps/web/src/app/runs/new/new-run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { zodResolver } from "@hookform/resolvers/zod"
import fuzzysort from "fuzzysort"
import { toast } from "sonner"
import { X, Rocket, Check, ChevronsUpDown, HardDriveUpload, CircleCheck } from "lucide-react"
import { Dialog, DialogContent, DialogTitle, DialogFooter } from "@/components/ui/dialog"

import { globalSettingsSchema, providerSettingsSchema, rooCodeDefaults } from "@evals/types"

Expand Down Expand Up @@ -83,6 +84,10 @@ export function NewRun() {

const [model, suite, settings] = watch(["model", "suite", "settings", "concurrency"])

const [systemPromptDialogOpen, setSystemPromptDialogOpen] = useState(false)
const [systemPrompt, setSystemPrompt] = useState("")
const systemPromptRef = useRef<HTMLTextAreaElement>(null)

const onSubmit = useCallback(
async (values: FormValues) => {
try {
Expand All @@ -97,13 +102,13 @@ export function NewRun() {
values.settings = { ...(values.settings || {}), openRouterModelId }
}

const { id } = await createRun(values)
const { id } = await createRun({ ...values, systemPrompt })
router.push(`/runs/${id}`)
} catch (e) {
toast.error(e instanceof Error ? e.message : "An unknown error occurred.")
}
},
[mode, model, models.data, router],
[mode, model, models.data, router, systemPrompt],
)

const onFilterModels = useCallback(
Expand Down Expand Up @@ -313,6 +318,10 @@ export function NewRun() {
)}
<FormMessage />
</FormItem>

<Button type="button" variant="secondary" onClick={() => setSystemPromptDialogOpen(true)}>
Import Foot Gun System Prompt
</Button>
</div>

<FormField
Expand Down Expand Up @@ -394,6 +403,21 @@ export function NewRun() {
onClick={() => router.push("/")}>
<X className="size-6" />
</Button>
<Dialog open={systemPromptDialogOpen} onOpenChange={setSystemPromptDialogOpen}>
<DialogContent>
<DialogTitle>Import Foot Gun System Prompt</DialogTitle>
<textarea
ref={systemPromptRef}
value={systemPrompt}
onChange={(e) => setSystemPrompt(e.target.value)}
placeholder="Paste or type your system prompt here..."
className="w-full min-h-[120px] border rounded p-2"
/>
<DialogFooter>
<Button onClick={() => setSystemPromptDialogOpen(false)}>Done</Button>
</DialogFooter>
</DialogContent>
</Dialog>
</>
)
}
1 change: 1 addition & 0 deletions evals/apps/web/src/lib/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export const createRunSchema = z
exercises: z.array(z.string()).optional(),
settings: rooCodeSettingsSchema.optional(),
concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX).default(CONCURRENCY_DEFAULT),
systemPrompt: z.string().optional(),
})
.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {
message: "Exercises are required when running a partial suite.",
Expand Down
23 changes: 16 additions & 7 deletions evals/apps/web/src/lib/server/runs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import * as db from "@evals/db"
import { CreateRun } from "@/lib/schemas"
import { getExercisesForLanguage } from "./exercises"

export async function createRun({ suite, exercises = [], ...values }: CreateRun) {
export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
const run = await db.createRun({
...values,
socketPath: path.join(os.tmpdir(), `roo-code-evals-${crypto.randomUUID()}.sock`),
Expand Down Expand Up @@ -45,13 +45,22 @@ export async function createRun({ suite, exercises = [], ...values }: CreateRun)
try {
const logFile = fs.openSync(`/tmp/roo-code-evals-${run.id}.log`, "a")

const process = spawn("pnpm", ["--filter", "@evals/cli", "dev", "run", "all", "--runId", run.id.toString()], {
detached: true,
stdio: ["ignore", logFile, logFile],
})
// Build the environment for the spawned process. When a system prompt was
// provided it is added as FOOTGUN_SYSTEM_PROMPT on top of a copy of the
// current environment; when it is undefined or an empty string the current
// environment object is passed through unchanged.
const env: NodeJS.ProcessEnv = systemPrompt
? { ...process.env, FOOTGUN_SYSTEM_PROMPT: systemPrompt }
: process.env

process.unref()
await db.updateRun(run.id, { pid: process.pid })
const childProcess = spawn(
"pnpm",
["--filter", "@evals/cli", "dev", "run", "all", "--runId", run.id.toString()],
{
detached: true,
stdio: ["ignore", logFile, logFile],
env,
},
)

childProcess.unref()
await db.updateRun(run.id, { pid: childProcess.pid })
} catch (error) {
console.error(error)
}
Expand Down
Loading