diff --git a/apps/web-evals/package.json b/apps/web-evals/package.json index 446582a5d7b..d2e93599469 100644 --- a/apps/web-evals/package.json +++ b/apps/web-evals/package.json @@ -29,6 +29,7 @@ "@roo-code/evals": "workspace:^", "@roo-code/types": "workspace:^", "@tanstack/react-query": "^5.69.0", + "archiver": "^7.0.1", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "cmdk": "^1.1.0", @@ -52,6 +53,7 @@ "@roo-code/config-eslint": "workspace:^", "@roo-code/config-typescript": "workspace:^", "@tailwindcss/postcss": "^4", + "@types/archiver": "^7.0.0", "@types/ps-tree": "^1.1.6", "@types/react": "^18.3.23", "@types/react-dom": "^18.3.5", diff --git a/apps/web-evals/src/actions/runs.ts b/apps/web-evals/src/actions/runs.ts index 82a7ebfcbe5..e07bf342115 100644 --- a/apps/web-evals/src/actions/runs.ts +++ b/apps/web-evals/src/actions/runs.ts @@ -21,7 +21,7 @@ import { CreateRun } from "@/lib/schemas" const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals") -export async function createRun({ suite, exercises = [], timeout, ...values }: CreateRun) { +export async function createRun({ suite, exercises = [], timeout, iterations = 1, ...values }: CreateRun) { const run = await _createRun({ ...values, timeout, @@ -36,15 +36,34 @@ export async function createRun({ suite, exercises = [], timeout, ...values }: C throw new Error("Invalid exercise path: " + path) } - await createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise }) + // Create multiple tasks for each iteration + for (let iteration = 1; iteration <= iterations; iteration++) { + await createTask({ + ...values, + runId: run.id, + language: language as ExerciseLanguage, + exercise, + iteration, + }) + } } } else { for (const language of exerciseLanguages) { - const exercises = await getExercisesForLanguage(EVALS_REPO_PATH, language) + const languageExercises = await getExercisesForLanguage(EVALS_REPO_PATH, language) + + // Create 
tasks for all iterations of each exercise + const tasksToCreate: Array<{ language: ExerciseLanguage; exercise: string; iteration: number }> = [] + for (const exercise of languageExercises) { + for (let iteration = 1; iteration <= iterations; iteration++) { + tasksToCreate.push({ language, exercise, iteration }) + } + } - await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), { - concurrency: 10, - }) + await pMap( + tasksToCreate, + ({ language, exercise, iteration }) => createTask({ runId: run.id, language, exercise, iteration }), + { concurrency: 10 }, + ) } } diff --git a/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts b/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts new file mode 100644 index 00000000000..e5ec8751ab0 --- /dev/null +++ b/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts @@ -0,0 +1,74 @@ +import { NextResponse } from "next/server" +import type { NextRequest } from "next/server" +import * as fs from "node:fs/promises" +import * as path from "node:path" + +import { findTask, findRun } from "@roo-code/evals" + +export const dynamic = "force-dynamic" + +const LOG_BASE_PATH = "/tmp/evals/runs" + +// Sanitize path components to prevent path traversal attacks +function sanitizePathComponent(component: string): string { + // Remove any path separators, null bytes, and other dangerous characters + return component.replace(/[/\\:\0*?"<>|]/g, "_") +} + +export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string; taskId: string }> }) { + const { id, taskId } = await params + + try { + const runId = Number(id) + const taskIdNum = Number(taskId) + + if (isNaN(runId) || isNaN(taskIdNum)) { + return NextResponse.json({ error: "Invalid run ID or task ID" }, { status: 400 }) + } + + // Verify the run exists + await findRun(runId) + + // Get the task to find its language and exercise + const task = await findTask(taskIdNum) + + // Verify the task belongs to this run + 
if (task.runId !== runId) { + return NextResponse.json({ error: "Task does not belong to this run" }, { status: 404 }) + } + + // Sanitize language and exercise to prevent path traversal + const safeLanguage = sanitizePathComponent(task.language) + const safeExercise = sanitizePathComponent(task.exercise) + + // Construct the log file path + const logFileName = `${safeLanguage}-${safeExercise}.log` + const logFilePath = path.join(LOG_BASE_PATH, String(runId), logFileName) + + // Verify the resolved path is within the expected directory (defense in depth) + const resolvedPath = path.resolve(logFilePath) + const expectedBase = path.resolve(LOG_BASE_PATH) + if (!resolvedPath.startsWith(expectedBase)) { + return NextResponse.json({ error: "Invalid log path" }, { status: 400 }) + } + + // Check if the log file exists and read it (async) + try { + const logContent = await fs.readFile(logFilePath, "utf-8") + return NextResponse.json({ logContent }) + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + return NextResponse.json({ error: "Log file not found", logContent: null }, { status: 200 }) + } + throw err + } + } catch (error) { + console.error("Error reading task log:", error) + + if (error instanceof Error && error.name === "RecordNotFoundError") { + return NextResponse.json({ error: "Task or run not found" }, { status: 404 }) + } + + return NextResponse.json({ error: "Failed to read log file" }, { status: 500 }) + } +} diff --git a/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts b/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts new file mode 100644 index 00000000000..f8c6cec06be --- /dev/null +++ b/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts @@ -0,0 +1,129 @@ +import { NextResponse } from "next/server" +import type { NextRequest } from "next/server" +import * as fs from "node:fs" +import * as path from "node:path" +import archiver from "archiver" + +import { findRun, getTasks } from "@roo-code/evals" + 
+export const dynamic = "force-dynamic" + +const LOG_BASE_PATH = "/tmp/evals/runs" + +// Sanitize path components to prevent path traversal attacks +function sanitizePathComponent(component: string): string { + // Remove any path separators, null bytes, and other dangerous characters + return component.replace(/[/\\:\0*?"<>|]/g, "_") +} + +export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string }> }) { + const { id } = await params + + try { + const runId = Number(id) + + if (isNaN(runId)) { + return NextResponse.json({ error: "Invalid run ID" }, { status: 400 }) + } + + // Verify the run exists + await findRun(runId) + + // Get all tasks for this run + const tasks = await getTasks(runId) + + // Filter for failed tasks only + const failedTasks = tasks.filter((task) => task.passed === false) + + if (failedTasks.length === 0) { + return NextResponse.json({ error: "No failed tasks to export" }, { status: 400 }) + } + + // Create a zip archive + const archive = archiver("zip", { zlib: { level: 9 } }) + + // Collect chunks to build the response + const chunks: Buffer[] = [] + + archive.on("data", (chunk: Buffer) => { + chunks.push(chunk) + }) + + // Track archive errors + let archiveError: Error | null = null + archive.on("error", (err: Error) => { + archiveError = err + }) + + // Set up the end promise before finalizing (proper event listener ordering) + const archiveEndPromise = new Promise((resolve, reject) => { + archive.on("end", resolve) + archive.on("error", reject) + }) + + // Add each failed task's log file to the archive + const logDir = path.join(LOG_BASE_PATH, String(runId)) + let filesAdded = 0 + + for (const task of failedTasks) { + // Sanitize language and exercise to prevent path traversal + const safeLanguage = sanitizePathComponent(task.language) + const safeExercise = sanitizePathComponent(task.exercise) + const logFileName = `${safeLanguage}-${safeExercise}.log` + const logFilePath = path.join(logDir, logFileName) + 
+ // Verify the resolved path is within the expected directory (defense in depth) + const resolvedPath = path.resolve(logFilePath) + const expectedBase = path.resolve(LOG_BASE_PATH) + if (!resolvedPath.startsWith(expectedBase)) { + continue // Skip files with suspicious paths + } + + if (fs.existsSync(logFilePath)) { + archive.file(logFilePath, { name: logFileName }) + filesAdded++ + } + } + + // Check if any files were actually added + if (filesAdded === 0) { + archive.abort() + return NextResponse.json( + { error: "No log files found - they may have been cleared from disk" }, + { status: 404 }, + ) + } + + // Finalize the archive + await archive.finalize() + + // Wait for all data to be collected + await archiveEndPromise + + // Check for archive errors + if (archiveError) { + throw archiveError + } + + // Combine all chunks into a single buffer + const zipBuffer = Buffer.concat(chunks) + + // Return the zip file + return new NextResponse(zipBuffer, { + status: 200, + headers: { + "Content-Type": "application/zip", + "Content-Disposition": `attachment; filename="run-${runId}-failed-logs.zip"`, + "Content-Length": String(zipBuffer.length), + }, + }) + } catch (error) { + console.error("Error exporting failed logs:", error) + + if (error instanceof Error && error.name === "RecordNotFoundError") { + return NextResponse.json({ error: "Run not found" }, { status: 404 }) + } + + return NextResponse.json({ error: "Failed to export logs" }, { status: 500 }) + } +} diff --git a/apps/web-evals/src/app/runs/[id]/run.tsx b/apps/web-evals/src/app/runs/[id]/run.tsx index a8ff1484fe7..bd528884792 100644 --- a/apps/web-evals/src/app/runs/[id]/run.tsx +++ b/apps/web-evals/src/app/runs/[id]/run.tsx @@ -1,9 +1,10 @@ "use client" -import { useMemo } from "react" -import { LoaderCircle } from "lucide-react" +import { useMemo, useState, useCallback, useEffect } from "react" +import { toast } from "sonner" +import { LoaderCircle, FileText, Copy, Check } from "lucide-react" -import type 
{ Run, TaskMetrics as _TaskMetrics } from "@roo-code/evals" +import type { Run, TaskMetrics as _TaskMetrics, Task } from "@roo-code/evals" import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters" import { useRunStatus } from "@/hooks/use-run-status" @@ -17,6 +18,12 @@ import { Tooltip, TooltipContent, TooltipTrigger, + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + ScrollArea, + Button, } from "@/components/ui" import { TaskStatus } from "./task-status" @@ -35,10 +42,169 @@ function getToolAbbreviation(toolName: string): string { .join("") } +// Pattern definitions for syntax highlighting +type HighlightPattern = { + pattern: RegExp + className: string + // If omitted, wraps the entire match; if a number, wraps that capture group + wrapGroup?: number +} + +const HIGHLIGHT_PATTERNS: HighlightPattern[] = [ + // Timestamps [YYYY-MM-DDTHH:MM:SS.sssZ] + { pattern: /\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)\]/g, className: "text-blue-400" }, + // Log levels + { pattern: /\|\s*(INFO)\s*\|/g, className: "text-green-400", wrapGroup: 1 }, + { pattern: /\|\s*(WARN|WARNING)\s*\|/g, className: "text-yellow-400", wrapGroup: 1 }, + { pattern: /\|\s*(ERROR)\s*\|/g, className: "text-red-400", wrapGroup: 1 }, + { pattern: /\|\s*(DEBUG)\s*\|/g, className: "text-gray-400", wrapGroup: 1 }, + // Task identifiers + { pattern: /(taskCreated|taskFocused|taskStarted|taskCompleted|EvalPass|EvalFail)/g, className: "text-purple-400" }, + // Message arrows + { pattern: /→/g, className: "text-cyan-400" }, +] + +// Format a single line with syntax highlighting using React elements (XSS-safe) +function formatLine(line: string): React.ReactNode[] { + // Find all matches with their positions + type Match = { start: number; end: number; text: string; className: string } + const matches: Match[] = [] + + for (const { pattern, className, wrapGroup } of HIGHLIGHT_PATTERNS) { + // Reset regex state + pattern.lastIndex = 0 + let regexMatch +
while ((regexMatch = pattern.exec(line)) !== null) { + const capturedText = wrapGroup !== undefined ? regexMatch[wrapGroup] : regexMatch[0] + // Skip if capture group didn't match + if (!capturedText) continue + const start = + wrapGroup !== undefined ? regexMatch.index + regexMatch[0].indexOf(capturedText) : regexMatch.index + matches.push({ + start, + end: start + capturedText.length, + text: capturedText, + className, + }) + } + } + + // Sort matches by position and filter overlapping ones + matches.sort((a, b) => a.start - b.start) + const filteredMatches: Match[] = [] + for (const m of matches) { + const lastMatch = filteredMatches[filteredMatches.length - 1] + if (!lastMatch || m.start >= lastMatch.end) { + filteredMatches.push(m) + } + } + + // Build result with highlighted spans + const result: React.ReactNode[] = [] + let currentPos = 0 + + for (const [i, m] of filteredMatches.entries()) { + // Add text before this match + if (m.start > currentPos) { + result.push(line.slice(currentPos, m.start)) + } + // Add highlighted match + result.push( + + {m.text} + , + ) + currentPos = m.end + } + + // Add remaining text + if (currentPos < line.length) { + result.push(line.slice(currentPos)) + } + + return result.length > 0 ? result : [line] +} + +// Format log content with basic highlighting (XSS-safe - no dangerouslySetInnerHTML) +function formatLogContent(log: string): React.ReactNode[] { + const lines = log.split("\n") + return lines.map((line, index) => ( +
+ {line ? formatLine(line) : " "} +
+ )) +} + export function Run({ run }: { run: Run }) { const runStatus = useRunStatus(run) const { tasks, tokenUsage, usageUpdatedAt } = runStatus + const [selectedTask, setSelectedTask] = useState(null) + const [taskLog, setTaskLog] = useState(null) + const [isLoadingLog, setIsLoadingLog] = useState(false) + const [copied, setCopied] = useState(false) + + const onCopyLog = useCallback(async () => { + if (!taskLog) return + + try { + await navigator.clipboard.writeText(taskLog) + setCopied(true) + toast.success("Log copied to clipboard") + setTimeout(() => setCopied(false), 2000) + } catch (error) { + console.error("Failed to copy log:", error) + toast.error("Failed to copy log") + } + }, [taskLog]) + + // Handle ESC key to close the dialog + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === "Escape" && selectedTask) { + setSelectedTask(null) + } + } + + document.addEventListener("keydown", handleKeyDown) + return () => document.removeEventListener("keydown", handleKeyDown) + }, [selectedTask]) + + const onViewTaskLog = useCallback( + async (task: Task) => { + // Only allow viewing logs for completed tasks + if (task.passed === null || task.passed === undefined) { + toast.error("Task is still running") + return + } + + setSelectedTask(task) + setIsLoadingLog(true) + setTaskLog(null) + + try { + const response = await fetch(`/api/runs/${run.id}/logs/${task.id}`) + + if (!response.ok) { + const error = await response.json() + toast.error(error.error || "Failed to load log") + setSelectedTask(null) + return + } + + const data = await response.json() + setTaskLog(data.logContent) + } catch (error) { + console.error("Error loading task log:", error) + toast.error("Failed to load log") + setSelectedTask(null) + } finally { + setIsLoadingLog(false) + } + }, + [run.id], + ) + const taskMetrics: Record = useMemo(() => { const metrics: Record = {} @@ -241,15 +407,33 @@ export function Run({ run }: { run: Run }) { {tasks.map((task) => ( - + 
task.finishedAt && onViewTaskLog(task)}>
-
- {task.language}/{task.exercise} +
+ + {task.language}/{task.exercise} + {task.iteration > 1 && ( + + (#{task.iteration}) + + )} + + {task.finishedAt && ( + + + + + Click to view log + + )}
@@ -282,6 +466,63 @@ export function Run({ run }: { run: Run }) { )}
+ + {/* Task Log Dialog - Full Screen */} + setSelectedTask(null)}> + + +
+ + + {selectedTask?.language}/{selectedTask?.exercise} + {selectedTask?.iteration && selectedTask.iteration > 1 && ( + (#{selectedTask.iteration}) + )} + + ({selectedTask?.passed ? "Passed" : "Failed"}) + + + {taskLog && ( + + )} +
+
+
+ {isLoadingLog ? ( +
+ +
+ ) : taskLog ? ( + +
+ {formatLogContent(taskLog)} +
+
+ ) : ( +
+ Log file not available (may have been cleared) +
+ )} +
+
+
) } diff --git a/apps/web-evals/src/app/runs/new/new-run.tsx b/apps/web-evals/src/app/runs/new/new-run.tsx index 3782f29a362..cb7dafd9922 100644 --- a/apps/web-evals/src/app/runs/new/new-run.tsx +++ b/apps/web-evals/src/app/runs/new/new-run.tsx @@ -7,7 +7,7 @@ import { useQuery } from "@tanstack/react-query" import { useForm, FormProvider } from "react-hook-form" import { zodResolver } from "@hookform/resolvers/zod" import { toast } from "sonner" -import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal } from "lucide-react" +import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Info } from "lucide-react" import { globalSettingsSchema, @@ -16,6 +16,7 @@ import { getModelId, type ProviderSettings, type GlobalSettings, + type ReasoningEffort, } from "@roo-code/types" import { createRun } from "@/actions/runs" @@ -30,6 +31,9 @@ import { TIMEOUT_MIN, TIMEOUT_MAX, TIMEOUT_DEFAULT, + ITERATIONS_MIN, + ITERATIONS_MAX, + ITERATIONS_DEFAULT, } from "@/lib/schemas" import { cn } from "@/lib/utils" @@ -40,6 +44,7 @@ import { Button, Checkbox, FormControl, + FormDescription, FormField, FormItem, FormLabel, @@ -61,7 +66,14 @@ import { PopoverTrigger, Slider, Label, - FormDescription, + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, + Tooltip, + TooltipContent, + TooltipTrigger, } from "@/components/ui" import { SettingsDiff } from "./settings-diff" @@ -78,6 +90,8 @@ export function NewRun() { const [provider, setModelSource] = useState<"roo" | "openrouter" | "other">("roo") const [modelPopoverOpen, setModelPopoverOpen] = useState(false) const [useNativeToolProtocol, setUseNativeToolProtocol] = useState(true) + const [useMultipleNativeToolCalls, setUseMultipleNativeToolCalls] = useState(true) + const [reasoningEffort, setReasoningEffort] = useState("") // State for imported settings with config selection const [importedSettings, setImportedSettings] = useState(null) @@ -106,6 +120,7 @@ export function NewRun() { settings: undefined, 
concurrency: CONCURRENCY_DEFAULT, timeout: TIMEOUT_DEFAULT, + iterations: ITERATIONS_DEFAULT, jobToken: "", }, }) @@ -204,12 +219,24 @@ export function NewRun() { const onSubmit = useCallback( async (values: CreateRun) => { try { + // Validate jobToken for Roo Code Cloud provider + if (provider === "roo" && !values.jobToken?.trim()) { + toast.error("Roo Code Cloud Token is required") + return + } + + // Build experiments settings + const experimentsSettings = useMultipleNativeToolCalls + ? { experiments: { multipleNativeToolCalls: true } } + : {} + if (provider === "openrouter") { values.settings = { ...(values.settings || {}), apiProvider: "openrouter", openRouterModelId: model, toolProtocol: useNativeToolProtocol ? "native" : "xml", + ...experimentsSettings, } } else if (provider === "roo") { values.settings = { @@ -217,6 +244,20 @@ export function NewRun() { apiProvider: "roo", apiModelId: model, toolProtocol: useNativeToolProtocol ? "native" : "xml", + ...experimentsSettings, + ...(reasoningEffort + ? { + enableReasoningEffort: true, + reasoningEffort: reasoningEffort as ReasoningEffort, + } + : {}), + } + } else if (provider === "other" && values.settings) { + // For imported settings, merge in experiments and tool protocol + values.settings = { + ...values.settings, + toolProtocol: useNativeToolProtocol ? "native" : "xml", + ...experimentsSettings, } } @@ -226,7 +267,7 @@ export function NewRun() { toast.error(e instanceof Error ? e.message : "An unknown error occurred.") } }, - [provider, model, router, useNativeToolProtocol], + [provider, model, router, useNativeToolProtocol, useMultipleNativeToolCalls, reasoningEffort], ) const onSelectModel = useCallback( @@ -394,6 +435,38 @@ export function NewRun() { )} +
+ +
+ + +
+
+ {settings && ( )} @@ -444,15 +517,66 @@ export function NewRun() { -
- - setUseNativeToolProtocol(checked === true) - } - /> - +
+
+ +
+ + +
+
+ + {provider === "roo" && ( +
+ + +

+ When set, enableReasoningEffort will be automatically enabled +

+
+ )}
)} @@ -468,20 +592,28 @@ export function NewRun() { name="jobToken" render={({ field }) => ( - Roo Code Cloud Token +
+ Roo Code Cloud Token + + + + + +

+ If you have access to the Roo Code Cloud repository, generate a + token with: +

+ + pnpm --filter @roo-code-cloud/auth production:create-job-token [org] + [timeout] + +
+
+
- + - - If you have access to the Roo Code Cloud repository then you can generate a - token with: -
- - pnpm --filter @roo-code-cloud/auth production:create-job-token [org] - [timeout] - -
)} /> @@ -600,6 +732,32 @@ export function NewRun() { )} /> + ( + + Iterations per Exercise + +
+ { + field.onChange(value[0]) + }} + /> +
{field.value}
+
+
+ Run each exercise multiple times to compare results + +
+ )} + /> + () const [showSettings, setShowSettings] = useState(false) + const [isExportingLogs, setIsExportingLogs] = useState(false) const continueRef = useRef(null) const { isPending, copyRun, copied } = useCopyRun(run.id) + const onExportFailedLogs = useCallback(async () => { + if (run.failed === 0) { + toast.error("No failed tasks to export") + return + } + + setIsExportingLogs(true) + try { + const response = await fetch(`/api/runs/${run.id}/logs/failed`) + + if (!response.ok) { + const error = await response.json() + toast.error(error.error || "Failed to export logs") + return + } + + // Download the zip file + const blob = await response.blob() + const url = window.URL.createObjectURL(blob) + const a = document.createElement("a") + a.href = url + a.download = `run-${run.id}-failed-logs.zip` + document.body.appendChild(a) + a.click() + window.URL.revokeObjectURL(url) + document.body.removeChild(a) + + toast.success("Failed logs exported successfully") + } catch (error) { + console.error("Error exporting logs:", error) + toast.error("Failed to export logs") + } finally { + setIsExportingLogs(false) + } + }, [run.id, run.failed]) + const onConfirmDelete = useCallback(async () => { if (!deleteRunId) { return @@ -161,6 +199,23 @@ export function Run({ run, taskMetrics, toolColumns }: RunProps) {
)} + {run.failed > 0 && ( + +
+ {isExportingLogs ? ( + <> + + Exporting... + + ) : ( + <> + + Export Failed Logs + + )} +
+
+ )} { setDeleteRunId(run.id) diff --git a/apps/web-evals/src/lib/schemas.ts b/apps/web-evals/src/lib/schemas.ts index 63c5fa7de50..478c328aa2c 100644 --- a/apps/web-evals/src/lib/schemas.ts +++ b/apps/web-evals/src/lib/schemas.ts @@ -14,6 +14,10 @@ export const TIMEOUT_MIN = 5 export const TIMEOUT_MAX = 10 export const TIMEOUT_DEFAULT = 5 +export const ITERATIONS_MIN = 1 +export const ITERATIONS_MAX = 10 +export const ITERATIONS_DEFAULT = 1 + export const createRunSchema = z .object({ model: z.string().min(1, { message: "Model is required." }), @@ -23,6 +27,7 @@ export const createRunSchema = z settings: rooCodeSettingsSchema.optional(), concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX), timeout: z.number().int().min(TIMEOUT_MIN).max(TIMEOUT_MAX), + iterations: z.number().int().min(ITERATIONS_MIN).max(ITERATIONS_MAX), jobToken: z.string().optional(), }) .refine((data) => data.suite === "full" || (data.exercises || []).length > 0, { diff --git a/packages/evals/docker-compose.override.yml b/packages/evals/docker-compose.override.yml new file mode 100644 index 00000000000..7ffc4d88261 --- /dev/null +++ b/packages/evals/docker-compose.override.yml @@ -0,0 +1,45 @@ +# Development overrides - automatically loaded by docker compose +# These settings only apply when running locally for development +# +# For production, use: docker compose -f docker-compose.yml up +# (explicitly exclude override file) + +services: + web: + environment: + - NODE_ENV=development + volumes: + # Mount log files so web can access task logs + - /tmp/evals:/tmp/evals:ro + # Mount source code for hot reload in development + - ../../apps/web-evals:/roo/repo/apps/web-evals:delegated + - ../../packages/evals:/roo/repo/packages/evals:delegated + - ../../packages/types:/roo/repo/packages/types:delegated + - ../../packages/ipc:/roo/repo/packages/ipc:delegated + - ../../packages/cloud:/roo/repo/packages/cloud:delegated + # Exclude node_modules from being overwritten + - 
/roo/repo/node_modules + - /roo/repo/apps/web-evals/node_modules + - /roo/repo/packages/evals/node_modules + - /roo/repo/packages/types/node_modules + - /roo/repo/packages/ipc/node_modules + - /roo/repo/packages/cloud/node_modules + entrypoint: [] + command: + - sh + - -c + - | + echo '🚀 Starting evals web service in development mode...' + wait_for_db() { + echo '⏳ Waiting for database...' + until pg_isready -h db -p 5432 -U postgres -d evals_development > /dev/null 2>&1; do + echo '⏳ Database not ready yet, waiting 2 seconds...' + sleep 2 + done + echo '✅ Database is ready' + } + wait_for_db + echo '🔄 Running database migrations...' + pnpm --filter @roo-code/evals db:migrate + echo '🌐 Starting Next.js dev server...' + cd /roo/repo/apps/web-evals && npx next dev -p 3446 diff --git a/packages/evals/docker-compose.yml b/packages/evals/docker-compose.yml index 5928b531142..43594639f1a 100644 --- a/packages/evals/docker-compose.yml +++ b/packages/evals/docker-compose.yml @@ -55,8 +55,11 @@ services: - "${EVALS_WEB_PORT:-3446}:3446" environment: - HOST_EXECUTION_METHOD=docker + - PRODUCTION_DATABASE_URL volumes: - /var/run/docker.sock:/var/run/docker.sock + # Mount log files so web can access task logs + - /tmp/evals:/tmp/evals:ro depends_on: db: condition: service_healthy diff --git a/packages/evals/src/db/migrations/0004_sloppy_black_knight.sql b/packages/evals/src/db/migrations/0004_sloppy_black_knight.sql new file mode 100644 index 00000000000..f643305225c --- /dev/null +++ b/packages/evals/src/db/migrations/0004_sloppy_black_knight.sql @@ -0,0 +1,3 @@ +DROP INDEX "tasks_language_exercise_idx";--> statement-breakpoint +ALTER TABLE "tasks" ADD COLUMN "iteration" integer DEFAULT 1 NOT NULL;--> statement-breakpoint +CREATE UNIQUE INDEX "tasks_language_exercise_iteration_idx" ON "tasks" USING btree ("run_id","language","exercise","iteration"); \ No newline at end of file diff --git a/packages/evals/src/db/migrations/meta/0004_snapshot.json 
b/packages/evals/src/db/migrations/meta/0004_snapshot.json new file mode 100644 index 00000000000..6aef6954e51 --- /dev/null +++ b/packages/evals/src/db/migrations/meta/0004_snapshot.json @@ -0,0 +1,472 @@ +{ + "id": "9caa4487-e146-4084-907d-fbf9cc3e03b9", + "prevId": "853d308a-3946-4ea8-9039-236bfce3c6c0", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.runs": { + "name": "runs", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "runs_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "task_metrics_id": { + "name": "task_metrics_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "contextWindow": { + "name": "contextWindow", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "inputPrice": { + "name": "inputPrice", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "outputPrice": { + "name": "outputPrice", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "cacheWritesPrice": { + "name": "cacheWritesPrice", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "cacheReadsPrice": { + "name": "cacheReadsPrice", + "type": "real", + "primaryKey": false, + "notNull": false + }, + "settings": { + "name": "settings", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "jobToken": { + "name": "jobToken", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "pid": { + "name": "pid", + "type": "integer", + "primaryKey": 
false, + "notNull": false + }, + "socket_path": { + "name": "socket_path", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "concurrency": { + "name": "concurrency", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 2 + }, + "timeout": { + "name": "timeout", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 5 + }, + "passed": { + "name": "passed", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "failed": { + "name": "failed", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "runs_task_metrics_id_taskMetrics_id_fk": { + "name": "runs_task_metrics_id_taskMetrics_id_fk", + "tableFrom": "runs", + "tableTo": "taskMetrics", + "columnsFrom": ["task_metrics_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.taskMetrics": { + "name": "taskMetrics", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "taskMetrics_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "tokens_in": { + "name": "tokens_in", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "tokens_out": { + "name": "tokens_out", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "tokens_context": { + "name": "tokens_context", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cache_writes": { + "name": "cache_writes", + "type": "integer", + "primaryKey": false, + "notNull": 
true + }, + "cache_reads": { + "name": "cache_reads", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "cost": { + "name": "cost", + "type": "real", + "primaryKey": false, + "notNull": true + }, + "duration": { + "name": "duration", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "tool_usage": { + "name": "tool_usage", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.tasks": { + "name": "tasks", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "tasks_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "run_id": { + "name": "run_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "task_metrics_id": { + "name": "task_metrics_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "language": { + "name": "language", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "exercise": { + "name": "exercise", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "iteration": { + "name": "iteration", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "passed": { + "name": "passed", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "started_at": { + "name": "started_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "finished_at": { + "name": "finished_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", 
+ "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "tasks_language_exercise_iteration_idx": { + "name": "tasks_language_exercise_iteration_idx", + "columns": [ + { + "expression": "run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "language", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "exercise", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "iteration", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "tasks_run_id_runs_id_fk": { + "name": "tasks_run_id_runs_id_fk", + "tableFrom": "tasks", + "tableTo": "runs", + "columnsFrom": ["run_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + }, + "tasks_task_metrics_id_taskMetrics_id_fk": { + "name": "tasks_task_metrics_id_taskMetrics_id_fk", + "tableFrom": "tasks", + "tableTo": "taskMetrics", + "columnsFrom": ["task_metrics_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.toolErrors": { + "name": "toolErrors", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "identity": { + "type": "always", + "name": "toolErrors_id_seq", + "schema": "public", + "increment": "1", + "startWith": "1", + "minValue": "1", + "maxValue": "2147483647", + "cache": "1", + "cycle": false + } + }, + "run_id": { + "name": "run_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "task_id": { + "name": "task_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "tool_name": { + "name": "tool_name", + "type": "text", + "primaryKey": false, + 
"notNull": true + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "toolErrors_run_id_runs_id_fk": { + "name": "toolErrors_run_id_runs_id_fk", + "tableFrom": "toolErrors", + "tableTo": "runs", + "columnsFrom": ["run_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + }, + "toolErrors_task_id_tasks_id_fk": { + "name": "toolErrors_task_id_tasks_id_fk", + "tableFrom": "toolErrors", + "tableTo": "tasks", + "columnsFrom": ["task_id"], + "columnsTo": ["id"], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} diff --git a/packages/evals/src/db/migrations/meta/_journal.json b/packages/evals/src/db/migrations/meta/_journal.json index 9be55aecb8a..813667c6375 100644 --- a/packages/evals/src/db/migrations/meta/_journal.json +++ b/packages/evals/src/db/migrations/meta/_journal.json @@ -29,6 +29,13 @@ "when": 1763797232454, "tag": "0003_simple_retro_girl", "breakpoints": true + }, + { + "idx": 4, + "version": "7", + "when": 1764201678953, + "tag": "0004_sloppy_black_knight", + "breakpoints": true } ] } diff --git a/packages/evals/src/db/schema.ts b/packages/evals/src/db/schema.ts index d8d4c3ea0a6..638aae0eeeb 100644 --- a/packages/evals/src/db/schema.ts +++ b/packages/evals/src/db/schema.ts @@ -55,12 +55,20 @@ export const tasks = pgTable( taskMetricsId: integer("task_metrics_id").references(() => taskMetrics.id), language: text().notNull().$type(), exercise: text().notNull(), + iteration: integer().default(1).notNull(), passed: boolean(), 
startedAt: timestamp("started_at"), finishedAt: timestamp("finished_at"), createdAt: timestamp("created_at").notNull(), }, - (table) => [uniqueIndex("tasks_language_exercise_idx").on(table.runId, table.language, table.exercise)], + (table) => [ + uniqueIndex("tasks_language_exercise_iteration_idx").on( + table.runId, + table.language, + table.exercise, + table.iteration, + ), + ], ) export const tasksRelations = relations(tasks, ({ one }) => ({ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6558ec50221..78f046c0c5f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -176,6 +176,9 @@ importers: '@tanstack/react-query': specifier: ^5.69.0 version: 5.76.1(react@18.3.1) + archiver: + specifier: ^7.0.1 + version: 7.0.1 class-variance-authority: specifier: ^0.7.1 version: 0.7.1 @@ -240,6 +243,9 @@ importers: '@tailwindcss/postcss': specifier: ^4 version: 4.1.8 + '@types/archiver': + specifier: ^7.0.0 + version: 7.0.0 '@types/ps-tree': specifier: ^1.1.6 version: 1.1.6 @@ -3904,6 +3910,9 @@ packages: '@tybys/wasm-util@0.9.0': resolution: {integrity: sha512-6+7nlbMVX/PVDCwaIQ8nTOPveOcFLSt8GcXdx8hD0bt39uWxYT88uXzqTd4fTvqta7oeUJqudepapKNt2DYJFw==} + '@types/archiver@7.0.0': + resolution: {integrity: sha512-/3vwGwx9n+mCQdYZ2IKGGHEFL30I96UgBlk8EtRDDFQ9uxM1l4O5Ci6r00EMAkiDaTqD9DQ6nVrWRICnBPtzzg==} + '@types/aria-query@5.0.4': resolution: {integrity: sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==} @@ -4142,6 +4151,9 @@ packages: '@types/react@18.3.23': resolution: {integrity: sha512-/LDXMQh55EzZQ0uVAZmKKhfENivEvWz6E+EYzh+/MCjMhNsotd+ZHhBGIjFDTi6+fz0OhQQQLbTgdQIxxCsC0w==} + '@types/readdir-glob@1.1.5': + resolution: {integrity: sha512-raiuEPUYqXu+nvtY2Pe8s8FEmZ3x5yAH4VkLdihcPdalvsHltomrRC9BzuStrJ9yk06470hS0Crw0f1pXqD+Hg==} + '@types/retry@0.12.5': resolution: {integrity: sha512-3xSjTp3v03X/lSQLkczaN9UIEwJMoMCA1+Nb5HfbJEQWogdeQIyVtTvxPXDQjZ5zws8rFQfVfRdz03ARihPJgw==} @@ -4471,10 +4483,18 @@ packages: resolution: {integrity: 
sha512-KVgf4XQVrTjhyWmx6cte4RxonPLR9onExufI1jhvw/MQ4BB6IsZD5gT8Lq+u/+pRkWna/6JoHpiQioaqFP5Rzw==} engines: {node: '>= 10'} + archiver-utils@5.0.2: + resolution: {integrity: sha512-wuLJMmIBQYCsGZgYLTy5FIB2pF6Lfb6cXMSF8Qywwk3t20zWnAi7zLcQFdKQmIB8wyZpY5ER38x08GbwtR2cLA==} + engines: {node: '>= 14'} + archiver@5.3.2: resolution: {integrity: sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==} engines: {node: '>= 10'} + archiver@7.0.1: + resolution: {integrity: sha512-ZcbTaIqJOfCc03QwD468Unz/5Ir8ATtvAHsK+FdXbDIbGfihqh9mrvdcYunQzqn4HrvWWaFyaxJhGZagaJJpPQ==} + engines: {node: '>= 14'} + arg@5.0.2: resolution: {integrity: sha512-PYjyFOLKQ9y57JvQ6QLo8dAgNqswh8M1RMJYdQduT6xbWSgK36P/Z/v+p888pM69jMMfS8Xd8F6I1kQ/I9HUGg==} @@ -4676,6 +4696,10 @@ packages: buffer-crc32@0.2.13: resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==} + buffer-crc32@1.0.0: + resolution: {integrity: sha512-Db1SbgBS/fg/392AblrMJk97KggmvYhr4pB5ZIMTWtaivCPMWLkmb7m21cJvpvgK+J3nsU2CmmixNBZx4vFj/w==} + engines: {node: '>=8.0.0'} + buffer-equal-constant-time@1.0.1: resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==} @@ -4689,6 +4713,9 @@ packages: buffer@5.7.1: resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==} + buffer@6.0.3: + resolution: {integrity: sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==} + buffers@0.1.1: resolution: {integrity: sha512-9q/rDEGSb/Qsvv2qvzIzdluL5k7AaJOTrw23z9reQthrbF7is4CtlT0DXyO1oei2DCp4uojjzQ7igaSHp1kAEQ==} engines: {node: '>=0.2.0'} @@ -4978,6 +5005,10 @@ packages: resolution: {integrity: sha512-D3uMHtGc/fcO1Gt1/L7i1e33VOvD4A9hfQLP+6ewd+BvG/gQ84Yh4oftEhAdjSMgBgwGL+jsppT7JYNpo6MHHg==} engines: {node: '>= 10'} + compress-commons@6.0.2: + resolution: {integrity: 
sha512-6FqVXeETqWPoGcfzrXb37E50NP0LXT8kAMu5ooZayhWWdgEY4lBEEcbQNXtkuKQsGduxiIcI4gOTsxTmuq/bSg==} + engines: {node: '>= 14'} + confbox@0.1.8: resolution: {integrity: sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==} @@ -5042,6 +5073,10 @@ packages: resolution: {integrity: sha512-NT7w2JVU7DFroFdYkeq8cywxrgjPHWkdX1wjpRQXPX5Asews3tA+Ght6lddQO5Mkumffp3X7GEqku3epj2toIw==} engines: {node: '>= 10'} + crc32-stream@6.0.0: + resolution: {integrity: sha512-piICUB6ei4IlTv1+653yq5+KoqfBYmj9bw6LqXoOneTMDXk5nM1qt12mFW1caG3LlJXEKW1Bp0WggEmIfQB34g==} + engines: {node: '>= 14'} + cross-fetch@4.0.0: resolution: {integrity: sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g==} @@ -5896,6 +5931,10 @@ packages: eventemitter3@5.0.1: resolution: {integrity: sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==} + events@3.3.0: + resolution: {integrity: sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==} + engines: {node: '>=0.8.x'} + eventsource-parser@3.0.2: resolution: {integrity: sha512-6RxOBZ/cYgd8usLwsEl+EC09Au/9BcmCKYF2/xbml6DNczf7nv0MQb+7BA2F+li6//I+28VNlQR37XfQtcAJuA==} engines: {node: '>=18.0.0'} @@ -8362,6 +8401,10 @@ packages: process-nextick-args@2.0.1: resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==} + process@0.11.10: + resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==} + engines: {node: '>= 0.6.0'} + progress@2.0.3: resolution: {integrity: sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==} engines: {node: '>=0.4.0'} @@ -8614,6 +8657,10 @@ packages: resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} engines: {node: '>= 6'} + readable-stream@4.7.0: + 
resolution: {integrity: sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==} + engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} + readdir-glob@1.1.3: resolution: {integrity: sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==} @@ -9180,6 +9227,9 @@ packages: string_decoder@1.1.1: resolution: {integrity: sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==} + string_decoder@1.3.0: + resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==} + stringify-entities@4.0.4: resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==} @@ -10271,6 +10321,10 @@ packages: resolution: {integrity: sha512-9qv4rlDiopXg4E69k+vMHjNN63YFMe9sZMrdlvKnCjlCRWeCBswPPMPUfx+ipsAWq1LXHe70RcbaHdJJpS6hyQ==} engines: {node: '>= 10'} + zip-stream@6.0.1: + resolution: {integrity: sha512-zK7YHHz4ZXpW89AHXUPbQVGKI7uvkd3hzusTdotCg1UxyaVtg0zFJSTfW/Dq5f7OBBVnq6cZIaC8Ti4hb6dtCA==} + engines: {node: '>= 14'} + zod-to-json-schema@3.24.5: resolution: {integrity: sha512-/AuWwMP+YqiPbsJx5D6TfgRTc4kTLjsh5SOcd4bLsfUg2RcEXrFMJl1DGgdHy2aCfsIA/cr/1JM0xcB2GZji8g==} peerDependencies: @@ -13557,6 +13611,10 @@ snapshots: tslib: 2.8.1 optional: true + '@types/archiver@7.0.0': + dependencies: + '@types/readdir-glob': 1.1.5 + '@types/aria-query@5.0.4': {} '@types/babel__core@7.20.5': @@ -13831,6 +13889,10 @@ snapshots: '@types/prop-types': 15.7.14 csstype: 3.1.3 + '@types/readdir-glob@1.1.5': + dependencies: + '@types/node': 24.2.1 + '@types/retry@0.12.5': {} '@types/shell-quote@1.7.5': {} @@ -14052,7 +14114,7 @@ snapshots: sirv: 3.0.1 tinyglobby: 0.2.14 tinyrainbow: 2.0.0 - vitest: 3.2.4(@types/debug@4.1.12)(@types/node@20.17.57)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0) + vitest: 
3.2.4(@types/debug@4.1.12)(@types/node@24.2.1)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0) '@vitest/utils@3.2.4': dependencies: @@ -14262,6 +14324,16 @@ snapshots: normalize-path: 3.0.0 readable-stream: 3.6.2 + archiver-utils@5.0.2: + dependencies: + glob: 11.1.0 + graceful-fs: 4.2.11 + is-stream: 2.0.1 + lazystream: 1.0.1 + lodash: 4.17.21 + normalize-path: 3.0.0 + readable-stream: 4.7.0 + archiver@5.3.2: dependencies: archiver-utils: 2.1.0 @@ -14272,6 +14344,16 @@ snapshots: tar-stream: 2.2.0 zip-stream: 4.1.1 + archiver@7.0.1: + dependencies: + archiver-utils: 5.0.2 + async: 3.2.6 + buffer-crc32: 1.0.0 + readable-stream: 4.7.0 + readdir-glob: 1.1.3 + tar-stream: 3.1.7 + zip-stream: 6.0.1 + arg@5.0.2: {} argparse@1.0.10: @@ -14502,6 +14584,8 @@ snapshots: buffer-crc32@0.2.13: {} + buffer-crc32@1.0.0: {} + buffer-equal-constant-time@1.0.1: {} buffer-from@1.1.2: {} @@ -14513,6 +14597,11 @@ snapshots: base64-js: 1.5.1 ieee754: 1.2.1 + buffer@6.0.3: + dependencies: + base64-js: 1.5.1 + ieee754: 1.2.1 + buffers@0.1.1: {} bundle-name@4.1.0: @@ -14823,6 +14912,14 @@ snapshots: normalize-path: 3.0.0 readable-stream: 3.6.2 + compress-commons@6.0.2: + dependencies: + crc-32: 1.2.2 + crc32-stream: 6.0.0 + is-stream: 2.0.1 + normalize-path: 3.0.0 + readable-stream: 4.7.0 + confbox@0.1.8: {} confbox@0.2.2: {} @@ -14881,6 +14978,11 @@ snapshots: crc-32: 1.2.2 readable-stream: 3.6.2 + crc32-stream@6.0.0: + dependencies: + crc-32: 1.2.2 + readable-stream: 4.7.0 + cross-fetch@4.0.0: dependencies: node-fetch: 2.7.0 @@ -15789,6 +15891,8 @@ snapshots: eventemitter3@5.0.1: {} + events@3.3.0: {} + eventsource-parser@3.0.2: {} eventsource@3.0.7: @@ -18692,6 +18796,8 @@ snapshots: process-nextick-args@2.0.1: {} + process@0.11.10: {} + progress@2.0.3: {} promise-limit@2.7.0: @@ -19021,6 +19127,14 @@ snapshots: string_decoder: 1.1.1 util-deprecate: 1.0.2 + readable-stream@4.7.0: + dependencies: + abort-controller: 3.0.0 + buffer: 6.0.3 + 
events: 3.3.0 + process: 0.11.10 + string_decoder: 1.3.0 + readdir-glob@1.1.3: dependencies: minimatch: 5.1.6 @@ -19766,6 +19880,10 @@ snapshots: dependencies: safe-buffer: 5.1.2 + string_decoder@1.3.0: + dependencies: + safe-buffer: 5.2.1 + stringify-entities@4.0.4: dependencies: character-entities-html4: 2.1.0 @@ -21059,6 +21177,12 @@ snapshots: compress-commons: 4.1.2 readable-stream: 3.6.2 + zip-stream@6.0.1: + dependencies: + archiver-utils: 5.0.2 + compress-commons: 6.0.2 + readable-stream: 4.7.0 + zod-to-json-schema@3.24.5(zod@3.25.61): dependencies: zod: 3.25.61