diff --git a/apps/web-evals/src/app/runs/new/new-run.tsx b/apps/web-evals/src/app/runs/new/new-run.tsx index 561c3ceb27a..be015ac8ca3 100644 --- a/apps/web-evals/src/app/runs/new/new-run.tsx +++ b/apps/web-evals/src/app/runs/new/new-run.tsx @@ -7,7 +7,7 @@ import { useQuery } from "@tanstack/react-query" import { useForm, FormProvider } from "react-hook-form" import { zodResolver } from "@hookform/resolvers/zod" import { toast } from "sonner" -import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Info } from "lucide-react" +import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Info, Plus, Minus } from "lucide-react" import { globalSettingsSchema, @@ -16,7 +16,6 @@ import { getModelId, type ProviderSettings, type GlobalSettings, - type ReasoningEffort, } from "@roo-code/types" import { createRun } from "@/actions/runs" @@ -44,7 +43,6 @@ import { Button, Checkbox, FormControl, - FormDescription, FormField, FormItem, FormLabel, @@ -66,11 +64,6 @@ import { PopoverTrigger, Slider, Label, - Select, - SelectContent, - SelectItem, - SelectTrigger, - SelectValue, Tooltip, TooltipContent, TooltipTrigger, @@ -84,21 +77,38 @@ type ImportedSettings = { currentApiConfigName: string } +// Type for a model selection entry +type ModelSelection = { + id: string + model: string + popoverOpen: boolean +} + +// Type for a config selection entry (for import mode) +type ConfigSelection = { + id: string + configName: string + popoverOpen: boolean +} + export function NewRun() { const router = useRouter() const [provider, setModelSource] = useState<"roo" | "openrouter" | "other">("other") - const [modelPopoverOpen, setModelPopoverOpen] = useState(false) const [useNativeToolProtocol, setUseNativeToolProtocol] = useState(true) - const [useMultipleNativeToolCalls, setUseMultipleNativeToolCalls] = useState(false) - const [reasoningEffort, setReasoningEffort] = useState("") const [commandExecutionTimeout, setCommandExecutionTimeout] = useState(20) const [terminalShellIntegrationTimeout, setTerminalShellIntegrationTimeout] = useState(30) // seconds - // State for imported settings with config selection + // State for multiple model selections + const [modelSelections, setModelSelections] = useState([ + { id: crypto.randomUUID(), model: "", popoverOpen: false }, + ]) + + // State for imported settings with multiple config selections const [importedSettings, setImportedSettings] = useState(null) - const [selectedConfigName, setSelectedConfigName] = useState("") - const [configPopoverOpen, setConfigPopoverOpen] = useState(false) + const [configSelections, setConfigSelections] = useState([ + { id: crypto.randomUUID(), configName: "", popoverOpen: false }, + ]) const openRouter = useOpenRouterModels() const rooCodeCloud = useRooCodeCloudModels() @@ -134,7 +144,7 @@ export function NewRun() { formState: { isSubmitting }, } = form - const [model, suite, settings] = watch(["model", "suite", "settings", "concurrency"]) + const [suite, settings] = watch(["suite", "settings", "concurrency"]) // Load settings from localStorage on mount useEffect(() => { @@ -250,6 +260,60 @@ export function NewRun() { [getExercisesForLanguage, selectedExercises], ) + // Add a new model selection + const addModelSelection = useCallback(() => { + setModelSelections((prev) => [...prev, { id: crypto.randomUUID(), model: "", popoverOpen: false }]) + }, []) + + // Remove a model selection + const removeModelSelection = useCallback((id: string) => { + setModelSelections((prev) => prev.filter((s) => s.id !== id)) + }, []) + + // Update a model selection + const updateModelSelection = useCallback( + (id: string, model: string) => { + setModelSelections((prev) => prev.map((s) => (s.id === id ? { ...s, model, popoverOpen: false } : s))) + // Also set the form model field for validation (use first non-empty model) + setValue("model", model) + }, + [setValue], + ) + + // Toggle popover for a model selection + const toggleModelPopover = useCallback((id: string, open: boolean) => { + setModelSelections((prev) => prev.map((s) => (s.id === id ? { ...s, popoverOpen: open } : s))) + }, []) + + // Add a new config selection + const addConfigSelection = useCallback(() => { + setConfigSelections((prev) => [...prev, { id: crypto.randomUUID(), configName: "", popoverOpen: false }]) + }, []) + + // Remove a config selection + const removeConfigSelection = useCallback((id: string) => { + setConfigSelections((prev) => prev.filter((s) => s.id !== id)) + }, []) + + // Update a config selection + const updateConfigSelection = useCallback( + (id: string, configName: string) => { + setConfigSelections((prev) => prev.map((s) => (s.id === id ? { ...s, configName, popoverOpen: false } : s))) + // Also update the form settings for the first config (for validation) + if (importedSettings) { + const providerSettings = importedSettings.apiConfigs[configName] ?? {} + setValue("model", getModelId(providerSettings) ?? "") + setValue("settings", { ...EVALS_SETTINGS, ...providerSettings, ...importedSettings.globalSettings }) + } + }, + [importedSettings, setValue], + ) + + // Toggle popover for a config selection + const toggleConfigPopover = useCallback((id: string, open: boolean) => { + setConfigSelections((prev) => prev.map((s) => (s.id === id ? { ...s, popoverOpen: open } : s))) + }, []) + const onSubmit = useCallback( async (values: CreateRun) => { try { @@ -259,74 +323,104 @@ export function NewRun() { return } - // Build experiments settings - const experimentsSettings = useMultipleNativeToolCalls - ? { experiments: { multipleNativeToolCalls: true } } - : {} - - if (provider === "openrouter") { - values.settings = { - ...(values.settings || {}), - apiProvider: "openrouter", - openRouterModelId: model, - toolProtocol: useNativeToolProtocol ? "native" : "xml", - commandExecutionTimeout, - terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, // Convert to ms - ...experimentsSettings, + // Determine which selections to use based on provider + const selectionsToLaunch: { model: string; configName?: string }[] = [] + + if (provider === "other") { + // For import mode, use config selections + for (const config of configSelections) { + if (config.configName) { + selectionsToLaunch.push({ model: "", configName: config.configName }) + } + } + } else { + // For openrouter/roo, use model selections + for (const selection of modelSelections) { + if (selection.model) { + selectionsToLaunch.push({ model: selection.model }) + } + } + } + + if (selectionsToLaunch.length === 0) { + toast.error("Please select at least one model or config") + return + } + + // Show launching toast + const totalRuns = selectionsToLaunch.length + toast.info(totalRuns > 1 ? `Launching ${totalRuns} runs (every 20 seconds)...` : "Launching run...") + + // Launch runs with 20-second delay between each + for (let i = 0; i < selectionsToLaunch.length; i++) { + const selection = selectionsToLaunch[i]! + + // Wait 20 seconds between runs (except for the first one) + if (i > 0) { + await new Promise((resolve) => setTimeout(resolve, 20000)) } - } else if (provider === "roo") { - values.settings = { - ...(values.settings || {}), - apiProvider: "roo", - apiModelId: model, - toolProtocol: useNativeToolProtocol ? "native" : "xml", - commandExecutionTimeout, - terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, // Convert to ms - ...experimentsSettings, - ...(reasoningEffort - ? { - enableReasoningEffort: true, - reasoningEffort: reasoningEffort as ReasoningEffort, - } - : {}), + + const runValues = { ...values } + + if (provider === "openrouter") { + runValues.model = selection.model + runValues.settings = { + ...(runValues.settings || {}), + apiProvider: "openrouter", + openRouterModelId: selection.model, + toolProtocol: useNativeToolProtocol ? "native" : "xml", + commandExecutionTimeout, + terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, + } + } else if (provider === "roo") { + runValues.model = selection.model + runValues.settings = { + ...(runValues.settings || {}), + apiProvider: "roo", + apiModelId: selection.model, + toolProtocol: useNativeToolProtocol ? "native" : "xml", + commandExecutionTimeout, + terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, + } + } else if (provider === "other" && selection.configName && importedSettings) { + const providerSettings = importedSettings.apiConfigs[selection.configName] ?? {} + runValues.model = getModelId(providerSettings) ?? "" + runValues.settings = { + ...EVALS_SETTINGS, + ...providerSettings, + ...importedSettings.globalSettings, + toolProtocol: useNativeToolProtocol ? "native" : "xml", + commandExecutionTimeout, + terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, + } } - } else if (provider === "other" && values.settings) { - // For imported settings, merge in experiments and tool protocol - values.settings = { - ...values.settings, - toolProtocol: useNativeToolProtocol ? "native" : "xml", - commandExecutionTimeout, - terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, // Convert to ms - ...experimentsSettings, + + try { + await createRun(runValues) + toast.success(`Run ${i + 1}/${totalRuns} launched`) + } catch (e) { + toast.error(`Run ${i + 1} failed: ${e instanceof Error ? e.message : "Unknown error"}`) } } - const { id } = await createRun(values) - router.push(`/runs/${id}`) + // Navigate back to main evals UI + router.push("/") } catch (e) { toast.error(e instanceof Error ? e.message : "An unknown error occurred.") } }, [ provider, - model, + modelSelections, + configSelections, + importedSettings, router, useNativeToolProtocol, - useMultipleNativeToolCalls, - reasoningEffort, commandExecutionTimeout, terminalShellIntegrationTimeout, ], ) - const onSelectModel = useCallback( - (model: string) => { - setValue("model", model) - setModelPopoverOpen(false) - }, - [setValue, setModelPopoverOpen], - ) - const onImportSettings = useCallback( async (event: React.ChangeEvent) => { const file = event.target.files?.[0] @@ -355,9 +449,9 @@ export function NewRun() { currentApiConfigName: providerProfiles.currentApiConfigName, }) - // Default to the current config + // Default to the current config for the first selection const defaultConfigName = providerProfiles.currentApiConfigName - setSelectedConfigName(defaultConfigName) + setConfigSelections([{ id: crypto.randomUUID(), configName: defaultConfigName, popoverOpen: false }]) // Apply the default config const providerSettings = providerProfiles.apiConfigs[defaultConfigName] ?? {} @@ -373,22 +467,6 @@ export function NewRun() { [clearErrors, setValue], ) - const onSelectConfig = useCallback( - (configName: string) => { - if (!importedSettings) { - return - } - - setSelectedConfigName(configName) - setConfigPopoverOpen(false) - - const providerSettings = importedSettings.apiConfigs[configName] ?? {} - setValue("model", getModelId(providerSettings) ?? "") - setValue("settings", { ...EVALS_SETTINGS, ...providerSettings, ...importedSettings.globalSettings }) - }, - [importedSettings, setValue], - ) - return ( <> @@ -428,59 +506,91 @@ export function NewRun() { onChange={onImportSettings} /> - {importedSettings && Object.keys(importedSettings.apiConfigs).length > 1 && ( -
- - - - - - - - - - No config found. - - {Object.keys(importedSettings.apiConfigs).map( - (configName) => ( - - {configName} - {configName === - importedSettings.currentApiConfigName && ( - - (default) - - )} - 0 && ( +
+ + {configSelections.map((selection, index) => ( +
+ + toggleConfigPopover(selection.id, open) + }> + + + + + + + + No config found. + + {Object.keys( + importedSettings.apiConfigs, + ).map((configName) => ( + + updateConfigSelection( + selection.id, + configName, + ) + }> + {configName} + {configName === + importedSettings.currentApiConfigName && ( + + (default) + )} - /> - - ), - )} - - - - - + + + ))} + + + + + + {index === configSelections.length - 1 ? ( + + ) : ( + + )} +
+ ))}
)} @@ -501,18 +611,6 @@ export function NewRun() { /> Use Native Tool Calls -
@@ -522,110 +620,103 @@ export function NewRun() { ) : ( <> - - - - - - - - - No model found. - - {models?.map(({ id, name }) => ( - - {name} - - - ))} - - - - - - -
-
- -
- - +
+ {modelSelections.map((selection, index) => ( +
+ toggleModelPopover(selection.id, open)}> + + + + + + + + No model found. + + {models?.map(({ id, name }) => ( + + updateModelSelection( + selection.id, + id, + ) + }> + {name} + + + ))} + + + + + + {index === modelSelections.length - 1 ? ( + + ) : ( + + )}
-
+ ))} +
- {provider === "roo" && ( -
- - -

- When set, enableReasoningEffort will be automatically enabled -

-
- )} +
+ +
+ +
)} @@ -732,147 +823,153 @@ export function NewRun() { )} /> - ( - - Concurrency - -
- { - field.onChange(value[0]) - localStorage.setItem("evals-concurrency", String(value[0])) - }} - /> -
{field.value}
-
-
- -
- )} - /> + {/* Concurrency, Timeout, and Iterations in a 3-column row */} +
+ ( + + Concurrency + +
+ { + field.onChange(value[0]) + localStorage.setItem("evals-concurrency", String(value[0])) + }} + /> +
{field.value}
+
+
+ +
+ )} + /> - ( - - Timeout (Minutes) - -
- { - field.onChange(value[0]) - localStorage.setItem("evals-timeout", String(value[0])) - }} - /> -
{field.value}
-
-
- -
- )} - /> + ( + + Timeout (Minutes) + +
+ { + field.onChange(value[0]) + localStorage.setItem("evals-timeout", String(value[0])) + }} + /> +
{field.value}
+
+
+ +
+ )} + /> - ( - - Iterations per Exercise - -
- { - field.onChange(value[0]) - }} - /> -
{field.value}
-
-
- Run each exercise multiple times to compare results - -
- )} - /> + ( + + Iterations + +
+ { + field.onChange(value[0]) + }} + /> +
{field.value}
+
+
+ +
+ )} + /> +
- -
- - - - - - -

- Maximum time in seconds to wait for terminal command execution to complete - before timing out. This applies to commands run via the execute_command tool. -

-
-
-
-
- { - if (value !== undefined) { - setCommandExecutionTimeout(value) - localStorage.setItem("evals-command-execution-timeout", String(value)) - } - }} - /> -
{commandExecutionTimeout}
-
-
- - -
- - - - - - -

- Maximum time in seconds to wait for shell integration to initialize when opening - a new terminal. -

-
-
-
-
- { - if (value !== undefined) { - setTerminalShellIntegrationTimeout(value) - localStorage.setItem("evals-shell-integration-timeout", String(value)) - } - }} - /> -
{terminalShellIntegrationTimeout}
-
-
+ {/* Terminal timeouts in a 2-column row */} +
+ +
+ + + + + + +

+ Maximum time in seconds to wait for terminal command execution to complete + before timing out. This applies to commands run via the execute_command + tool. +

+
+
+
+
+ { + if (value !== undefined) { + setCommandExecutionTimeout(value) + localStorage.setItem("evals-command-execution-timeout", String(value)) + } + }} + /> +
{commandExecutionTimeout}
+
+
+ + +
+ + + + + + +

+ Maximum time in seconds to wait for shell integration to initialize when + opening a new terminal. +

+
+
+
+
+ { + if (value !== undefined) { + setTerminalShellIntegrationTimeout(value) + localStorage.setItem("evals-shell-integration-timeout", String(value)) + } + }} + /> +
{terminalShellIntegrationTimeout}
+
+
+