Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 207 additions & 0 deletions apps/web-evals/src/actions/__tests__/killRun.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// npx vitest run src/actions/__tests__/killRun.spec.ts

import { execFileSync } from "child_process"

// Replace child_process so that no real `docker` command is ever executed;
// the module under test shells out through execFileSync for every docker call.
vi.mock("child_process", () => ({
execFileSync: vi.fn(),
spawn: vi.fn(),
}))

// Replace next/cache so revalidatePath (called by killRun) is a no-op spy.
vi.mock("next/cache", () => ({
revalidatePath: vi.fn(),
}))

// Replace the redis helper: redisClient() resolves to a fake client whose
// del() resolves to 1. Individual tests may swap in their own fake client.
vi.mock("@/lib/server/redis", () => ({
redisClient: vi.fn().mockResolvedValue({
del: vi.fn().mockResolvedValue(1),
}),
}))

// Stub the @roo-code/evals exports listed below so importing "../runs" does
// not pull in the real package (presumably it needs a database - TODO confirm).
vi.mock("@roo-code/evals", () => ({
createRun: vi.fn(),
deleteRun: vi.fn(),
createTask: vi.fn(),
exerciseLanguages: [],
getExercisesForLanguage: vi.fn().mockResolvedValue([]),
}))

// killRun awaits a 10 second sleep between killing the controller and the
// task runners; fake timers let each test skip that wait instantly.
vi.useFakeTimers()

// The module under test is imported only after every mock has been declared.
import { killRun } from "../runs"

// execFileSync is the vi.fn() created in the child_process mock above; the
// cast exposes the mock API (mockReturnValueOnce, mockImplementationOnce, ...).
const mockExecFileSync = execFileSync as ReturnType<typeof vi.fn>

describe("killRun", () => {
	/**
	 * Starts killRun, fast-forwards the fake clock past the 10 second pause
	 * that separates the controller kill from the task-runner kills, and
	 * resolves with killRun's result.
	 */
	const killRunAndSkipWait = async (runId: number) => {
		const pending = killRun(runId)
		await vi.advanceTimersByTimeAsync(10000)
		return pending
	}

	beforeEach(() => {
		vi.clearAllMocks()
	})

	afterEach(() => {
		vi.clearAllTimers()
	})

	it("should kill controller first, wait, then kill task containers", async () => {
		// execFileSync is used for all docker commands, in this order.
		mockExecFileSync
			.mockReturnValueOnce("") // docker kill controller
			.mockReturnValueOnce("evals-task-123-456.0\nevals-task-123-789.1\n") // docker ps
			.mockReturnValueOnce("") // docker kill evals-task-123-456.0
			.mockReturnValueOnce("") // docker kill evals-task-123-789.1

		const result = await killRunAndSkipWait(123)

		expect(result.success).toBe(true)
		expect(result.killedContainers).toContain("evals-controller-123")
		expect(result.killedContainers).toContain("evals-task-123-456.0")
		expect(result.killedContainers).toContain("evals-task-123-789.1")
		expect(result.errors).toHaveLength(0)

		// First docker call kills the controller.
		expect(mockExecFileSync).toHaveBeenNthCalledWith(
			1,
			"docker",
			["kill", "evals-controller-123"],
			expect.any(Object),
		)
		// Second docker call lists containers using the run-specific filter.
		expect(mockExecFileSync).toHaveBeenNthCalledWith(
			2,
			"docker",
			["ps", "--format", "{{.Names}}", "--filter", "name=evals-task-123-"],
			expect.any(Object),
		)
	})

	it("should continue killing runners even if controller is not running", async () => {
		mockExecFileSync
			.mockImplementationOnce(() => {
				throw new Error("No such container")
			}) // controller kill fails
			.mockReturnValueOnce("evals-task-456-100.0\n") // docker ps
			.mockReturnValueOnce("") // docker kill task

		const result = await killRunAndSkipWait(456)

		expect(result.success).toBe(true)
		expect(result.killedContainers).toContain("evals-task-456-100.0")
		// Controller is not in the list since killing it failed.
		expect(result.killedContainers).not.toContain("evals-controller-456")
	})

	it("should clear Redis state after killing containers", async () => {
		// Swap in a fake client whose del() can be inspected directly.
		const mockDel = vi.fn().mockResolvedValue(1)
		const { redisClient } = await import("@/lib/server/redis")
		vi.mocked(redisClient).mockResolvedValue({ del: mockDel } as never)

		mockExecFileSync
			.mockReturnValueOnce("") // controller kill
			.mockReturnValueOnce("") // docker ps (no tasks)

		await killRunAndSkipWait(789)

		expect(mockDel).toHaveBeenCalledWith("heartbeat:789")
		expect(mockDel).toHaveBeenCalledWith("runners:789")
	})

	it("should handle docker ps failure gracefully", async () => {
		mockExecFileSync
			.mockReturnValueOnce("") // controller kill succeeds
			.mockImplementationOnce(() => {
				throw new Error("Docker error")
			}) // docker ps fails

		const result = await killRunAndSkipWait(111)

		// Still successful because the controller was killed.
		expect(result.success).toBe(true)
		expect(result.killedContainers).toContain("evals-controller-111")
		expect(result.errors).toContain("Failed to list Docker task containers")
	})

	it("should handle individual task kill failures", async () => {
		mockExecFileSync
			.mockReturnValueOnce("") // controller kill
			.mockReturnValueOnce("evals-task-222-300.0\nevals-task-222-400.0\n") // docker ps
			.mockImplementationOnce(() => {
				throw new Error("Kill failed")
			}) // first task kill fails
			.mockReturnValueOnce("") // second task kill succeeds

		const result = await killRunAndSkipWait(222)

		expect(result.success).toBe(true)
		expect(result.killedContainers).toContain("evals-controller-222")
		expect(result.killedContainers).toContain("evals-task-222-400.0")
		expect(result.errors.length).toBe(1)
		expect(result.errors[0]).toContain("evals-task-222-300.0")
	})

	it("should return success with no containers when nothing is running", async () => {
		mockExecFileSync
			.mockImplementationOnce(() => {
				throw new Error("No such container")
			}) // controller not running
			.mockReturnValueOnce("") // no task containers

		const result = await killRunAndSkipWait(333)

		expect(result.success).toBe(true)
		expect(result.killedContainers).toHaveLength(0)
		expect(result.errors).toHaveLength(0)
	})

	it("should only kill containers belonging to the specific run", async () => {
		// Simulate `docker ps` returning names that do not start with this
		// run's task prefix (the docker name filter is not anchored), alongside
		// the one container that really belongs to run 555.
		const dockerPsOutput = [
			"evals-task-555-100.0",
			"staging-evals-task-555-300.0",
			"evals-task-5555-200.0",
			"evals-task-55-400.0",
			"",
		].join("\n")

		mockExecFileSync
			.mockReturnValueOnce("") // controller kill
			.mockReturnValueOnce(dockerPsOutput) // docker ps
			.mockReturnValueOnce("") // docker kill evals-task-555-100.0

		const result = await killRunAndSkipWait(555)

		expect(result.success).toBe(true)
		expect(result.errors).toHaveLength(0)
		// Only the controller and the single matching task container are killed.
		expect(result.killedContainers).toEqual(["evals-controller-555", "evals-task-555-100.0"])

		// docker ps is queried with the run-specific filter.
		expect(mockExecFileSync).toHaveBeenNthCalledWith(
			2,
			"docker",
			["ps", "--format", "{{.Names}}", "--filter", "name=evals-task-555-"],
			expect.any(Object),
		)
		// Exactly one task kill follows, and it targets the matching container.
		expect(mockExecFileSync).toHaveBeenNthCalledWith(
			3,
			"docker",
			["kill", "evals-task-555-100.0"],
			expect.any(Object),
		)
		expect(mockExecFileSync).toHaveBeenCalledTimes(3)
	})
})
100 changes: 99 additions & 1 deletion apps/web-evals/src/actions/runs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import * as path from "path"
import fs from "fs"
import { fileURLToPath } from "url"
import { spawn } from "child_process"
import { spawn, execFileSync } from "child_process"

import { revalidatePath } from "next/cache"
import pMap from "p-map"
Expand All @@ -18,6 +18,7 @@ import {
} from "@roo-code/evals"

import { CreateRun } from "@/lib/schemas"
import { redisClient } from "@/lib/server/redis"

const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")

Expand Down Expand Up @@ -116,3 +117,100 @@ export async function deleteRun(runId: number) {
await _deleteRun(runId)
revalidatePath("/runs")
}

/** Outcome reported by killRun. */
export type KillRunResult = {
/** True when at least one container was killed or when no error was recorded. */
success: boolean
/** Names of the containers for which `docker kill` completed without throwing. */
killedContainers: string[]
/** Human-readable messages, one per step that failed (listing, kill, Redis cleanup). */
errors: string[]
}

/** Returns a promise that settles once `ms` milliseconds have elapsed on the timer queue. */
function sleep(ms: number) {
	return new Promise((done) => {
		setTimeout(done, ms)
	})
}

/** Upper bound, in milliseconds, for every individual docker CLI invocation. */
const DOCKER_COMMAND_TIMEOUT_MS = 10000

/** Pause, in milliseconds, between killing the controller and killing its task runners. */
const CONTROLLER_KILL_GRACE_MS = 10000

/** Runs `docker kill <containerName>`; throws when the command fails or times out. */
function dockerKill(containerName: string): void {
	execFileSync("docker", ["kill", containerName], { encoding: "utf-8", timeout: DOCKER_COMMAND_TIMEOUT_MS })
}

/** Lists running containers via `docker ps`, keeping only names that start with `prefix`. */
function listRunningContainers(prefix: string): string[] {
	const output = execFileSync("docker", ["ps", "--format", "{{.Names}}", "--filter", `name=${prefix}`], {
		encoding: "utf-8",
		timeout: DOCKER_COMMAND_TIMEOUT_MS,
	})

	// The docker name filter is not anchored to the start of the name, so the
	// prefix is re-checked here to make sure only this run's containers remain.
	return output
		.split("\n")
		.map((name) => name.trim())
		.filter((name) => name.startsWith(prefix))
}

/**
 * Kill all Docker containers associated with a run (controller and task runners).
 * Kills the controller first, waits 10 seconds, then kills runners.
 * Also clears Redis state for heartbeat and runners, and revalidates the run pages.
 *
 * Container naming conventions:
 * - Controller: evals-controller-{runId}
 * - Task runners: evals-task-{runId}-{taskId}.{attempt}
 *
 * Failures of individual steps never reject the returned promise; they are
 * collected in `errors` instead.
 *
 * @param runId - Id of the run to stop. Must be a non-negative integer; any other
 *   value is rejected up front without touching Docker, Redis or the cache.
 * @returns Which containers were killed, which steps failed, and an overall
 *   `success` flag (true when something was killed or nothing went wrong).
 */
export async function killRun(runId: number): Promise<KillRunResult> {
	const killedContainers: string[] = []
	const errors: string[] = []

	// The argument ultimately comes from the client, so it is checked at runtime
	// even though it is typed as a number. Without this guard a NaN or fractional
	// id would still wait 10 seconds, delete Redis keys and report success.
	if (!Number.isSafeInteger(runId) || runId < 0) {
		errors.push("Invalid run id")
		return { success: false, killedContainers, errors }
	}

	const controllerPattern = `evals-controller-${runId}`
	const taskPattern = `evals-task-${runId}-`

	try {
		// Step 1: Kill the controller first
		console.log(`Killing controller: ${controllerPattern}`)
		try {
			dockerKill(controllerPattern)
			killedContainers.push(controllerPattern)
			console.log(`Killed controller container: ${controllerPattern}`)
		} catch {
			// Controller might not be running - that's ok, continue to kill runners
			console.log(`Controller ${controllerPattern} not running or already stopped`)
		}

		// Step 2: Wait 10 seconds before killing runners
		console.log("Waiting 10 seconds before killing runners...")
		await sleep(CONTROLLER_KILL_GRACE_MS)

		// Step 3: Find and kill all task runner containers for THIS run only
		let taskContainerNames: string[] = []

		try {
			taskContainerNames = listRunningContainers(taskPattern)
		} catch (error) {
			console.error("Failed to list task containers:", error)
			errors.push("Failed to list Docker task containers")
		}

		for (const containerName of taskContainerNames) {
			try {
				dockerKill(containerName)
				killedContainers.push(containerName)
				console.log(`Killed task container: ${containerName}`)
			} catch (error) {
				// Container might have already stopped
				console.error(`Failed to kill container ${containerName}:`, error)
				errors.push(`Failed to kill container: ${containerName}`)
			}
		}

		// Step 4: Clear Redis state
		try {
			const redis = await redisClient()
			const heartbeatKey = `heartbeat:${runId}`
			const runnersKey = `runners:${runId}`

			await redis.del(heartbeatKey)
			await redis.del(runnersKey)
			console.log(`Cleared Redis keys: ${heartbeatKey}, ${runnersKey}`)
		} catch (error) {
			console.error("Failed to clear Redis state:", error)
			errors.push("Failed to clear Redis state")
		}
	} catch (error) {
		// Safety net: every step above handles its own failures, so this only
		// fires for something truly unexpected.
		console.error("Error in killRun:", error)
		errors.push("Unexpected error while killing containers")
	}

	revalidatePath(`/runs/${runId}`)
	revalidatePath("/runs")

	return {
		success: killedContainers.length > 0 || errors.length === 0,
		killedContainers,
		errors,
	}
}
2 changes: 1 addition & 1 deletion apps/web-evals/src/app/runs/[id]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export default async function Page({ params }: { params: Promise<{ id: string }>
const run = await findRun(Number(id))

return (
<div className="max-w-3xl mx-auto px-12 p-12">
<div className="w-full px-6 py-12">
<Run run={run} />
</div>
)
Expand Down
Loading
Loading