@@ -5,15 +5,36 @@ import { LoaderCircle } from "lucide-react"
55
66import type { Run , TaskMetrics as _TaskMetrics } from "@roo-code/evals"
77
8- import { formatCurrency , formatDuration , formatTokens } from "@/lib/formatters"
8+ import { formatCurrency , formatDuration , formatTokens , formatToolUsageSuccessRate } from "@/lib/formatters"
99import { useRunStatus } from "@/hooks/use-run-status"
10- import { Table , TableBody , TableCell , TableHead , TableHeader , TableRow } from "@/components/ui"
10+ import {
11+ Table ,
12+ TableBody ,
13+ TableCell ,
14+ TableHead ,
15+ TableHeader ,
16+ TableRow ,
17+ Tooltip ,
18+ TooltipContent ,
19+ TooltipTrigger ,
20+ } from "@/components/ui"
1121
1222import { TaskStatus } from "./task-status"
1323import { RunStatus } from "./run-status"
1424
1525type TaskMetrics = Pick < _TaskMetrics , "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost" >
1626
/** Attempt and failure counters tracked for a single tool. */
type ToolUsageEntry = {
	attempts: number
	failures: number
}

/** Per-tool counters, keyed by tool name. */
type ToolUsage = { [toolName: string]: ToolUsageEntry }
29+
/**
 * Builds a short uppercase label for a tool name from the first character of
 * each underscore-separated segment.
 *
 * Empty segments (produced by leading, trailing, or doubled underscores)
 * contribute nothing, so an empty name yields an empty string.
 *
 * @example
 * getToolAbbreviation("read_file") // "RF"
 * getToolAbbreviation("list_code_definition_names") // "LCDN"
 *
 * @param toolName - Tool identifier, split on "_".
 * @returns The concatenated, upper-cased initials.
 */
function getToolAbbreviation(toolName: string): string {
	let abbreviation = ""
	for (const segment of toolName.split("_")) {
		const initial = segment[0]
		// An empty segment has no first character; skip it.
		if (initial !== undefined) {
			abbreviation += initial.toUpperCase()
		}
	}
	return abbreviation
}
37+
1738export function Run ( { run } : { run : Run } ) {
1839 const runStatus = useRunStatus ( run )
1940 const { tasks, tokenUsage, usageUpdatedAt } = runStatus
@@ -41,16 +62,162 @@ export function Run({ run }: { run: Run }) {
4162 // eslint-disable-next-line react-hooks/exhaustive-deps
4263 } , [ tasks , tokenUsage , usageUpdatedAt ] )
4364
65+ // Compute aggregate stats
66+ const stats = useMemo ( ( ) => {
67+ if ( ! tasks ) return null
68+
69+ const passed = tasks . filter ( ( t ) => t . passed === true ) . length
70+ const failed = tasks . filter ( ( t ) => t . passed === false ) . length
71+ const running = tasks . filter ( ( t ) => t . startedAt && ! t . finishedAt ) . length
72+ const pending = tasks . filter ( ( t ) => ! t . startedAt && ! t . finishedAt ) . length
73+ const total = tasks . length
74+ const completed = passed + failed
75+
76+ let totalTokensIn = 0
77+ let totalTokensOut = 0
78+ let totalCost = 0
79+ let totalDuration = 0
80+
81+ // Aggregate tool usage from completed tasks
82+ const toolUsage : ToolUsage = { }
83+
84+ for ( const task of tasks ) {
85+ const metrics = taskMetrics [ task . id ]
86+ if ( metrics ) {
87+ totalTokensIn += metrics . tokensIn
88+ totalTokensOut += metrics . tokensOut
89+ totalCost += metrics . cost
90+ totalDuration += metrics . duration
91+ }
92+
93+ // Aggregate tool usage from finished tasks with taskMetrics
94+ if ( task . finishedAt && task . taskMetrics ?. toolUsage ) {
95+ for ( const [ key , usage ] of Object . entries ( task . taskMetrics . toolUsage ) ) {
96+ const tool = key as keyof ToolUsage
97+ if ( ! toolUsage [ tool ] ) {
98+ toolUsage [ tool ] = { attempts : 0 , failures : 0 }
99+ }
100+ toolUsage [ tool ] . attempts += usage . attempts
101+ toolUsage [ tool ] . failures += usage . failures
102+ }
103+ }
104+ }
105+
106+ return {
107+ passed,
108+ failed,
109+ running,
110+ pending,
111+ total,
112+ completed,
113+ passRate : completed > 0 ? ( ( passed / completed ) * 100 ) . toFixed ( 1 ) : null ,
114+ totalTokensIn,
115+ totalTokensOut,
116+ totalCost,
117+ totalDuration,
118+ toolUsage,
119+ }
120+ } , [ tasks , taskMetrics ] )
121+
44122 return (
45123 < >
46124 < div >
47- < div className = "mb-2 " >
125+ < div className = "mb-4 " >
48126 < div >
49127 < div className = "font-mono" > { run . model } </ div >
50128 { run . description && < div className = "text-sm text-muted-foreground" > { run . description } </ div > }
51129 </ div >
52130 { ! run . taskMetricsId && < RunStatus runStatus = { runStatus } /> }
53131 </ div >
132+
133+ { stats && (
134+ < div className = "mb-4 p-4 border rounded-lg bg-muted/50" >
135+ { /* Main Stats Row */ }
136+ < div className = "flex flex-wrap items-start justify-between gap-x-6 gap-y-3" >
137+ { /* Passed/Failed */ }
138+ < div className = "text-center" >
139+ < div className = "text-2xl font-bold whitespace-nowrap" >
140+ < span className = "text-green-600" > { stats . passed } </ span >
141+ < span className = "text-muted-foreground mx-1" > /</ span >
142+ < span className = "text-red-600" > { stats . failed } </ span >
143+ { stats . running > 0 && (
144+ < span className = "text-yellow-600 text-sm ml-2" > ({ stats . running } )</ span >
145+ ) }
146+ </ div >
147+ < div className = "text-xs text-muted-foreground" > Passed / Failed</ div >
148+ </ div >
149+
150+ { /* Pass Rate */ }
151+ < div className = "text-center" >
152+ < div className = "text-2xl font-bold" > { stats . passRate ? `${ stats . passRate } %` : "-" } </ div >
153+ < div className = "text-xs text-muted-foreground" > Pass Rate</ div >
154+ </ div >
155+
156+ { /* Tokens */ }
157+ < div className = "text-center" >
158+ < div className = "text-xl font-bold font-mono whitespace-nowrap" >
159+ { formatTokens ( stats . totalTokensIn ) }
160+ < span className = "text-muted-foreground mx-1" > /</ span >
161+ { formatTokens ( stats . totalTokensOut ) }
162+ </ div >
163+ < div className = "text-xs text-muted-foreground" > Tokens In / Out</ div >
164+ </ div >
165+
166+ { /* Cost */ }
167+ < div className = "text-center" >
168+ < div className = "text-2xl font-bold font-mono" > { formatCurrency ( stats . totalCost ) } </ div >
169+ < div className = "text-xs text-muted-foreground" > Cost</ div >
170+ </ div >
171+
172+ { /* Duration */ }
173+ < div className = "text-center" >
174+ < div className = "text-2xl font-bold font-mono whitespace-nowrap" >
175+ { stats . totalDuration > 0 ? formatDuration ( stats . totalDuration ) : "-" }
176+ </ div >
177+ < div className = "text-xs text-muted-foreground" > Duration</ div >
178+ </ div >
179+
180+ { /* Tool Usage - Inline */ }
181+ { Object . keys ( stats . toolUsage ) . length > 0 && (
182+ < div className = "flex items-center gap-2 flex-wrap" >
183+ { Object . entries ( stats . toolUsage )
184+ . sort ( ( [ , a ] , [ , b ] ) => b . attempts - a . attempts )
185+ . map ( ( [ toolName , usage ] ) => {
186+ const abbr = getToolAbbreviation ( toolName )
187+ const successRate =
188+ usage . attempts > 0
189+ ? ( ( usage . attempts - usage . failures ) / usage . attempts ) * 100
190+ : 100
191+ const rateColor =
192+ successRate === 100
193+ ? "text-green-500"
194+ : successRate >= 80
195+ ? "text-yellow-500"
196+ : "text-red-500"
197+ return (
198+ < Tooltip key = { toolName } >
199+ < TooltipTrigger asChild >
200+ < div className = "flex items-center gap-1 px-2 py-1 rounded bg-background/50 border border-border/50 hover:border-border transition-colors cursor-default text-xs" >
201+ < span className = "font-medium text-muted-foreground" >
202+ { abbr }
203+ </ span >
204+ < span className = "font-bold tabular-nums" >
205+ { usage . attempts }
206+ </ span >
207+ < span className = { `${ rateColor } ` } >
208+ { formatToolUsageSuccessRate ( usage ) }
209+ </ span >
210+ </ div >
211+ </ TooltipTrigger >
212+ < TooltipContent side = "bottom" > { toolName } </ TooltipContent >
213+ </ Tooltip >
214+ )
215+ } ) }
216+ </ div >
217+ ) }
218+ </ div >
219+ </ div >
220+ ) }
54221 { ! tasks ? (
55222 < LoaderCircle className = "size-4 animate-spin" />
56223 ) : (
0 commit comments