diff --git a/README.md b/README.md index 9987d3b..7c9a0e5 100644 --- a/README.md +++ b/README.md @@ -44,17 +44,21 @@ planeteer list 1. **Clarify** — Describe your project in natural language. Copilot asks clarifying questions until the scope is clear. 2. **Breakdown** — Copilot generates a work breakdown structure: tasks with descriptions, acceptance criteria, and dependencies. 3. **Refine** — Navigate the task tree, edit details, or type refinement requests (e.g., "split the auth task into login and signup"). Press `s` to save, `x` to execute. -4. **Execute** — Tasks are dispatched to Copilot agents in parallel batches that respect the dependency graph. Progress is shown in real time. +4. **Execute** — Tasks are dispatched to Copilot agents in parallel batches that respect the dependency graph. Progress is shown in real time. Press `e` during execution to view the session event log, which displays granular feedback including tool executions, progress updates, token usage, and error messages. ### Keyboard Shortcuts | Key | Action | |-----|--------| | `↑` `↓` | Navigate task list | +| `←` `→` | Switch batch (execute screen) | | `⏎` | Submit input / proceed to next screen | | `Esc` | Go back | | `s` | Save plan (refine screen) | | `x` | Start execution (refine/execute screen) | +| `r` | Retry failed tasks (execute screen) | +| `e` | Toggle session event log (execute screen) | +| `z` | Export summary to markdown (execute screen) | | `q` | Quit | ## Development @@ -157,6 +161,25 @@ Plans are saved to `.planeteer/` in the current working directory: - `.json` — Machine-readable plan (used by the app) - `.md` — Human-readable Markdown export +## Features + +### Real-Time Session Event Monitoring + +During task execution, Planeteer subscribes to session events from the Copilot SDK, providing granular visibility into what the AI agents are doing: + +- **Tool Executions**: See when tools (bash, file operations, etc.) 
/** One formatted row of the session event log rendered on the execute screen. */
interface DisplayEvent {
  /** Id of the task (or init task) whose session emitted the event. */
  taskId: string;
  /** Raw event type string, e.g. `tool.execution_start`. */
  type: string;
  /** Locale time string for the event, or the raw timestamp when it cannot be parsed. */
  timestamp: string;
  /** Human-readable one-line summary of the event. */
  message: string;
  /** True for failed tool executions and session errors. */
  isError: boolean;
}

/**
 * Views an untyped event payload as a key/value record.
 * Anything that is not a non-null object becomes `{}` so that field reads
 * never throw on events that carry no payload.
 */
function payloadOf(data: unknown): Record<string, unknown> {
  return typeof data === 'object' && data !== null ? (data as Record<string, unknown>) : {};
}

/** Returns `value` when it is a non-empty string, otherwise `fallback`. */
function textOr(value: unknown, fallback: string): string {
  return typeof value === 'string' && value !== '' ? value : fallback;
}

/** Returns `value` when it is a finite number, otherwise 0. */
function countOf(value: unknown): number {
  return typeof value === 'number' && Number.isFinite(value) ? value : 0;
}

/**
 * Converts a raw session event into a row for the event log.
 *
 * The payload (`event.data`) is typed `unknown`, so it is normalised once and
 * every field is read defensively: a missing, null, or wrongly-typed field
 * falls back to a placeholder instead of throwing or printing `[object Object]`.
 *
 * @param taskId - Id of the task the event belongs to.
 * @param event - Event forwarded from the session subscription.
 * @returns The display row; `isError` is set for `session.error` events and
 *   for `tool.execution_complete` events whose payload has `success === false`.
 */
function formatSessionEvent(taskId: string, event: SessionEventData): DisplayEvent {
  const payload = payloadOf(event.data);

  // An unparseable timestamp would otherwise render as the literal "Invalid Date".
  const parsed = new Date(event.timestamp);
  const timestamp = Number.isNaN(parsed.getTime())
    ? textOr(event.timestamp, 'unknown time')
    : parsed.toLocaleTimeString();

  let message: string;
  let isError = false;

  switch (event.type) {
    case 'tool.execution_start':
      message = `Tool started: ${textOr(payload.toolName, 'unknown')}`;
      break;
    case 'tool.execution_progress':
      message = `Progress: ${textOr(payload.progressMessage, '...')}`;
      break;
    case 'tool.execution_complete':
      // Only an explicit `success: false` counts as a failure; a missing flag is treated as success.
      if (payload.success === false) {
        message = `Tool failed: ${textOr(payloadOf(payload.error).message, 'unknown error')}`;
        isError = true;
      } else {
        message = 'Tool completed successfully';
      }
      break;
    case 'session.error':
      message = `Error: ${textOr(payload.message, 'unknown error')}`;
      isError = true;
      break;
    case 'assistant.usage':
      message =
        `Token usage — Model: ${textOr(payload.model, 'unknown')}, ` +
        `In: ${countOf(payload.inputTokens)}, Out: ${countOf(payload.outputTokens)}`;
      break;
    default:
      // Unrecognised events are shown by type name only.
      message = event.type;
  }

  return { taskId, type: event.type, timestamp, message, isError };
}
setSelectedTaskIndex(0); @@ -195,6 +254,14 @@ export default function ExecuteScreen({ } // Otherwise stay on execute screen — user can press 'r' to retry }, + onSessionEvent: (taskId, event) => { + const displayEvent = formatSessionEvent(taskId, event); + // Keep only the most recent 100 events to prevent unbounded memory growth + setEventLog((prev) => { + const updated = [...prev, displayEvent]; + return updated.length > 100 ? updated.slice(-100) : updated; + }); + }, }, execOptions); execHandleRef.current = handle; @@ -401,17 +468,55 @@ export default function ExecuteScreen({ )} + {/* Event Log */} + {showEventLog && eventLog.length > 0 && ( + + + Session Event Log + ({eventLog.length} events) + + {(() => { + const maxEvents = 8; + const visible = eventLog.slice(-maxEvents); + const truncated = eventLog.length > maxEvents; + return ( + <> + {truncated && ( + ··· {eventLog.length - maxEvents} earlier events ··· + )} + {visible.map((evt, i) => ( + + [{evt.timestamp}] + {evt.taskId}: + {evt.message} + + ))} + + ); + })()} + + )} + + {/* Event log hint */} + {started && !showEventLog && eventLog.length > 0 && ( + + Press + e + to view session event log ({eventLog.length} events) + + )} + 0 - ? '←→: switch batch ↑↓: select task r: retry task ⏳ executing...' + ? '←→: switch batch ↑↓: select task r: retry task e: events ⏳ executing...' : executing - ? '←→: switch batch ↑↓: select task ⏳ executing...' + ? '←→: switch batch ↑↓: select task e: events ⏳ executing...' : started && failedCount > 0 - ? '←→: switch batch ↑↓: select task r: retry z: summarize esc: back' + ? '←→: switch batch ↑↓: select task r: retry e: events z: summarize esc: back' : started - ? '←→: switch batch ↑↓: select task z: summarize esc: back' + ? 
'←→: switch batch ↑↓: select task e: events z: summarize esc: back' : 'x: start esc: back' } /> diff --git a/src/services/copilot.test.ts b/src/services/copilot.test.ts new file mode 100644 index 0000000..1c312b9 --- /dev/null +++ b/src/services/copilot.test.ts @@ -0,0 +1,60 @@ +import { describe, it, expect, vi } from 'vitest'; +import type { SessionEventData } from './copilot.js'; + +describe('SessionEventData', () => { + it('should have correct type structure', () => { + const mockEvent: SessionEventData = { + type: 'tool.execution_start', + timestamp: new Date().toISOString(), + data: { toolName: 'test-tool' }, + }; + + expect(mockEvent.type).toBe('tool.execution_start'); + expect(mockEvent.timestamp).toBeDefined(); + expect(mockEvent.data).toBeDefined(); + }); + + it('should handle tool.execution_complete events', () => { + const mockEvent: SessionEventData = { + type: 'tool.execution_complete', + timestamp: new Date().toISOString(), + data: { + toolCallId: 'test-123', + success: true, + result: { content: 'Task completed' }, + }, + }; + + expect(mockEvent.type).toBe('tool.execution_complete'); + expect(mockEvent.data).toHaveProperty('success'); + }); + + it('should handle session.error events', () => { + const mockEvent: SessionEventData = { + type: 'session.error', + timestamp: new Date().toISOString(), + data: { + errorType: 'timeout', + message: 'Request timed out', + }, + }; + + expect(mockEvent.type).toBe('session.error'); + expect((mockEvent.data as { message: string }).message).toBe('Request timed out'); + }); + + it('should handle assistant.usage events', () => { + const mockEvent: SessionEventData = { + type: 'assistant.usage', + timestamp: new Date().toISOString(), + data: { + model: 'claude-sonnet-4', + inputTokens: 100, + outputTokens: 50, + }, + }; + + expect(mockEvent.type).toBe('assistant.usage'); + expect((mockEvent.data as { inputTokens: number }).inputTokens).toBe(100); + }); +}); diff --git a/src/services/copilot.ts 
b/src/services/copilot.ts index b691e58..5297078 100644 --- a/src/services/copilot.ts +++ b/src/services/copilot.ts @@ -104,10 +104,17 @@ export async function stopClient(): Promise { } } +export interface SessionEventData { + type: string; + timestamp: string; + data: unknown; +} + export interface StreamCallbacks { onDelta: (text: string) => void; onDone: (fullText: string) => void; onError: (error: Error) => void; + onSessionEvent?: (event: SessionEventData) => void; } export async function sendPrompt( @@ -137,6 +144,20 @@ export async function sendPrompt( let fullText = ''; let settled = false; + // Subscribe to all session events if callback provided. + // We capture all events rather than specific types to ensure comprehensive monitoring + // of SDK behavior (tool executions, progress updates, token usage, errors, etc.). + // The UI layer filters and formats events for display. + if (callbacks.onSessionEvent) { + session.on((event) => { + callbacks.onSessionEvent?.({ + type: event.type, + timestamp: event.timestamp, + data: event.data, + }); + }); + } + session.on('assistant.message_delta', (event: { data: { deltaContent: string } }) => { fullText += event.data.deltaContent; callbacks.onDelta(event.data.deltaContent); @@ -176,10 +197,15 @@ export async function sendPrompt( export async function sendPromptSync( systemPrompt: string, messages: ChatMessage[], - options?: { timeoutMs?: number; onDelta?: (delta: string, fullText: string) => void }, + options?: { + timeoutMs?: number; + onDelta?: (delta: string, fullText: string) => void; + onSessionEvent?: (event: SessionEventData) => void; + }, ): Promise { const idleTimeoutMs = options?.timeoutMs ?? 
120_000; const onDelta = options?.onDelta; + const onSessionEvent = options?.onSessionEvent; return new Promise((resolve, reject) => { let settled = false; @@ -235,6 +261,7 @@ export async function sendPromptSync( reject(err); } }, + onSessionEvent, }); }); } diff --git a/src/services/executor.test.ts b/src/services/executor.test.ts new file mode 100644 index 0000000..e56974d --- /dev/null +++ b/src/services/executor.test.ts @@ -0,0 +1,58 @@ +import { describe, it, expect, vi } from 'vitest'; +import type { ExecutionCallbacks } from './executor.js'; +import type { SessionEventData } from './copilot.js'; + +describe('ExecutionCallbacks with session events', () => { + it('should define onSessionEvent callback', () => { + const mockCallback: ExecutionCallbacks = { + onTaskStart: vi.fn(), + onTaskDelta: vi.fn(), + onTaskDone: vi.fn(), + onTaskFailed: vi.fn(), + onBatchComplete: vi.fn(), + onAllDone: vi.fn(), + onSessionEvent: vi.fn(), + }; + + expect(mockCallback.onSessionEvent).toBeDefined(); + expect(typeof mockCallback.onSessionEvent).toBe('function'); + }); + + it('should call onSessionEvent with taskId and event data', () => { + const onSessionEvent = vi.fn(); + + const mockEvent: SessionEventData = { + type: 'tool.execution_start', + timestamp: new Date().toISOString(), + data: { toolName: 'bash' }, + }; + + onSessionEvent('task-1', mockEvent); + + expect(onSessionEvent).toHaveBeenCalledWith('task-1', mockEvent); + expect(onSessionEvent).toHaveBeenCalledTimes(1); + }); + + it('should handle multiple session events for different tasks', () => { + const onSessionEvent = vi.fn(); + + const event1: SessionEventData = { + type: 'tool.execution_start', + timestamp: new Date().toISOString(), + data: { toolName: 'bash' }, + }; + + const event2: SessionEventData = { + type: 'tool.execution_complete', + timestamp: new Date().toISOString(), + data: { success: true }, + }; + + onSessionEvent('task-1', event1); + onSessionEvent('task-2', event2); + + 
expect(onSessionEvent).toHaveBeenCalledTimes(2); + expect(onSessionEvent).toHaveBeenNthCalledWith(1, 'task-1', event1); + expect(onSessionEvent).toHaveBeenNthCalledWith(2, 'task-2', event2); + }); +}); diff --git a/src/services/executor.ts b/src/services/executor.ts index 1edf492..36b046a 100644 --- a/src/services/executor.ts +++ b/src/services/executor.ts @@ -1,5 +1,5 @@ import type { Plan, Task } from '../models/plan.js'; -import { sendPromptSync } from './copilot.js'; +import { sendPromptSync, type SessionEventData } from './copilot.js'; import { getReadyTasks } from '../utils/dependency-graph.js'; export interface ExecutionCallbacks { @@ -9,6 +9,7 @@ export interface ExecutionCallbacks { onTaskFailed: (taskId: string, error: string) => void; onBatchComplete: (batchIndex: number) => void; onAllDone: (plan: Plan) => void; + onSessionEvent?: (taskId: string, event: SessionEventData) => void; } function buildTaskPrompt(task: Task, plan: Plan, codebaseContext?: string): string { @@ -112,6 +113,9 @@ export function executePlan( onDelta: (delta, fullText) => { callbacks.onTaskDelta(task.id, delta, fullText); }, + onSessionEvent: callbacks.onSessionEvent ? (event) => { + callbacks.onSessionEvent!(task.id, event); + } : undefined, }); taskInPlan.status = 'done'; taskInPlan.agentResult = result; @@ -180,6 +184,9 @@ export function executePlan( onDelta: (delta, fullText) => { callbacks.onTaskDelta(INIT_TASK_ID, delta, fullText); }, + onSessionEvent: callbacks.onSessionEvent ? (event) => { + callbacks.onSessionEvent!(INIT_TASK_ID, event); + } : undefined, }); callbacks.onTaskDone(INIT_TASK_ID, initResult); } catch (err) {