Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,21 @@ planeteer list
1. **Clarify** — Describe your project in natural language. Copilot asks clarifying questions until the scope is clear.
2. **Breakdown** — Copilot generates a work breakdown structure: tasks with descriptions, acceptance criteria, and dependencies.
3. **Refine** — Navigate the task tree, edit details, or type refinement requests (e.g., "split the auth task into login and signup"). Press `s` to save, `x` to execute.
4. **Execute** — Tasks are dispatched to Copilot agents in parallel batches that respect the dependency graph. Progress is shown in real time.
4. **Execute** — Tasks are dispatched to Copilot agents in parallel batches that respect the dependency graph. Progress is shown in real time. Press `e` during execution to view the session event log, which displays granular feedback including tool executions, progress updates, token usage, and error messages.

### Keyboard Shortcuts

| Key | Action |
|-----|--------|
| `↑` `↓` | Navigate task list |
| `←` `→` | Switch batch (execute screen) |
| `⏎` | Submit input / proceed to next screen |
| `Esc` | Go back |
| `s` | Save plan (refine screen) |
| `x` | Start execution (refine/execute screen) |
| `r` | Retry failed tasks (execute screen) |
| `e` | Toggle session event log (execute screen) |
| `z` | Export summary to markdown (execute screen) |
| `q` | Quit |

## Development
Expand Down Expand Up @@ -157,6 +161,25 @@ Plans are saved to `.planeteer/` in the current working directory:
- `<plan-id>.json` — Machine-readable plan (used by the app)
- `<plan-id>.md` — Human-readable Markdown export

## Features

### Real-Time Session Event Monitoring

During task execution, Planeteer subscribes to session events from the Copilot SDK, providing granular visibility into what the AI agents are doing:

- **Tool Executions**: See when tools (bash, file operations, etc.) start and complete
- **Progress Updates**: View real-time progress messages from agents
- **Token Usage**: Monitor the model in use and the input/output token counts reported for each task
- **Error Details**: Get actionable error messages when tasks fail

**How to use:**
1. Start task execution with `x`
2. Press `e` to toggle the event log panel
3. The log shows the most recent 8 events with timestamps, task IDs, and messages
4. Error events are highlighted in red for quick identification

This feature leverages the Copilot SDK's `session.on()` event API to capture events like `tool.execution_start`, `tool.execution_progress`, `tool.execution_complete`, and `session.error`, providing transparency and debugging insights into agent behavior.

## Project Structure

```
Expand Down
113 changes: 109 additions & 4 deletions src/screens/execute.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Box, Text, useInput } from 'ink';
import type { Plan, Task } from '../models/plan.js';
import { executePlan } from '../services/executor.js';
import type { ExecutionOptions, ExecutionHandle } from '../services/executor.js';
import type { SessionEventData } from '../services/copilot.js';
import { savePlan, summarizePlan } from '../services/persistence.js';
import { computeBatches } from '../utils/dependency-graph.js';
import Spinner from '../components/spinner.js';
Expand All @@ -15,6 +16,14 @@ interface ExecuteScreenProps {
onBack: () => void;
}

/** A session event flattened into the fields the event-log panel renders. */
interface DisplayEvent {
// ID of the task whose session produced the event.
taskId: string;
// Raw event type string copied from the session event (e.g. 'tool.execution_start').
type: string;
// Event time already formatted for display via toLocaleTimeString().
timestamp: string;
// Human-readable, single-line description of the event.
message: string;
// True for failures; the log panel renders these messages in red.
isError: boolean;
}

const STATUS_ICON: Record<string, string> = {
pending: '○',
in_progress: '◉',
Expand All @@ -29,6 +38,50 @@ const STATUS_COLOR: Record<string, string> = {
failed: 'red',
};

/**
 * Convert a raw session event into the flattened shape rendered by the event log.
 *
 * Known event types get a descriptive message; any other type is shown using
 * its type string as the message. Tool failures and `session.error` events are
 * flagged via `isError`.
 *
 * @param taskId - ID of the task whose session emitted the event.
 * @param event - The session event; `event.data` is untyped and may be absent.
 * @returns The display-ready event.
 */
function formatSessionEvent(taskId: string, event: SessionEventData): DisplayEvent {
  const time = new Date(event.timestamp).toLocaleTimeString();

  // `event.data` is `unknown`. Substitute an empty object for null/undefined so
  // the property reads below fall through to their defaults instead of throwing.
  const payload: unknown = event.data ?? {};

  let message = '';
  let isError = false;

  switch (event.type) {
    case 'tool.execution_start': {
      const { toolName } = payload as { toolName?: string };
      message = `Tool started: ${toolName || 'unknown'}`;
      break;
    }
    case 'tool.execution_progress': {
      const { progressMessage } = payload as { progressMessage?: string };
      message = `Progress: ${progressMessage || '...'}`;
      break;
    }
    case 'tool.execution_complete': {
      const completionData = payload as {
        success?: boolean;
        toolName?: string;
        error?: { message?: string } | null;
      };
      // Only an explicit `success: false` counts as a failure; a missing flag
      // is treated as success.
      if (completionData.success === false) {
        message = `Tool failed: ${completionData.error?.message || 'unknown error'}`;
        isError = true;
      } else {
        message = `Tool completed successfully`;
      }
      break;
    }
    case 'session.error': {
      const { message: errorMessage } = payload as { message?: string };
      message = `Error: ${errorMessage || 'unknown error'}`;
      isError = true;
      break;
    }
    case 'assistant.usage': {
      const usage = payload as { inputTokens?: number; outputTokens?: number; model?: string };
      message = `Token usage — Model: ${usage.model || 'unknown'}, In: ${usage.inputTokens || 0}, Out: ${usage.outputTokens || 0}`;
      break;
    }
    default:
      // Show other events with minimal formatting
      message = event.type;
  }

  return {
    taskId,
    type: event.type,
    timestamp: time,
    message,
    isError,
  };
}

export default function ExecuteScreen({
plan,
codebaseContext,
Expand All @@ -46,6 +99,8 @@ export default function ExecuteScreen({
const [runCount, setRunCount] = useState(0); // incremented to re-trigger execution
const execHandleRef = useRef<ExecutionHandle | null>(null);
const [summarized, setSummarized] = useState('');
const [eventLog, setEventLog] = useState<DisplayEvent[]>([]);
const [showEventLog, setShowEventLog] = useState(false);

const { batches } = computeBatches(plan.tasks);
// Total display batches: init batch (index 0) + real batches
Expand Down Expand Up @@ -110,6 +165,10 @@ export default function ExecuteScreen({
setTimeout(() => setSummarized(''), 3000);
});
}
// Toggle event log
if (ch === 'e' && started) {
setShowEventLog((prev) => !prev);
}
if (key.leftArrow) {
setViewBatchIndex((i) => Math.max(0, i - 1));
setSelectedTaskIndex(0);
Expand Down Expand Up @@ -195,6 +254,14 @@ export default function ExecuteScreen({
}
// Otherwise stay on execute screen — user can press 'r' to retry
},
onSessionEvent: (taskId, event) => {
const displayEvent = formatSessionEvent(taskId, event);
// Keep only the most recent 100 events to prevent unbounded memory growth
setEventLog((prev) => {
const updated = [...prev, displayEvent];
return updated.length > 100 ? updated.slice(-100) : updated;
});
},
}, execOptions);

execHandleRef.current = handle;
Expand Down Expand Up @@ -401,17 +468,55 @@ export default function ExecuteScreen({
</Box>
)}

{/* Event Log */}
{showEventLog && eventLog.length > 0 && (
<Box flexDirection="column" marginBottom={1} borderStyle="single" borderColor="cyan" paddingX={1}>
<Box marginBottom={0}>
<Text color="cyan" bold>Session Event Log</Text>
<Text color="gray"> ({eventLog.length} events)</Text>
</Box>
{(() => {
const maxEvents = 8;
const visible = eventLog.slice(-maxEvents);
const truncated = eventLog.length > maxEvents;
return (
<>
{truncated && (
<Text color="gray" dimColor>··· {eventLog.length - maxEvents} earlier events ···</Text>
)}
{visible.map((evt, i) => (
<Box key={i}>
<Text color="gray">[{evt.timestamp}] </Text>
<Text color="cyan">{evt.taskId}: </Text>
<Text color={evt.isError ? 'red' : 'white'}>{evt.message}</Text>
</Box>
))}
</>
);
})()}
</Box>
)}

{/* Event log hint */}
{started && !showEventLog && eventLog.length > 0 && (
<Box marginBottom={1}>
<Text color="cyan">Press </Text>
<Text color="green" bold>e</Text>
<Text color="cyan"> to view session event log ({eventLog.length} events)</Text>
</Box>
)}

<StatusBar
screen="Execute"
hint={
executing && failedCount > 0
? '←→: switch batch ↑↓: select task r: retry task ⏳ executing...'
? '←→: switch batch ↑↓: select task r: retry task e: events ⏳ executing...'
: executing
? '←→: switch batch ↑↓: select task ⏳ executing...'
? '←→: switch batch ↑↓: select task e: events ⏳ executing...'
: started && failedCount > 0
? '←→: switch batch ↑↓: select task r: retry z: summarize esc: back'
? '←→: switch batch ↑↓: select task r: retry e: events z: summarize esc: back'
: started
? '←→: switch batch ↑↓: select task z: summarize esc: back'
? '←→: switch batch ↑↓: select task e: events z: summarize esc: back'
: 'x: start esc: back'
}
/>
Expand Down
60 changes: 60 additions & 0 deletions src/services/copilot.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { describe, it, expect, vi } from 'vitest';
import type { SessionEventData } from './copilot.js';

// Shape checks for SessionEventData. Each case builds an object literal typed as
// SessionEventData and asserts on the fields it just set, so the value of these
// tests is mainly compile-time: they fail to type-check if the interface changes.
// NOTE(review): nothing here invokes sendPrompt or any session subscription —
// consider adding a test that exercises the onSessionEvent callback path.
describe('SessionEventData', () => {
it('should have correct type structure', () => {
const mockEvent: SessionEventData = {
type: 'tool.execution_start',
timestamp: new Date().toISOString(),
data: { toolName: 'test-tool' },
};

expect(mockEvent.type).toBe('tool.execution_start');
expect(mockEvent.timestamp).toBeDefined();
expect(mockEvent.data).toBeDefined();
});

it('should handle tool.execution_complete events', () => {
const mockEvent: SessionEventData = {
type: 'tool.execution_complete',
timestamp: new Date().toISOString(),
data: {
toolCallId: 'test-123',
success: true,
result: { content: 'Task completed' },
},
};

expect(mockEvent.type).toBe('tool.execution_complete');
expect(mockEvent.data).toHaveProperty('success');
});

it('should handle session.error events', () => {
const mockEvent: SessionEventData = {
type: 'session.error',
timestamp: new Date().toISOString(),
data: {
errorType: 'timeout',
message: 'Request timed out',
},
};

expect(mockEvent.type).toBe('session.error');
// `data` is `unknown`, so it must be cast before reading a field.
expect((mockEvent.data as { message: string }).message).toBe('Request timed out');
});

it('should handle assistant.usage events', () => {
const mockEvent: SessionEventData = {
type: 'assistant.usage',
timestamp: new Date().toISOString(),
data: {
model: 'claude-sonnet-4',
inputTokens: 100,
outputTokens: 50,
},
};

expect(mockEvent.type).toBe('assistant.usage');
// `data` is `unknown`, so it must be cast before reading a field.
expect((mockEvent.data as { inputTokens: number }).inputTokens).toBe(100);
});
});
29 changes: 28 additions & 1 deletion src/services/copilot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,17 @@ export async function stopClient(): Promise<void> {
}
}

/**
 * Minimal, SDK-agnostic view of a session event handed to `onSessionEvent`
 * callbacks.
 */
export interface SessionEventData {
// Event type string as reported by the session (e.g. 'tool.execution_start').
type: string;
// Event time as a string. NOTE(review): presumably ISO-8601, as consumers
// parse it with `new Date(...)` — confirm against the SDK's event type.
timestamp: string;
// Event-specific payload; untyped, so consumers must narrow or cast before use.
data: unknown;
}

/** Callbacks through which a streamed prompt reports its progress to the caller. */
export interface StreamCallbacks {
// Receives each incremental chunk of assistant text as it arrives.
onDelta: (text: string) => void;
// Receives the accumulated response text.
// NOTE(review): presumably fired once when the response finishes — confirm in sendPrompt.
onDone: (fullText: string) => void;
// Receives an Error describing a failure of the request.
onError: (error: Error) => void;
// Optional. When provided, receives session events as { type, timestamp, data }.
onSessionEvent?: (event: SessionEventData) => void;
}

export async function sendPrompt(
Expand Down Expand Up @@ -137,6 +144,20 @@ export async function sendPrompt(
let fullText = '';
let settled = false;

// Subscribe to all session events if callback provided.
// We capture all events rather than specific types to ensure comprehensive monitoring
// of SDK behavior (tool executions, progress updates, token usage, errors, etc.).
// The UI layer filters and formats events for display.
if (callbacks.onSessionEvent) {
session.on((event) => {
callbacks.onSessionEvent?.({
type: event.type,
timestamp: event.timestamp,
data: event.data,
});
});
}

session.on('assistant.message_delta', (event: { data: { deltaContent: string } }) => {
fullText += event.data.deltaContent;
callbacks.onDelta(event.data.deltaContent);
Expand Down Expand Up @@ -176,10 +197,15 @@ export async function sendPrompt(
export async function sendPromptSync(
systemPrompt: string,
messages: ChatMessage[],
options?: { timeoutMs?: number; onDelta?: (delta: string, fullText: string) => void },
options?: {
timeoutMs?: number;
onDelta?: (delta: string, fullText: string) => void;
onSessionEvent?: (event: SessionEventData) => void;
},
): Promise<string> {
const idleTimeoutMs = options?.timeoutMs ?? 120_000;
const onDelta = options?.onDelta;
const onSessionEvent = options?.onSessionEvent;

return new Promise((resolve, reject) => {
let settled = false;
Expand Down Expand Up @@ -235,6 +261,7 @@ export async function sendPromptSync(
reject(err);
}
},
onSessionEvent,
});
});
}
Loading