Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions .github/workflows/e2e-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ jobs:
echo "SERVER_PID=$SERVER_PID" >> $GITHUB_ENV

env:
PORT: 3008
PORT: 3108
TEST_SERVER_PORT: 3108
NODE_ENV: test
# Use a deterministic API key so Playwright can log in reliably
AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
Expand Down Expand Up @@ -81,13 +82,13 @@ jobs:

# Wait for health endpoint
for i in {1..60}; do
if curl -s -f http://localhost:3008/api/health > /dev/null 2>&1; then
if curl -s -f http://localhost:3108/api/health > /dev/null 2>&1; then
echo "Backend server is ready!"
echo "=== Backend logs ==="
cat backend.log
echo ""
echo "Health check response:"
curl -s http://localhost:3008/api/health | jq . 2>/dev/null || echo "Health check: $(curl -s http://localhost:3008/api/health 2>/dev/null || echo 'No response')"
curl -s http://localhost:3108/api/health | jq . 2>/dev/null || echo "Health check: $(curl -s http://localhost:3108/api/health 2>/dev/null || echo 'No response')"
exit 0
fi

Expand All @@ -111,11 +112,11 @@ jobs:
ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
echo ""
echo "=== Port status ==="
netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"
lsof -i :3008 2>/dev/null || echo "lsof not available or port not in use"
netstat -tlnp 2>/dev/null | grep :3108 || echo "Port 3108 not listening"
lsof -i :3108 2>/dev/null || echo "lsof not available or port not in use"
echo ""
echo "=== Health endpoint test ==="
curl -v http://localhost:3008/api/health 2>&1 || echo "Health endpoint failed"
curl -v http://localhost:3108/api/health 2>&1 || echo "Health endpoint failed"

# Kill the server process if it's still hanging
if kill -0 $SERVER_PID 2>/dev/null; then
Expand All @@ -132,8 +133,8 @@ jobs:
run: npm run test --workspace=apps/ui
env:
CI: true
VITE_SERVER_URL: http://localhost:3008
SERVER_URL: http://localhost:3008
VITE_SERVER_URL: http://localhost:3108
SERVER_URL: http://localhost:3108
VITE_SKIP_SETUP: 'true'
# Keep UI-side login/defaults consistent
AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
Expand All @@ -148,7 +149,7 @@ jobs:
ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
echo ""
echo "=== Port status ==="
netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"
netstat -tlnp 2>/dev/null | grep :3108 || echo "Port 3108 not listening"

- name: Upload Playwright report
uses: actions/upload-artifact@v4
Expand Down
6 changes: 6 additions & 0 deletions apps/server/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ HOST=0.0.0.0
# Port to run the server on
PORT=3008

# Port to run the server on for testing
TEST_SERVER_PORT=3108

# Port to run the UI on for testing
TEST_PORT=3107

# Data directory for sessions and metadata
DATA_DIR=./data

Expand Down
7 changes: 6 additions & 1 deletion apps/server/src/providers/copilot-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -389,9 +389,14 @@ export class CopilotProvider extends CliProvider {

case 'session.error': {
const errorEvent = sdkEvent as SdkSessionErrorEvent;
const enrichedError =
errorEvent.data.message ||
(errorEvent.data.code
? `Copilot agent error (code: ${errorEvent.data.code})`
: 'Copilot agent error');
return {
type: 'error',
error: errorEvent.data.message || 'Unknown error',
error: enrichedError,
Comment on lines +392 to +399
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Enriched session.error text can be bypassed at runtime.

Lines 392-399 improve normalization, but executeQuery still throws from the event handler using new Error(errorEvent.data.message) before normalized events are consumed, so an empty message still bypasses your new fallback.

🔧 Suggested patch (align throw-path with enrichment)
-        } else if (event.type === 'session.error') {
-          const errorEvent = event as SdkSessionErrorEvent;
-          sessionError = new Error(errorEvent.data.message);
+        } else if (event.type === 'session.error') {
+          const errorEvent = event as SdkSessionErrorEvent;
+          const enrichedError =
+            errorEvent.data.message ||
+            (errorEvent.data.code
+              ? `Copilot agent error (code: ${errorEvent.data.code})`
+              : 'Copilot agent error');
+          sessionError = new Error(enrichedError);
           sessionComplete = true;
           pushEvent(event);
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@apps/server/src/providers/copilot-provider.ts` around lines 392 - 399, The
error throw in executeQuery still uses the raw event text (new
Error(errorEvent.data.message)) and can bypass the enriched fallback; locate the
throw inside executeQuery's event handler that references
errorEvent.data.message and change it to use the same normalized value as the
enrichment logic (use the enrichedError value or build the same fallback string
using errorEvent.data.code when message is falsy) so thrown Errors always
contain the enriched message that session.error receives.

};
}

Expand Down
6 changes: 5 additions & 1 deletion apps/server/src/providers/cursor-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -562,10 +562,14 @@ export class CursorProvider extends CliProvider {
const resultEvent = cursorEvent as CursorResultEvent;

if (resultEvent.is_error) {
const errorText = resultEvent.error || resultEvent.result || '';
const enrichedError =
errorText ||
`Cursor agent failed (duration: ${resultEvent.duration_ms}ms, subtype: ${resultEvent.subtype}, session: ${resultEvent.session_id ?? 'none'})`;
return {
type: 'error',
session_id: resultEvent.session_id,
error: resultEvent.error || resultEvent.result || 'Unknown error',
error: enrichedError,
};
}

Expand Down
9 changes: 7 additions & 2 deletions apps/server/src/providers/gemini-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -381,10 +381,13 @@ export class GeminiProvider extends CliProvider {
const resultEvent = geminiEvent as GeminiResultEvent;

if (resultEvent.status === 'error') {
const enrichedError =
resultEvent.error ||
`Gemini agent failed (duration: ${resultEvent.stats?.duration_ms ?? 'unknown'}ms, session: ${resultEvent.session_id ?? 'none'})`;
return {
type: 'error',
session_id: resultEvent.session_id,
error: resultEvent.error || 'Unknown error',
error: enrichedError,
};
}

Expand All @@ -401,10 +404,12 @@ export class GeminiProvider extends CliProvider {

case 'error': {
const errorEvent = geminiEvent as GeminiResultEvent;
const enrichedError =
errorEvent.error || `Gemini agent failed (session: ${errorEvent.session_id ?? 'none'})`;
return {
type: 'error',
session_id: errorEvent.session_id,
error: errorEvent.error || 'Unknown error',
error: enrichedError,
};
}

Expand Down
56 changes: 44 additions & 12 deletions apps/server/src/services/agent-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,28 @@ export class AgentExecutor {
}
}
} else if (msg.type === 'error') {
throw new Error(AgentExecutor.sanitizeProviderError(msg.error));
} else if (msg.type === 'result' && msg.subtype === 'success') scheduleWrite();
const sanitized = AgentExecutor.sanitizeProviderError(msg.error);
logger.error(
`[execute] Feature ${featureId} received error from provider. ` +
`raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
);
Comment on lines +299 to +303
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Stop logging raw provider error payloads.

Line 301 and Line 473 include raw="${msg.error}". Provider errors can contain sensitive content (tokens, file snippets, user data). Log only sanitized output.

🔧 Suggested patch
-          logger.error(
-            `[execute] Feature ${featureId} received error from provider. ` +
-              `raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
-          );
+          logger.error(
+            `[execute] Feature ${featureId} received error from provider. ` +
+              `error="${sanitized}", session_id=${msg.session_id ?? 'none'}`
+          );
-          logger.error(
-            `[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` +
-              `raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
-          );
+          logger.error(
+            `[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` +
+              `error="${sanitized}", session_id=${msg.session_id ?? 'none'}`
+          );

Also applies to: 473-475

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@apps/server/src/services/agent-executor.ts` around lines 299 - 303, The
current log call in AgentExecutor is printing raw provider errors (msg.error)
which may contain sensitive data; update the logger.error invocations (the one
using featureId, session_id and the other occurrence around lines 473-475) to
remove raw="${msg.error}" and only include the sanitized output returned by
AgentExecutor.sanitizeProviderError(msg.error) along with contextual fields
(featureId, session_id) — i.e., call logger.error with a message that references
the sanitized variable and context, not the raw msg.error, and ensure any other
logger calls in this file that include msg.error are changed the same way.

throw new Error(sanitized);
} else if (msg.type === 'result') {
if (msg.subtype === 'success') {
scheduleWrite();
} else if (msg.subtype?.startsWith('error')) {
// Non-success result subtypes from the SDK (error_max_turns, error_during_execution, etc.)
logger.error(
`[execute] Feature ${featureId} ended with error subtype: ${msg.subtype}. ` +
`session_id=${msg.session_id ?? 'none'}`
);
throw new Error(`Agent execution ended with: ${msg.subtype}`);
} else {
Comment on lines +308 to +315
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Preserve provider error detail for result error subtypes.

Line 314 and Line 486 throw only the subtype string. If msg.error is present, that context is dropped and debugging becomes harder.

🔧 Suggested patch
-            throw new Error(`Agent execution ended with: ${msg.subtype}`);
+            const detail = AgentExecutor.sanitizeProviderError(msg.error);
+            const suffix = detail !== 'Unknown error' ? ` - ${detail}` : '';
+            throw new Error(`Agent execution ended with: ${msg.subtype}${suffix}`);
-            throw new Error(`Agent execution ended with: ${msg.subtype}`);
+            const detail = AgentExecutor.sanitizeProviderError(msg.error);
+            const suffix = detail !== 'Unknown error' ? ` - ${detail}` : '';
+            throw new Error(`Agent execution ended with: ${msg.subtype}${suffix}`);

Also applies to: 481-487

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@apps/server/src/services/agent-executor.ts` around lines 308 - 315, The error
handling branch in agent-executor.ts that checks
msg.subtype?.startsWith('error') (and the similar branch around the other
occurrence) currently throws only the subtype string and logs without including
provider error details; update the logger.error and the thrown Error in the
execute flow (refer to variables featureId, msg.subtype, msg.session_id, and
msg.error) to append or include msg.error when present so the thrown Error and
log message contain both the subtype and the provider error detail for easier
debugging.

logger.warn(
`[execute] Feature ${featureId} received unhandled result subtype: ${msg.subtype}`
);
}
}
}
} finally {
clearInterval(streamHeartbeat);
Expand Down Expand Up @@ -447,16 +467,28 @@ export class AgentExecutor {
});
}
} else if (msg.type === 'error') {
// Clean the error: strip ANSI codes and redundant "Error: " prefix
const cleanedError =
(msg.error || `Error during task ${task.id}`)
.replace(/\x1b\[[0-9;]*m/g, '')
.replace(/^Error:\s*/i, '')
.trim() || `Error during task ${task.id}`;
throw new Error(cleanedError);
} else if (msg.type === 'result' && msg.subtype === 'success') {
taskOutput += msg.result || '';
responseText += msg.result || '';
const fallback = `Error during task ${task.id}`;
const sanitized = AgentExecutor.sanitizeProviderError(msg.error || fallback);
logger.error(
`[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` +
`raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
);
throw new Error(sanitized);
} else if (msg.type === 'result') {
if (msg.subtype === 'success') {
taskOutput += msg.result || '';
responseText += msg.result || '';
} else if (msg.subtype?.startsWith('error')) {
logger.error(
`[executeTasksLoop] Feature ${featureId} task ${task.id} ended with error subtype: ${msg.subtype}. ` +
`session_id=${msg.session_id ?? 'none'}`
);
throw new Error(`Agent execution ended with: ${msg.subtype}`);
} else {
logger.warn(
`[executeTasksLoop] Feature ${featureId} task ${task.id} received unhandled result subtype: ${msg.subtype}`
);
}
}
}
if (!taskCompleteDetected)
Expand Down
49 changes: 41 additions & 8 deletions apps/server/src/services/execution-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ import type {

const logger = createLogger('ExecutionService');

/**
 * Marker written by agent-executor for each tool invocation.
 * The agent output is searched for this exact substring to decide whether
 * any tool usage was recorded.
 */
const TOOL_USE_MARKER = '🔧 Tool:';

/**
 * Minimum trimmed output length to consider agent work meaningful.
 * Agent output whose trimmed length is below this number of characters is
 * treated as too short.
 */
const MIN_MEANINGFUL_OUTPUT_LENGTH = 200;

export class ExecutionService {
constructor(
private eventBus: TypedEventBus,
Expand Down Expand Up @@ -409,7 +415,41 @@ Please continue from where you left off and complete all remaining tasks. Use th
}
}

const finalStatus = feature.skipTests ? 'waiting_approval' : 'verified';
// Read agent output before determining final status.
// CLI-based providers (Cursor, Codex, etc.) may exit quickly without doing
// meaningful work. Check output to avoid prematurely marking as 'verified'.
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
let agentOutput = '';
try {
agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
} catch {
/* */
}

// Determine if the agent did meaningful work by checking for tool usage
// indicators in the output. The agent executor writes "🔧 Tool:" markers
// each time a tool is invoked. No tool usage suggests the CLI exited
// without performing implementation work.
const hasToolUsage = agentOutput.includes(TOOL_USE_MARKER);
const isOutputTooShort = agentOutput.trim().length < MIN_MEANINGFUL_OUTPUT_LENGTH;
const agentDidWork = hasToolUsage && !isOutputTooShort;

let finalStatus: 'verified' | 'waiting_approval';
if (feature.skipTests) {
finalStatus = 'waiting_approval';
} else if (!agentDidWork) {
// Agent didn't produce meaningful output (e.g., CLI exited quickly).
// Route to waiting_approval so the user can review and re-run.
finalStatus = 'waiting_approval';
logger.warn(
`[executeFeature] Feature ${featureId}: agent produced insufficient output ` +
`(${agentOutput.trim().length}/${MIN_MEANINGFUL_OUTPUT_LENGTH} chars, toolUsage=${hasToolUsage}). ` +
`Setting status to waiting_approval instead of verified.`
);
} else {
finalStatus = 'verified';
}

await this.updateFeatureStatusFn(projectPath, featureId, finalStatus);
this.recordSuccessFn();

Expand All @@ -421,13 +461,6 @@ Please continue from where you left off and complete all remaining tasks. Use th
const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;

try {
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
let agentOutput = '';
try {
agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
} catch {
/* */
}
if (agentOutput) {
const summary = extractSummary(agentOutput);
if (summary) await this.saveFeatureSummaryFn(projectPath, featureId, summary);
Expand Down
20 changes: 20 additions & 0 deletions apps/server/tests/unit/lib/thinking-level-normalization.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { describe, it, expect } from 'vitest';
import { normalizeThinkingLevelForModel } from '@automaker/types';

/**
 * Pins the (model, requested level) -> normalized level mapping of
 * normalizeThinkingLevelForModel for the Opus and non-Opus cases below.
 */
describe('normalizeThinkingLevelForModel', () => {
  // Opus + explicit 'none' stays 'none'.
  it('preserves explicitly selected none for Opus models', () => {
    const normalized = normalizeThinkingLevelForModel('claude-opus', 'none');
    expect(normalized).toBe('none');
  });

  // Opus + a manual level ('medium') is expected to collapse to 'none'.
  it('falls back to none when Opus receives an unsupported manual thinking level', () => {
    const normalized = normalizeThinkingLevelForModel('claude-opus', 'medium');
    expect(normalized).toBe('none');
  });

  // Opus + 'adaptive' passes through unchanged.
  it('keeps adaptive for Opus when adaptive is selected', () => {
    const normalized = normalizeThinkingLevelForModel('claude-opus', 'adaptive');
    expect(normalized).toBe('adaptive');
  });

  // A non-Opus model keeps the manual level it was given.
  it('preserves supported manual levels for non-Opus models', () => {
    const normalized = normalizeThinkingLevelForModel('claude-sonnet', 'high');
    expect(normalized).toBe('high');
  });
});
39 changes: 39 additions & 0 deletions apps/server/tests/unit/providers/copilot-provider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,45 @@ describe('copilot-provider.ts', () => {
});
});

// Empty message + non-empty code: the normalized error text must carry the code.
it('should use error code in fallback when session.error message is empty', () => {
  const sessionErrorEvent = {
    type: 'session.error',
    data: { message: '', code: 'RATE_LIMIT_EXCEEDED' },
  };

  const normalized = provider.normalizeEvent(sessionErrorEvent);

  expect(normalized).not.toBeNull();
  const { type: normalizedType, error: normalizedError } = normalized!;
  expect(normalizedType).toBe('error');
  expect(normalizedError).toContain('RATE_LIMIT_EXCEEDED');
  expect(normalizedError).not.toBe('Unknown error');
});

// Empty message + empty code: only the generic fallback text is expected.
it('should return generic "Copilot agent error" fallback when both message and code are empty', () => {
  const sessionErrorEvent = {
    type: 'session.error',
    data: { message: '', code: '' },
  };

  const normalized = provider.normalizeEvent(sessionErrorEvent);

  expect(normalized).not.toBeNull();
  const { type: normalizedType, error: normalizedError } = normalized!;
  expect(normalizedType).toBe('error');
  expect(normalizedError).toBe('Copilot agent error');
  // Must NOT be the old opaque 'Unknown error'
  expect(normalizedError).not.toBe('Unknown error');
});

// Empty message and no `code` property at all: same generic fallback text.
it('should return generic "Copilot agent error" fallback when data has no code field', () => {
  const sessionErrorEvent = {
    type: 'session.error',
    data: { message: '' },
  };

  const normalized = provider.normalizeEvent(sessionErrorEvent);

  expect(normalized).not.toBeNull();
  const { type: normalizedType, error: normalizedError } = normalized!;
  expect(normalizedType).toBe('error');
  expect(normalizedError).toBe('Copilot agent error');
});

it('should return null for unknown event types', () => {
const event = { type: 'unknown.event' };

Expand Down
Loading