AutoMaker-Org · gsxdsm · Feb 25, 2026 · Feb 24, 2026 · Feb 25, 2026 · Feb 25, 2026
diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
@@ -46,7 +46,8 @@ jobs:
           echo "SERVER_PID=$SERVER_PID" >> $GITHUB_ENV
 
         env:
-          PORT: 3008
+          PORT: 3108
+          TEST_SERVER_PORT: 3108
           NODE_ENV: test
           # Use a deterministic API key so Playwright can log in reliably
           AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
@@ -81,13 +82,13 @@ jobs:
 
           # Wait for health endpoint
           for i in {1..60}; do
-            if curl -s -f http://localhost:3008/api/health > /dev/null 2>&1; then
+            if curl -s -f http://localhost:3108/api/health > /dev/null 2>&1; then
               echo "Backend server is ready!"
               echo "=== Backend logs ==="
               cat backend.log
               echo ""
               echo "Health check response:"
-              curl -s http://localhost:3008/api/health | jq . 2>/dev/null || echo "Health check: $(curl -s http://localhost:3008/api/health 2>/dev/null || echo 'No response')"
+              curl -s http://localhost:3108/api/health | jq . 2>/dev/null || echo "Health check: $(curl -s http://localhost:3108/api/health 2>/dev/null || echo 'No response')"
               exit 0
             fi
 
@@ -111,11 +112,11 @@ jobs:
           ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
           echo ""
           echo "=== Port status ==="
-          netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"
-          lsof -i :3008 2>/dev/null || echo "lsof not available or port not in use"
+          netstat -tlnp 2>/dev/null | grep :3108 || echo "Port 3108 not listening"
+          lsof -i :3108 2>/dev/null || echo "lsof not available or port not in use"
           echo ""
           echo "=== Health endpoint test ==="
-          curl -v http://localhost:3008/api/health 2>&1 || echo "Health endpoint failed"
+          curl -v http://localhost:3108/api/health 2>&1 || echo "Health endpoint failed"
 
           # Kill the server process if it's still hanging
           if kill -0 $SERVER_PID 2>/dev/null; then
@@ -132,8 +133,8 @@ jobs:
         run: npm run test --workspace=apps/ui
         env:
           CI: true
-          VITE_SERVER_URL: http://localhost:3008
-          SERVER_URL: http://localhost:3008
+          VITE_SERVER_URL: http://localhost:3108
+          SERVER_URL: http://localhost:3108
           VITE_SKIP_SETUP: 'true'
           # Keep UI-side login/defaults consistent
           AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
@@ -148,7 +149,7 @@ jobs:
           ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
           echo ""
           echo "=== Port status ==="
-          netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"
+          netstat -tlnp 2>/dev/null | grep :3108 || echo "Port 3108 not listening"
 
       - name: Upload Playwright report
         uses: actions/upload-artifact@v4

diff --git a/apps/server/.env.example b/apps/server/.env.example
@@ -52,6 +52,12 @@ HOST=0.0.0.0
 # Port to run the server on
 PORT=3008
 
+# Port the server listens on when running tests (separate from PORT above)
+TEST_SERVER_PORT=3108
+
+# Port to run the UI on for testing
+TEST_PORT=3107
+
 # Data directory for sessions and metadata
 DATA_DIR=./data
 

diff --git a/apps/server/src/providers/copilot-provider.ts b/apps/server/src/providers/copilot-provider.ts
@@ -389,9 +389,14 @@ export class CopilotProvider extends CliProvider {
 
       case 'session.error': {
         const errorEvent = sdkEvent as SdkSessionErrorEvent;
+        const enrichedError =
+          errorEvent.data.message ||
+          (errorEvent.data.code
+            ? `Copilot agent error (code: ${errorEvent.data.code})`
+            : 'Copilot agent error');
         return {
           type: 'error',
-          error: errorEvent.data.message || 'Unknown error',
+          error: enrichedError,
         };
       }
 

diff --git a/apps/server/src/providers/cursor-provider.ts b/apps/server/src/providers/cursor-provider.ts
@@ -562,10 +562,14 @@ export class CursorProvider extends CliProvider {
         const resultEvent = cursorEvent as CursorResultEvent;
 
         if (resultEvent.is_error) {
+          const errorText = resultEvent.error || resultEvent.result || '';
+          const enrichedError =
+            errorText ||
+            `Cursor agent failed (duration: ${resultEvent.duration_ms}ms, subtype: ${resultEvent.subtype}, session: ${resultEvent.session_id ?? 'none'})`;
           return {
             type: 'error',
             session_id: resultEvent.session_id,
-            error: resultEvent.error || resultEvent.result || 'Unknown error',
+            error: enrichedError,
           };
         }
 

diff --git a/apps/server/src/providers/gemini-provider.ts b/apps/server/src/providers/gemini-provider.ts
@@ -381,10 +381,13 @@ export class GeminiProvider extends CliProvider {
         const resultEvent = geminiEvent as GeminiResultEvent;
 
         if (resultEvent.status === 'error') {
+          const enrichedError =
+            resultEvent.error ||
+            `Gemini agent failed (duration: ${resultEvent.stats?.duration_ms ?? 'unknown'}ms, session: ${resultEvent.session_id ?? 'none'})`;
           return {
             type: 'error',
             session_id: resultEvent.session_id,
-            error: resultEvent.error || 'Unknown error',
+            error: enrichedError,
           };
         }
 
@@ -401,10 +404,12 @@ export class GeminiProvider extends CliProvider {
 
       case 'error': {
         const errorEvent = geminiEvent as GeminiResultEvent;
+        const enrichedError =
+          errorEvent.error || `Gemini agent failed (session: ${errorEvent.session_id ?? 'none'})`;
         return {
           type: 'error',
           session_id: errorEvent.session_id,
-          error: errorEvent.error || 'Unknown error',
+          error: enrichedError,
         };
       }
 

diff --git a/apps/server/src/services/agent-executor.ts b/apps/server/src/services/agent-executor.ts
@@ -296,8 +296,28 @@ export class AgentExecutor {
             }
           }
         } else if (msg.type === 'error') {
-          throw new Error(AgentExecutor.sanitizeProviderError(msg.error));
-        } else if (msg.type === 'result' && msg.subtype === 'success') scheduleWrite();
+          const sanitized = AgentExecutor.sanitizeProviderError(msg.error);
+          logger.error(
+            `[execute] Feature ${featureId} received error from provider. ` +
+              `raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
+          );
+          throw new Error(sanitized);
+        } else if (msg.type === 'result') {
+          if (msg.subtype === 'success') {
+            scheduleWrite();
+          } else if (msg.subtype?.startsWith('error')) {
+            // Non-success result subtypes from the SDK (error_max_turns, error_during_execution, etc.)
+            logger.error(
+              `[execute] Feature ${featureId} ended with error subtype: ${msg.subtype}. ` +
+                `session_id=${msg.session_id ?? 'none'}`
+            );
+            throw new Error(`Agent execution ended with: ${msg.subtype}`);
+          } else {
+            logger.warn(
+              `[execute] Feature ${featureId} received unhandled result subtype: ${msg.subtype}`
+            );
+          }
+        }
       }
     } finally {
       clearInterval(streamHeartbeat);
@@ -447,16 +467,28 @@ export class AgentExecutor {
               });
           }
         } else if (msg.type === 'error') {
-          // Clean the error: strip ANSI codes and redundant "Error: " prefix
-          const cleanedError =
-            (msg.error || `Error during task ${task.id}`)
-              .replace(/\x1b\[[0-9;]*m/g, '')
-              .replace(/^Error:\s*/i, '')
-              .trim() || `Error during task ${task.id}`;
-          throw new Error(cleanedError);
-        } else if (msg.type === 'result' && msg.subtype === 'success') {
-          taskOutput += msg.result || '';
-          responseText += msg.result || '';
+          const fallback = `Error during task ${task.id}`;
+          const sanitized = AgentExecutor.sanitizeProviderError(msg.error || fallback);
+          logger.error(
+            `[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` +
+              `raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
+          );
+          throw new Error(sanitized);
+        } else if (msg.type === 'result') {
+          if (msg.subtype === 'success') {
+            taskOutput += msg.result || '';
+            responseText += msg.result || '';
+          } else if (msg.subtype?.startsWith('error')) {
+            logger.error(
+              `[executeTasksLoop] Feature ${featureId} task ${task.id} ended with error subtype: ${msg.subtype}. ` +
+                `session_id=${msg.session_id ?? 'none'}`
+            );
+            throw new Error(`Agent execution ended with: ${msg.subtype}`);
+          } else {
+            logger.warn(
+              `[executeTasksLoop] Feature ${featureId} task ${task.id} received unhandled result subtype: ${msg.subtype}`
+            );
+          }
         }
       }
       if (!taskCompleteDetected)

diff --git a/apps/server/src/services/execution-service.ts b/apps/server/src/services/execution-service.ts
@@ -60,6 +60,12 @@ import type {
 
 const logger = createLogger('ExecutionService');
 
+/** Marker written by agent-executor for each tool invocation. */
+const TOOL_USE_MARKER = '🔧 Tool:';
+
+/** Minimum length, in characters, of the trimmed agent output for the run to count as meaningful work. */
+const MIN_MEANINGFUL_OUTPUT_LENGTH = 200;
+
 export class ExecutionService {
   constructor(
     private eventBus: TypedEventBus,
@@ -409,7 +415,41 @@ Please continue from where you left off and complete all remaining tasks. Use th
         }
       }
 
-      const finalStatus = feature.skipTests ? 'waiting_approval' : 'verified';
+      // Read agent output before determining final status.
+      // CLI-based providers (Cursor, Codex, etc.) may exit quickly without doing
+      // meaningful work. Check output to avoid prematurely marking as 'verified'.
+      const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
+      let agentOutput = '';
+      try {
+        agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
+      } catch {
+        /* Output file could not be read; keep agentOutput empty. */
+      }
+
+      // Determine if the agent did meaningful work by checking for tool usage
+      // indicators in the output. The agent executor writes "🔧 Tool:" markers
+      // each time a tool is invoked. No tool usage suggests the CLI exited
+      // without performing implementation work.
+      const hasToolUsage = agentOutput.includes(TOOL_USE_MARKER);
+      const isOutputTooShort = agentOutput.trim().length < MIN_MEANINGFUL_OUTPUT_LENGTH;
+      const agentDidWork = hasToolUsage && !isOutputTooShort;
+
+      let finalStatus: 'verified' | 'waiting_approval';
+      if (feature.skipTests) {
+        finalStatus = 'waiting_approval';
+      } else if (!agentDidWork) {
+        // Agent didn't produce meaningful output (e.g., CLI exited quickly).
+        // Route to waiting_approval so the user can review and re-run.
+        finalStatus = 'waiting_approval';
+        logger.warn(
+          `[executeFeature] Feature ${featureId}: agent produced insufficient output ` +
+            `(${agentOutput.trim().length}/${MIN_MEANINGFUL_OUTPUT_LENGTH} chars, toolUsage=${hasToolUsage}). ` +
+            `Setting status to waiting_approval instead of verified.`
+        );
+      } else {
+        finalStatus = 'verified';
+      }
+
       await this.updateFeatureStatusFn(projectPath, featureId, finalStatus);
       this.recordSuccessFn();
 
@@ -421,13 +461,6 @@ Please continue from where you left off and complete all remaining tasks. Use th
       const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;
 
       try {
-        const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
-        let agentOutput = '';
-        try {
-          agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
-        } catch {
-          /* */
-        }
         if (agentOutput) {
           const summary = extractSummary(agentOutput);
           if (summary) await this.saveFeatureSummaryFn(projectPath, featureId, summary);

diff --git a/apps/server/tests/unit/lib/thinking-level-normalization.test.ts b/apps/server/tests/unit/lib/thinking-level-normalization.test.ts
@@ -0,0 +1,20 @@
+import { describe, it, expect } from 'vitest';
+import { normalizeThinkingLevelForModel } from '@automaker/types';
+
+describe('normalizeThinkingLevelForModel', () => {
+  it('preserves explicitly selected none for Opus models', () => {
+    expect(normalizeThinkingLevelForModel('claude-opus', 'none')).toBe('none');
+  });
+
+  it('falls back to none when Opus receives an unsupported manual thinking level', () => {
+    expect(normalizeThinkingLevelForModel('claude-opus', 'medium')).toBe('none');
+  });
+
+  it('keeps adaptive for Opus when adaptive is selected', () => {
+    expect(normalizeThinkingLevelForModel('claude-opus', 'adaptive')).toBe('adaptive');
+  });
+
+  it('preserves supported manual levels for non-Opus models', () => {
+    expect(normalizeThinkingLevelForModel('claude-sonnet', 'high')).toBe('high');
+  });
+});
diff --git a/apps/server/tests/unit/providers/copilot-provider.test.ts b/apps/server/tests/unit/providers/copilot-provider.test.ts
@@ -397,6 +397,45 @@ describe('copilot-provider.ts', () => {
       });
     });
 
+    it('should use error code in fallback when session.error message is empty', () => {
+      const event = {
+        type: 'session.error',
+        data: { message: '', code: 'RATE_LIMIT_EXCEEDED' },
+      };
+
+      const result = provider.normalizeEvent(event);
+      expect(result).not.toBeNull();
+      expect(result!.type).toBe('error');
+      expect(result!.error).toContain('RATE_LIMIT_EXCEEDED');
+      expect(result!.error).not.toBe('Unknown error');
+    });
+
+    it('should return generic "Copilot agent error" fallback when both message and code are empty', () => {
+      const event = {
+        type: 'session.error',
+        data: { message: '', code: '' },
+      };
+
+      const result = provider.normalizeEvent(event);
+      expect(result).not.toBeNull();
+      expect(result!.type).toBe('error');
+      expect(result!.error).toBe('Copilot agent error');
+      // Must NOT be the old opaque 'Unknown error'
+      expect(result!.error).not.toBe('Unknown error');
+    });
+
+    it('should return generic "Copilot agent error" fallback when data has no code field', () => {
+      const event = {
+        type: 'session.error',
+        data: { message: '' },
+      };
+
+      const result = provider.normalizeEvent(event);
+      expect(result).not.toBeNull();
+      expect(result!.type).toBe('error');
+      expect(result!.error).toBe('Copilot agent error');
+    });
+
     it('should return null for unknown event types', () => {
       const event = { type: 'unknown.event' };