-
Notifications
You must be signed in to change notification settings - Fork 576
Fix agent output validation to prevent false verified status #807
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cb1da29
7014517
d4e74a1
bc38439
94c9f08
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -296,8 +296,28 @@ export class AgentExecutor { | |
| } | ||
| } | ||
| } else if (msg.type === 'error') { | ||
| throw new Error(AgentExecutor.sanitizeProviderError(msg.error)); | ||
| } else if (msg.type === 'result' && msg.subtype === 'success') scheduleWrite(); | ||
| const sanitized = AgentExecutor.sanitizeProviderError(msg.error); | ||
| logger.error( | ||
| `[execute] Feature ${featureId} received error from provider. ` + | ||
| `raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}` | ||
| ); | ||
|
Comment on lines
+299
to
+303
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Stop logging raw provider error payloads. Line 301 and Line 473 include 🔧 Suggested patch- logger.error(
- `[execute] Feature ${featureId} received error from provider. ` +
- `raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
- );
+ logger.error(
+ `[execute] Feature ${featureId} received error from provider. ` +
+ `error="${sanitized}", session_id=${msg.session_id ?? 'none'}`
+ );- logger.error(
- `[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` +
- `raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
- );
+ logger.error(
+ `[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` +
+ `error="${sanitized}", session_id=${msg.session_id ?? 'none'}`
+ );Also applies to: 473-475 🤖 Prompt for AI Agents |
||
| throw new Error(sanitized); | ||
| } else if (msg.type === 'result') { | ||
| if (msg.subtype === 'success') { | ||
| scheduleWrite(); | ||
| } else if (msg.subtype?.startsWith('error')) { | ||
| // Non-success result subtypes from the SDK (error_max_turns, error_during_execution, etc.) | ||
| logger.error( | ||
| `[execute] Feature ${featureId} ended with error subtype: ${msg.subtype}. ` + | ||
| `session_id=${msg.session_id ?? 'none'}` | ||
| ); | ||
| throw new Error(`Agent execution ended with: ${msg.subtype}`); | ||
| } else { | ||
|
Comment on lines
+308
to
+315
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Preserve provider error detail for Line 314 and Line 486 throw only the subtype string. If 🔧 Suggested patch- throw new Error(`Agent execution ended with: ${msg.subtype}`);
+ const detail = AgentExecutor.sanitizeProviderError(msg.error);
+ const suffix = detail !== 'Unknown error' ? ` - ${detail}` : '';
+ throw new Error(`Agent execution ended with: ${msg.subtype}${suffix}`);- throw new Error(`Agent execution ended with: ${msg.subtype}`);
+ const detail = AgentExecutor.sanitizeProviderError(msg.error);
+ const suffix = detail !== 'Unknown error' ? ` - ${detail}` : '';
+ throw new Error(`Agent execution ended with: ${msg.subtype}${suffix}`);Also applies to: 481-487 🤖 Prompt for AI Agents |
||
| logger.warn( | ||
| `[execute] Feature ${featureId} received unhandled result subtype: ${msg.subtype}` | ||
| ); | ||
| } | ||
| } | ||
| } | ||
| } finally { | ||
| clearInterval(streamHeartbeat); | ||
|
|
@@ -447,16 +467,28 @@ export class AgentExecutor { | |
| }); | ||
| } | ||
| } else if (msg.type === 'error') { | ||
| // Clean the error: strip ANSI codes and redundant "Error: " prefix | ||
| const cleanedError = | ||
| (msg.error || `Error during task ${task.id}`) | ||
| .replace(/\x1b\[[0-9;]*m/g, '') | ||
| .replace(/^Error:\s*/i, '') | ||
| .trim() || `Error during task ${task.id}`; | ||
| throw new Error(cleanedError); | ||
| } else if (msg.type === 'result' && msg.subtype === 'success') { | ||
| taskOutput += msg.result || ''; | ||
| responseText += msg.result || ''; | ||
| const fallback = `Error during task ${task.id}`; | ||
| const sanitized = AgentExecutor.sanitizeProviderError(msg.error || fallback); | ||
| logger.error( | ||
| `[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` + | ||
| `raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}` | ||
| ); | ||
| throw new Error(sanitized); | ||
| } else if (msg.type === 'result') { | ||
| if (msg.subtype === 'success') { | ||
| taskOutput += msg.result || ''; | ||
| responseText += msg.result || ''; | ||
| } else if (msg.subtype?.startsWith('error')) { | ||
| logger.error( | ||
| `[executeTasksLoop] Feature ${featureId} task ${task.id} ended with error subtype: ${msg.subtype}. ` + | ||
| `session_id=${msg.session_id ?? 'none'}` | ||
| ); | ||
| throw new Error(`Agent execution ended with: ${msg.subtype}`); | ||
| } else { | ||
| logger.warn( | ||
| `[executeTasksLoop] Feature ${featureId} task ${task.id} received unhandled result subtype: ${msg.subtype}` | ||
| ); | ||
| } | ||
| } | ||
| } | ||
| if (!taskCompleteDetected) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| import { describe, it, expect } from 'vitest'; | ||
| import { normalizeThinkingLevelForModel } from '@automaker/types'; | ||
|
|
||
| describe('normalizeThinkingLevelForModel', () => { | ||
| it('preserves explicitly selected none for Opus models', () => { | ||
| expect(normalizeThinkingLevelForModel('claude-opus', 'none')).toBe('none'); | ||
| }); | ||
|
|
||
| it('falls back to none when Opus receives an unsupported manual thinking level', () => { | ||
| expect(normalizeThinkingLevelForModel('claude-opus', 'medium')).toBe('none'); | ||
| }); | ||
|
|
||
| it('keeps adaptive for Opus when adaptive is selected', () => { | ||
| expect(normalizeThinkingLevelForModel('claude-opus', 'adaptive')).toBe('adaptive'); | ||
| }); | ||
|
|
||
| it('preserves supported manual levels for non-Opus models', () => { | ||
| expect(normalizeThinkingLevelForModel('claude-sonnet', 'high')).toBe('high'); | ||
| }); | ||
| }); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Enriched
session.errortext can be bypassed at runtime.Line 392-Line 399 improves normalization, but
executeQuerystill throws from the event handler usingnew Error(errorEvent.data.message)before normalized events are consumed. Emptymessagestill loses your new fallback.🔧 Suggested patch (align throw-path with enrichment)
🤖 Prompt for AI Agents