Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion apps/server/src/providers/codex-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,14 @@ const TEXT_ENCODING = 'utf-8';
* This is the "no output" timeout - if the CLI doesn't produce any JSONL output
* for this duration, the process is killed. For reasoning models with high
* reasoning effort, this timeout is dynamically extended via calculateReasoningTimeout().
*
* For feature generation (which can generate 50+ features), we use a much longer
* base timeout (5 minutes) since Codex models are slower at generating large JSON responses.
*
* @see calculateReasoningTimeout from @automaker/types
*/
const CODEX_CLI_TIMEOUT_MS = DEFAULT_TIMEOUT_MS;
const CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS = 300000; // 5 minutes for feature generation
const CONTEXT_WINDOW_256K = 256000;
const MAX_OUTPUT_32K = 32000;
const MAX_OUTPUT_16K = 16000;
Expand Down Expand Up @@ -827,7 +832,14 @@ export class CodexProvider extends BaseProvider {
// Higher reasoning effort (e.g., 'xhigh' for "xtra thinking" mode) requires more time
// for the model to generate reasoning tokens before producing output.
// This fixes GitHub issue #530 where features would get stuck with reasoning models.
const timeout = calculateReasoningTimeout(options.reasoningEffort, CODEX_CLI_TIMEOUT_MS);
//
// For feature generation with 'xhigh', use the extended 5-minute base timeout
// since generating 50+ features takes significantly longer than normal operations.
const baseTimeout =
options.reasoningEffort === 'xhigh'
? CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS
: CODEX_CLI_TIMEOUT_MS;
const timeout = calculateReasoningTimeout(options.reasoningEffort, baseTimeout);

const stream = spawnJSONLProcess({
command: commandPath,
Expand Down
172 changes: 162 additions & 10 deletions apps/server/src/routes/app-spec/generate-features-from-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
import * as secureFs from '../../lib/secure-fs.js';
import type { EventEmitter } from '../../lib/events.js';
import { createLogger } from '@automaker/utils';
import { DEFAULT_PHASE_MODELS } from '@automaker/types';
import { DEFAULT_PHASE_MODELS, supportsStructuredOutput, isCodexModel } from '@automaker/types';
import { resolvePhaseModel } from '@automaker/model-resolver';
import { streamingQuery } from '../../providers/simple-query-service.js';
import { parseAndCreateFeatures } from './parse-and-create-features.js';
import { extractJsonWithArray } from '../../lib/json-extractor.js';
import { getAppSpecPath } from '@automaker/platform';
import type { SettingsService } from '../../services/settings-service.js';
import {
Expand All @@ -25,6 +26,64 @@ const logger = createLogger('SpecRegeneration');

const DEFAULT_MAX_FEATURES = 50;

/**
* Timeout for Codex models when generating features (5 minutes).
* Codex models are slower and need more time to generate 50+ features.
*/
const CODEX_FEATURE_GENERATION_TIMEOUT_MS = 300000; // 5 minutes

/**
* Type for extracted features JSON response
*/
interface FeaturesExtractionResult {
features: Array<{
id: string;
category?: string;
title: string;
description: string;
priority?: number;
complexity?: 'simple' | 'moderate' | 'complex';
dependencies?: string[];
}>;
}

/**
* JSON schema for features output format (Claude/Codex structured output)
*/
const featuresOutputSchema = {
type: 'object',
properties: {
features: {
type: 'array',
items: {
type: 'object',
properties: {
id: { type: 'string', description: 'Unique feature identifier (kebab-case)' },
category: { type: 'string', description: 'Feature category' },
title: { type: 'string', description: 'Short, descriptive title' },
description: { type: 'string', description: 'Detailed feature description' },
priority: {
type: 'number',
description: 'Priority level: 1 (highest) to 5 (lowest)',
},
complexity: {
type: 'string',
enum: ['simple', 'moderate', 'complex'],
description: 'Implementation complexity',
},
dependencies: {
type: 'array',
items: { type: 'string' },
description: 'IDs of features this depends on',
},
},
required: ['id', 'title', 'description'],
},
},
},
required: ['features'],
} as const;

export async function generateFeaturesFromSpec(
projectPath: string,
events: EventEmitter,
Expand Down Expand Up @@ -136,23 +195,80 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
provider: undefined,
credentials: undefined,
};
const { model, thinkingLevel } = resolvePhaseModel(phaseModelEntry);
const { model, thinkingLevel, reasoningEffort } = resolvePhaseModel(phaseModelEntry);

logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');

// Codex models need extended timeout for generating many features.
// Use 'xhigh' reasoning effort to get 5-minute timeout (300s base * 1.0x = 300s).
// The Codex provider has a special 5-minute base timeout for feature generation.
const isCodex = isCodexModel(model);
const effectiveReasoningEffort = isCodex ? 'xhigh' : reasoningEffort;

if (isCodex) {
logger.info('Codex model detected - using extended timeout (5 minutes for feature generation)');
}
if (effectiveReasoningEffort) {
logger.info('Reasoning effort:', effectiveReasoningEffort);
}

// Determine if we should use structured output based on model type
const useStructuredOutput = supportsStructuredOutput(model);
logger.info(
`Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
);

// Build the final prompt - for non-Claude/Codex models, include explicit JSON instructions
let finalPrompt = prompt;
if (!useStructuredOutput) {
finalPrompt = `${prompt}

CRITICAL INSTRUCTIONS:
1. DO NOT write any files. Return the JSON in your response only.
2. After analyzing the spec, respond with ONLY a JSON object - no explanations, no markdown, just raw JSON.
3. The JSON must have this exact structure:
{
"features": [
{
"id": "unique-feature-id",
"category": "Category Name",
"title": "Short Feature Title",
"description": "Detailed description of the feature",
"priority": 1,
"complexity": "simple|moderate|complex",
"dependencies": ["other-feature-id"]
}
]
}

4. Feature IDs must be unique, lowercase, kebab-case (e.g., "user-authentication", "data-export")
5. Priority ranges from 1 (highest) to 5 (lowest)
6. Complexity must be one of: "simple", "moderate", "complex"
7. Dependencies is an array of feature IDs that must be completed first (can be empty)

Your entire response should be valid JSON starting with { and ending with }. No text before or after.`;
}

// Use streamingQuery with event callbacks
const result = await streamingQuery({
prompt,
prompt: finalPrompt,
model,
cwd: projectPath,
maxTurns: 250,
allowedTools: ['Read', 'Glob', 'Grep'],
abortController,
thinkingLevel,
reasoningEffort: effectiveReasoningEffort, // Extended timeout for Codex models
readOnly: true, // Feature generation only reads code, doesn't write
settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
credentials, // Pass credentials for resolving 'credentials' apiKeySource
outputFormat: useStructuredOutput
? {
type: 'json_schema',
schema: featuresOutputSchema,
}
: undefined,
onText: (text) => {
logger.debug(`Feature text block received (${text.length} chars)`);
events.emit('spec-regeneration:event', {
Expand All @@ -163,15 +279,51 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
},
});

const responseText = result.text;
// Get response content - prefer structured output if available
let contentForParsing: string;

if (result.structured_output) {
// Use structured output from Claude/Codex models
logger.info('✅ Received structured output from model');
contentForParsing = JSON.stringify(result.structured_output);
logger.debug('Structured output:', contentForParsing);
} else {
// Use text response (for non-Claude/Codex models or fallback)
// Pre-extract JSON to handle conversational text that may surround the JSON response
// This follows the same pattern used in generate-spec.ts and validate-issue.ts
const rawText = result.text;
logger.info(`Feature stream complete.`);
logger.info(`Feature response length: ${rawText.length} chars`);
logger.info('========== FULL RESPONSE TEXT ==========');
logger.info(rawText);
logger.info('========== END RESPONSE TEXT ==========');

logger.info(`Feature stream complete.`);
logger.info(`Feature response length: ${responseText.length} chars`);
logger.info('========== FULL RESPONSE TEXT ==========');
logger.info(responseText);
logger.info('========== END RESPONSE TEXT ==========');
// Pre-extract JSON from response - handles conversational text around the JSON
const extracted = extractJsonWithArray<FeaturesExtractionResult>(rawText, 'features', {
logger,
});
if (extracted) {
contentForParsing = JSON.stringify(extracted);
logger.info('✅ Pre-extracted JSON from text response');
} else {
// If pre-extraction fails, we know the next step will also fail.
// Throw an error here to avoid redundant parsing and make the failure point clearer.
logger.error(
'❌ Could not extract features JSON from model response. Full response text was:\n' +
rawText
);
const errorMessage =
'Failed to parse features from model response: No valid JSON with a "features" array found.';
events.emit('spec-regeneration:event', {
type: 'spec_regeneration_error',
error: errorMessage,
projectPath: projectPath,
});
throw new Error(errorMessage);
}
}

await parseAndCreateFeatures(projectPath, responseText, events);
await parseAndCreateFeatures(projectPath, contentForParsing, events);

logger.debug('========== generateFeaturesFromSpec() completed ==========');
}
11 changes: 7 additions & 4 deletions apps/server/src/routes/app-spec/generate-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import * as secureFs from '../../lib/secure-fs.js';
import type { EventEmitter } from '../../lib/events.js';
import { specOutputSchema, specToXml, type SpecOutput } from '../../lib/app-spec-format.js';
import { createLogger } from '@automaker/utils';
import { DEFAULT_PHASE_MODELS, isCursorModel } from '@automaker/types';
import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
import { resolvePhaseModel } from '@automaker/model-resolver';
import { extractJson } from '../../lib/json-extractor.js';
import { streamingQuery } from '../../providers/simple-query-service.js';
Expand Down Expand Up @@ -120,10 +120,13 @@ ${prompts.appSpec.structuredSpecInstructions}`;
let responseText = '';
let structuredOutput: SpecOutput | null = null;

// Determine if we should use structured output (Claude supports it, Cursor doesn't)
const useStructuredOutput = !isCursorModel(model);
// Determine if we should use structured output based on model type
const useStructuredOutput = supportsStructuredOutput(model);
logger.info(
`Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
);

// Build the final prompt - for Cursor, include JSON schema instructions
// Build the final prompt - for non-Claude/Codex models, include JSON schema instructions
let finalPrompt = prompt;
if (!useStructuredOutput) {
finalPrompt = `${prompt}
Expand Down
Loading