diff --git a/.eslintignore b/.eslintignore deleted file mode 100644 index ca9bb03..0000000 --- a/.eslintignore +++ /dev/null @@ -1,7 +0,0 @@ -dist/ -node_modules/ -coverage/ -.serena/ -assets/ -docs/ -spec/ diff --git a/.gitignore b/.gitignore index 6dbc763..6c7df89 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ node_modules/ bun.lockb +.worktrees/ dist/ coverage/ @@ -17,4 +18,3 @@ tmp .worktrees/ .envrc .env - diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fcfa80..fcb39ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,89 +3,124 @@ All notable changes to this project are documented here. Dates use the ISO format (YYYY-MM-DD). ## [3.3.0] - 2025-11-19 + ### Added + - Codex Max support that mirrors the Codex CLI: normalization for every `gpt-5.1-codex-max` alias, `reasoningEffort: "xhigh"`, and unit tests covering both the transformer and request body integration path. - Documentation and configuration updates calling out Codex Max as the flagship preset, plus refreshed samples showing how to opt into the Extra High reasoning mode. ### Changed + - Sample configs (`full` + `minimal`), README tables, AGENTS.md, and the diagnostics script now prefer `gpt-5.1-codex-max`, keeping plugin defaults aligned with Codex CLI behaviour. ### Fixed + - Requests that specify `reasoningEffort: "xhigh"` for non-supported models are now automatically downgraded to `high`, preventing API errors when Codex Max isn't selected. ## [3.2.0] - 2025-11-13 + ### Added + - GPT-5.1 family integration: normalization for `gpt-5.1`/`gpt-5.1-codex`/`gpt-5.1-codex-mini`, expanded reasoning heuristics (including `reasoningEffort: "none"`), and preservation of the native `shell`/`apply_patch` tools emitted by Codex CLI. - Updated configuration, diagnostics script, and docs to showcase the 5.1 lineup (low/medium/high plus `none`) while keeping GPT-5 presets available for backwards compatibility. 
### Changed + - Default fallback model now targets `gpt-5.1`, and Codex Mini requests always use the new `gpt-5.1-codex-mini` slug to stay in sync with the latest Codex release. ### Fixed + - Prevented invalid reasoning combinations by clamping unsupported `none`/`minimal` requests on Codex models and ensuring parallel tool-call behavior matches both GPT-5 and GPT-5.1 Codex variants. ## [3.1.0] - 2025-11-11 + ### Added + - Codex Mini support end-to-end: normalization to the `codex-mini-latest` slug, proper reasoning defaults, and two new presets (`gpt-5-codex-mini-medium` / `gpt-5-codex-mini-high`). - Documentation & configuration updates describing the Codex Mini tier (200k input / 100k output tokens) plus refreshed totals (11 presets, 160+ unit tests). ### Fixed + - Prevented Codex Mini from inheriting the lightweight (`minimal`) reasoning profile used by `gpt-5-mini`/`nano`, ensuring the API always receives supported effort levels. ## [3.0.0] - 2025-11-04 + ### Added + - Codex-style usage-limit messaging that mirrors the 5-hour and weekly windows reported by the Codex CLI. -- Documentation guidance noting that OpenCode's context auto-compaction and usage sidebar require the canonical `config/full-opencode.json`. +- Documentation guidance noting that OpenCode's usage sidebar requires the canonical `config/full-opencode.json`. ### Changed + - Prompt caching now relies solely on the host-supplied `prompt_cache_key`; conversation/session headers are forwarded only when OpenCode provides one. - CODEX_MODE bridge prompt refreshed to the newest Codex CLI release so tool awareness stays in sync. ### Fixed + - Clarified README, docs, and configuration references so the canonical config matches shipped behaviour. - Pinned `hono` (4.10.4) and `vite` (7.1.12) to resolve upstream security advisories. ## [2.1.2] - 2025-10-12 + ### Added + - Comprehensive compliance documentation (ToS guidance, security, privacy) and a full user/developer doc set. 
### Fixed + - Per-model configuration lookup, stateless multi-turn conversations, case-insensitive model normalization, and GitHub instruction caching. ## [2.1.1] - 2025-10-04 + ### Fixed + - README cache-clearing snippet now runs in a subshell from the home directory to avoid path issues while removing cached plugin files. ## [2.1.0] - 2025-10-04 + ### Added + - Enhanced CODEX_MODE bridge prompt with Task tool and MCP awareness plus ETag-backed verification of OpenCode system prompts. ### Changed + - Request transformation made async to support prompt verification caching; AGENTS.md renamed to provide cross-agent guidance. ## [2.0.0] - 2025-10-03 + ### Added + - Full TypeScript rewrite with strict typing, 123 automated tests, and nine pre-configured model variants matching the Codex CLI. - CODEX_MODE introduced (enabled by default) with a lightweight bridge prompt and configurability via config file or `CODEX_MODE` env var. ### Changed + - Library reorganized into semantic folders (auth, prompts, request, etc.) and OAuth flow polished with the new success page. ## [1.0.3] - 2025-10-02 + ### Changed + - Major internal refactor splitting the runtime into focused modules (logger, request/response handlers) and removing legacy debug output. ## [1.0.2] - 2025-10-02 + ### Added + - ETag-based GitHub caching for Codex instructions and release-tag tracking for more stable prompt updates. ### Fixed + - Default model fallback, text verbosity initialization, and standardized error logging prefixes. ## [1.0.1] - 2025-10-01 + ### Added + - README clarifications: opencode auto-installs plugins, config locations, and streamlined quick-start instructions. ## [1.0.0] - 2025-10-01 + ### Added + - Initial production release with ChatGPT Plus/Pro OAuth support, tool remapping, auto-updating Codex instructions, and zero runtime dependencies. diff --git a/README.md b/README.md index 77b3c78..7490035 100644 --- a/README.md +++ b/README.md @@ -48,10 +48,20 @@ Want to customize? 
Jump to [Configuration reference](#configuration-reference). ## Plugin-Level Settings -Set these in `~/.opencode/openhax-codex-config.json` (applies to all models): +Set these in `~/.opencode/openhax-codex-config.json` (applies to all models). Related env vars control runtime tweaks (e.g., request logging, env tail): - `codexMode` (default `true`): enable the Codex ↔ OpenCode bridge prompt and tool remapping - `enablePromptCaching` (default `true`): keep a stable `prompt_cache_key` so Codex can reuse cached prompts +- `logging` (optional): override log defaults and related env vars (`ENABLE_PLUGIN_REQUEST_LOGGING`, `DEBUG_CODEX_PLUGIN`, `CODEX_LOG_MAX_BYTES`, `CODEX_LOG_MAX_FILES`, `CODEX_LOG_QUEUE_MAX`, `CODEX_SHOW_WARNING_TOASTS`, `CODEX_LOG_WARNINGS_TO_CONSOLE`). Fields: + - `enableRequestLogging`: force request log persistence even without `ENABLE_PLUGIN_REQUEST_LOGGING=1` + - `debug`: force debug logging regardless of env + - `showWarningToasts`: show warning-level toasts in the OpenCode UI + - `logWarningsToConsole`: mirror warnings to console when toasts are off + - `logMaxBytes` (default `5_242_880` bytes): rotate rolling log after this size + - `logMaxFiles` (default `5`): rotated log files to retain (plus the active log) + - `logQueueMax` (default `1000`): max buffered log entries before oldest entries drop +- Env tail (optional): set `CODEX_APPEND_ENV_CONTEXT=1` to reattach env/files context as a trailing developer message (stripped from system prompts to keep the prefix stable). Default is unset/0 (env/files removed for maximum cache stability). +- Log inspection helper: `node scripts/inspect-codex-logs.mjs [--dir ] [--limit N] [--id X] [--stage after-transform]` summarizes cached request logs (shows model, prompt_cache_key, roles, etc.). 
Example: @@ -59,9 +69,12 @@ Example: { "codexMode": true, "enablePromptCaching": true, - "enableCodexCompaction": true, - "autoCompactTokenLimit": 120000, - "autoCompactMinMessages": 8 + "logging": { + "enableRequestLogging": true, + "logMaxBytes": 5242880, + "logMaxFiles": 5, + "logQueueMax": 1000 + } } ``` @@ -85,6 +98,8 @@ Example: **Prompt caching is enabled by default** to optimize your token usage and reduce costs. +> Optional: `CODEX_APPEND_ENV_CONTEXT=1` keeps env/files context by reattaching it as a trailing developer message while preserving a stable prefix. Leave unset to maximize cache stability. + ### How Caching Works - **Enabled by default**: `enablePromptCaching: true` @@ -93,6 +108,14 @@ Example: - **Reduces token consumption** by reusing cached prompts - **Lowers costs** significantly for multi-turn conversations +### Reducing Cache Churn (keep `prompt_cache_key` stable) + +- Why caches reset: OpenCode rebuilds the system/developer prompt every turn; the env block includes today’s date and a ripgrep tree of your workspace, so daily rollovers or file tree changes alter the prefix and trigger a new cache key. +- Keep the tree stable: ensure noisy/ephemeral dirs are ignored (e.g. `dist/`, `build/`, `.next/`, `coverage/`, `.cache/`, `logs/`, `tmp/`, `.turbo/`, `.vite/`, `.stryker-tmp/`, `artifacts/`, and similar). Put transient outputs under an ignored directory or `/tmp`. +- Don’t thrash the workspace mid-session: large checkouts, mass file generation, or moving directories will change the ripgrep listing and force a cache miss. +- Model/provider switches also change the system prompt (different base prompt), so avoid swapping models in the middle of a session if you want to reuse cache. +- Optional: set `CODEX_APPEND_ENV_CONTEXT=1` to reattach env/files at the end of the prompt instead of stripping them. This keeps the shared prefix stable (better cache reuse) while still sending env/files as a trailing developer message. 
Default is off (env/files stripped to maximize stability). + ### Managing Caching #### Recommended: Full Configuration (Codex CLI Experience) diff --git a/docs/code-cleanup-summary.md b/docs/code-cleanup-summary.md index aa0c630..b114209 100644 --- a/docs/code-cleanup-summary.md +++ b/docs/code-cleanup-summary.md @@ -13,7 +13,7 @@ 2. **Created InputItemUtils** - `lib/utils/input-item-utils.ts` - Centralized text extraction logic used in multiple modules - Added utility functions for role checking, filtering, and formatting - - Eliminates duplication in `request-transformer.ts`, `session-manager.ts`, and `codex-compaction.ts` + - Eliminates duplication in `request-transformer.ts` and `session-manager.ts` - Functions: `extractTextFromItem()`, `hasTextContent()`, `formatRole()`, `formatEntry()`, `isSystemMessage()`, `isUserMessage()`, `isAssistantMessage()`, `filterByRole()`, `getLastUserMessage()`, `countConversationTurns()` 3. **Refactored Large Functions** @@ -53,12 +53,14 @@ ## Code Quality Improvements ### Before Refactoring + - **Code Duplication**: 3+ duplicate clone implementations - **Large Functions**: `transformRequestBody()` 1130 lines with high complexity - **Magic Numbers**: Scattered TTL values and limits throughout codebase - **No Complexity Enforcement**: No cognitive complexity limits ### After Refactoring + - **Eliminated Duplication**: Single source of truth for cloning and text extraction - **Reduced Complexity**: Large function now uses focused utility functions - **Centralized Configuration**: All magic numbers in constants with descriptive names @@ -67,30 +69,34 @@ ## Files Modified ### New Files Created + - `lib/utils/clone.ts` - Shared cloning utilities - `lib/utils/input-item-utils.ts` - InputItem processing utilities ### Files Updated + - `lib/constants.ts` - Added centralized configuration constants - `biome.json` - Enhanced linting rules for complexity - `lib/request/request-transformer.ts` - Updated to use shared utilities - 
`lib/session/session-manager.ts` - Updated to use shared utilities and constants -- `lib/compaction/codex-compaction.ts` - Updated to use shared utilities - `test/session-manager.test.ts` - Updated imports for new constants ## Impact ### Maintainability + - **Easier to modify** cloning behavior in one place - **Clearer separation of concerns** with focused utility functions - **Better discoverability** of common operations ### Performance + - **Optimized cloning** with `structuredClone` when available - **Reduced memory allocation** through shared utilities - **Consistent error handling** patterns ### Code Quality + - **Enforced complexity limits** to prevent future issues - **Standardized patterns** across all modules - **Improved type safety** with centralized utilities @@ -98,13 +104,15 @@ ## Next Steps The codebase now has: + - **B+ code quality rating** (improved from existing baseline) - **Zero critical code smells** - **Comprehensive test coverage** maintained - **Automated quality gates** in place Future development will benefit from: + - Shared utilities reducing duplication - Complexity limits preventing excessive nesting - Centralized configuration for easy maintenance -- Consistent patterns across all modules \ No newline at end of file +- Consistent patterns across all modules diff --git a/docs/code-quality-analysis-report.md b/docs/code-quality-analysis-report.md index 338c801..eb84161 100644 --- a/docs/code-quality-analysis-report.md +++ b/docs/code-quality-analysis-report.md @@ -7,6 +7,7 @@ This report analyzes the OpenHax Codex plugin codebase for code duplication, cod ## Key Findings ### ✅ Strengths + - **Excellent modular architecture** with clear separation of concerns - **Comprehensive test coverage** with 123 tests across all modules - **Strong type safety** with TypeScript interfaces and proper typing @@ -14,6 +15,7 @@ This report analyzes the OpenHax Codex plugin codebase for code duplication, cod - **Effective caching strategies** with 
proper TTL and invalidation ### ⚠️ Areas for Improvement + - **Large functions** that could be broken down - **Code duplication** in utility functions - **Complex conditional logic** in some areas @@ -24,6 +26,7 @@ This report analyzes the OpenHax Codex plugin codebase for code duplication, cod ## 1. Code Duplication Issues ### 1.1 Clone/Deep Copy Patterns + **Severity: Medium** Multiple modules implement similar deep cloning logic: @@ -31,31 +34,23 @@ Multiple modules implement similar deep cloning logic: ```typescript // In request-transformer.ts:29 function cloneInputItem>(item: T): T { - return JSON.parse(JSON.stringify(item)) as T; + return JSON.parse(JSON.stringify(item)) as T; } // In session-manager.ts:24 function getCloneFn(): CloneFn { - const globalClone = (globalThis as unknown as { structuredClone?: CloneFn }).structuredClone; - if (typeof globalClone === "function") { - return globalClone; - } - return (value: T) => JSON.parse(JSON.stringify(value)) as T; + const globalClone = (globalThis as unknown as { structuredClone?: CloneFn }).structuredClone; + if (typeof globalClone === "function") { + return globalClone; + } + return (value: T) => JSON.parse(JSON.stringify(value)) as T; } - -// In codex-compaction.ts:7 -const cloneValue = (() => { - const globalClone = (globalThis as { structuredClone?: (value: T) => T }).structuredClone; - if (typeof globalClone === "function") { - return (value: T) => globalClone(value); - } - return (value: T) => JSON.parse(JSON.stringify(value)) as T; -})(); ``` **Recommendation:** Create a shared utility `lib/utils/clone.ts` with a single implementation. 
### 1.2 Hash Computation Duplication + **Severity: Low** Similar hash computation patterns appear in multiple places: @@ -63,21 +58,20 @@ Similar hash computation patterns appear in multiple places: ```typescript // request-transformer.ts:49 function computePayloadHash(item: InputItem): string { - const canonical = stableStringify(item); - return createHash("sha1").update(canonical).digest("hex"); + const canonical = stableStringify(item); + return createHash("sha1").update(canonical).digest("hex"); } // session-manager.ts:41 function computeHash(items: InputItem[]): string { - return createHash("sha1") - .update(JSON.stringify(items)) - .digest("hex"); + return createHash("sha1").update(JSON.stringify(items)).digest("hex"); } ``` **Recommendation:** Consolidate into a shared hashing utility. ### 1.3 Text Extraction Patterns + **Severity: Low** Multiple modules extract text from InputItem objects with similar logic: @@ -85,16 +79,16 @@ Multiple modules extract text from InputItem objects with similar logic: ```typescript // request-transformer.ts:510 const getContentText = (item: InputItem): string => { - if (typeof item.content === "string") { - return item.content; - } - if (Array.isArray(item.content)) { - return item.content - .filter((c) => c.type === "input_text" && c.text) - .map((c) => c.text) - .join("\n"); - } - return ""; + if (typeof item.content === "string") { + return item.content; + } + if (Array.isArray(item.content)) { + return item.content + .filter((c) => c.type === "input_text" && c.text) + .map((c) => c.text) + .join("\n"); + } + return ""; }; ``` @@ -105,10 +99,12 @@ const getContentText = (item: InputItem): string => { ### 2.1 Large Functions #### `transformRequestBody()` - 1130 lines + **File:** `lib/request/request-transformer.ts:973` **Severity: High** This function handles too many responsibilities: + - Model normalization - Configuration merging - Input filtering @@ -117,29 +113,34 @@ This function handles too many responsibilities: - 
Cache key management **Recommendation:** Break into smaller functions: + - `normalizeModelAndConfig()` - `processInputArray()` - `handleToolConfiguration()` - `managePromptInjection()` #### `getCodexInstructions()` - 218 lines + **File:** `lib/prompts/codex.ts:44` **Severity: Medium** Complex caching logic with multiple fallback paths. **Recommendation:** Extract: + - `loadFromFileCache()` - `fetchFromGitHub()` - `handleFetchFailure()` #### `handleErrorResponse()` - 77 lines + **File:** `lib/request/fetch-helpers.ts:252` **Severity: Medium** Complex error parsing and enrichment logic. **Recommendation:** Extract: + - `parseRateLimitHeaders()` - `enrichUsageLimitError()` - `createErrorResponse()` @@ -147,29 +148,31 @@ Complex error parsing and enrichment logic. ### 2.2 Complex Conditional Logic #### Model Normalization Logic + **File:** `lib/request/request-transformer.ts:314-347` ```typescript export function normalizeModel(model: string | undefined): string { - const fallback = "gpt-5.1"; - if (!model) return fallback; + const fallback = "gpt-5.1"; + if (!model) return fallback; - const lowered = model.toLowerCase(); - const sanitized = lowered.replace(/\./g, "-").replace(/[\s_\/]+/g, "-"); + const lowered = model.toLowerCase(); + const sanitized = lowered.replace(/\./g, "-").replace(/[\s_\/]+/g, "-"); - const contains = (needle: string) => sanitized.includes(needle); - const hasGpt51 = contains("gpt-5-1") || sanitized.includes("gpt51"); + const contains = (needle: string) => sanitized.includes(needle); + const hasGpt51 = contains("gpt-5-1") || sanitized.includes("gpt51"); - if (contains("gpt-5-1-codex-mini") || (hasGpt51 && contains("codex-mini"))) { - return "gpt-5.1-codex-mini"; - } - // ... many more conditions + if (contains("gpt-5-1-codex-mini") || (hasGpt51 && contains("codex-mini"))) { + return "gpt-5.1-codex-mini"; + } + // ... many more conditions } ``` **Recommendation:** Use a configuration-driven approach with model mapping tables. 
#### Reasoning Configuration Logic + **File:** `lib/request/request-transformer.ts:379-437` Complex nested conditionals for determining reasoning parameters. @@ -200,16 +203,19 @@ export const CACHE_TTL_MS = 15 * 60 * 1000; // 15 minutes ## 3. Anti-Patterns ### 3.1 God Object Configuration + **File:** `lib/types.ts` - 240 lines The `RequestBody` interface has too many optional properties, making it difficult to understand the required structure. **Recommendation:** Split into focused interfaces: + - `BaseRequestBody` - `ToolRequest` extends BaseRequestBody - `StreamingRequest` extends BaseRequestBody ### 3.2 Stringly-Typed Configuration + **Severity: Medium** Multiple places use string constants for configuration: @@ -217,28 +223,29 @@ Multiple places use string constants for configuration: ```typescript // constants.ts:70 export const AUTH_LABELS = { - OAUTH: "ChatGPT Plus/Pro (Codex Subscription)", - API_KEY: "Manually enter API Key", - INSTRUCTIONS: "A browser window should open. Complete login to finish.", + OAUTH: "ChatGPT Plus/Pro (Codex Subscription)", + API_KEY: "Manually enter API Key", + INSTRUCTIONS: "A browser window should open. Complete login to finish.", } as const; ``` **Recommendation:** Use enums or const assertions for better type safety. ### 3.3 Inconsistent Error Handling + **Severity: Low** Some functions throw exceptions while others return error objects: ```typescript // auth.ts:128 - returns TokenResult -export async function refreshAccessToken(refreshToken: string): Promise +export async function refreshAccessToken(refreshToken: string): Promise; // server.ts:64 - resolves with error object resolve({ - port: 1455, - close: () => server.close(), - waitForCode: async () => null, + port: 1455, + close: () => server.close(), + waitForCode: async () => null, }); ``` @@ -247,17 +254,19 @@ resolve({ ## 4. 
Test Code Issues ### 4.1 Repetitive Test Setup + **Severity: Low** Many test files have similar setup patterns: ```typescript -import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; ``` **Recommendation:** Create test utilities in `test/helpers/`. ### 4.2 Mock Duplication + **Severity: Low** Similar mock patterns across multiple test files. @@ -267,6 +276,7 @@ Similar mock patterns across multiple test files. ## 5. Performance Concerns ### 5.1 Inefficient String Operations + **Severity: Low** Multiple JSON.stringify/deepClone operations in hot paths. @@ -274,6 +284,7 @@ Multiple JSON.stringify/deepClone operations in hot paths. **Recommendation:** Use structuredClone where available, cache results. ### 5.2 Redundant Network Requests + **Severity: Low** Potential for multiple cache warming calls. @@ -283,6 +294,7 @@ Potential for multiple cache warming calls. ## 6. Security Considerations ### 6.1 Token Exposure in Logs + **Severity: Low** Some debug logs might expose sensitive information. @@ -292,16 +304,19 @@ Some debug logs might expose sensitive information. ## Recommendations Priority ### High Priority + 1. **Refactor `transformRequestBody()`** - Break into smaller, focused functions 2. **Create shared cloning utility** - Eliminate duplication across modules 3. **Standardize error handling** - Use consistent Result/Response patterns ### Medium Priority + 1. **Extract model normalization logic** - Use configuration-driven approach 2. **Consolidate text extraction utilities** - Create InputItemUtils class 3. **Centralize magic numbers** - Move to constants with descriptive names ### Low Priority + 1. **Create test utilities** - Reduce test code duplication 2. **Add token sanitization** - Improve security in logging 3. 
**Optimize string operations** - Use structuredClone consistently @@ -316,4 +331,4 @@ Overall Code Quality Score: **B+ (85/100)** - Code Duplication: C+ (78/100) - Function Complexity: C+ (75/100) - Test Coverage: A (90/100) -- Type Safety: A- (88/100) \ No newline at end of file +- Type Safety: A- (88/100) diff --git a/docs/configuration.md b/docs/configuration.md index f455879..ef1a7ca 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -432,7 +432,7 @@ CODEX_MODE=1 opencode run "task" # Temporarily enable - [config/full-opencode.json](../config/full-opencode.json) - Complete with 11 variants (adds Codex Mini presets) - [config/minimal-opencode.json](../config/minimal-opencode.json) - Minimal setup -> **Why choose the full config?** OpenCode's auto-compaction and usage widgets rely on the per-model `limit` metadata present only in `full-opencode.json`. Use the minimal config only if you don't need those UI features. +> **Why choose the full config?** OpenCode's usage widgets rely on the per-model `limit` metadata present only in `full-opencode.json`. Use the minimal config only if you don't need those UI features. **Your Configs:** @@ -467,6 +467,22 @@ Look for: } ``` +### Surface warnings to console (opt-in) + +Warnings default to file/app logs only. To mirror warnings to the console/UI for debugging: + +```bash +CODEX_LOG_WARNINGS_TO_CONSOLE=1 opencode run "test" --model=openai/your-model-name +``` + +Or add to `~/.opencode/openhax-codex-config.json`: + +```json +{ + "logging": { "logWarningsToConsole": true } +} +``` + ### Test Per-Model Options ```bash diff --git a/docs/getting-started.md b/docs/getting-started.md index 370487d..2058710 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -218,7 +218,7 @@ Add this to `~/.config/opencode/opencode.json`: Prompt caching is enabled out of the box: when OpenCode sends its session identifier as `prompt_cache_key`, the plugin forwards it untouched so multi-turn runs reuse prior work. 
The plugin no longer synthesizes cache IDs; if the host omits that field, Codex treats the run as uncached. The CODEX_MODE bridge prompt bundled with the plugin is kept in sync with the latest Codex CLI release, so the OpenCode UI and Codex share the same tool contract. If you hit your ChatGPT subscription limits, the plugin returns a friendly Codex-style message with the 5-hour and weekly usage windows so you know when capacity resets. -> **Heads up:** OpenCode's context auto-compaction and usage sidebar only work when this full configuration is installed. The minimal configuration skips the per-model limits, so OpenCode cannot display token usage or compact history automatically. +> **Heads up:** OpenCode's usage sidebar relies on the per-model limits in this full configuration. The minimal configuration skips those limits, so token usage may not display correctly. #### Option B: Minimal Configuration diff --git a/docs/notes/2025.11.19.18.38.24.md b/docs/notes/2025.11.19.18.38.24.md index 9bb50f0..01c90b6 100644 --- a/docs/notes/2025.11.19.18.38.24.md +++ b/docs/notes/2025.11.19.18.38.24.md @@ -5,7 +5,6 @@ interface TransformResult so it’s exported from the module, and update any loc references or imports elsewhere if needed to use the exported type (no other logic changes). - In lib/request/request-transformer.ts around lines 621 to 633, the code duplicates the bridge message object creation (the developer role message with CODEX_OPENCODE_BRIDGE and input merging) which is repeated later at lines @@ -16,34 +15,6 @@ duplicated branches with a call to that helper, keeping existing types and imports and ensuring generateContentHash("add") checks still control whether to return the helper result or the original input. - -In lib/compaction/compaction-executor.ts around lines 24 to 66, wrap the -response.text() + JSON.parse(...) 
and subsequent payload manipulation in a -try/catch so non‑JSON or unexpected response shapes do not crash compaction; on -any parse or processing error, log or ignore the error and return the original -response object untouched. Ensure the catch block returns the original Response -(preserving status, statusText, headers, and body) so callers receive the -unmodified response when parsing fails. - - -In lib/compaction/codex-compaction.ts around lines 168 to 170, the cloneRange -function duplicates logic already implemented in lib/utils/clone.ts as -cloneInputItems; replace the local implementation by importing cloneInputItems -from 'lib/utils/clone' and call it where cloneRange is used (or rename uses to -cloneInputItems), remove the duplicate function, and ensure the import is added -and TypeScript types align with InputItem[]. - - -In lib/compaction/codex-compaction.ts around lines 131 to 144, the -extractTextFromItem function duplicates logic already in -lib/utils/input-item-utils.ts; replace this local implementation by importing -and calling the centralized utility (ensuring the import path is correct), and -if needed adapt or wrap the utility call so behavior remains identical (handle -null/undefined input and array/object type checks the same way as the previous -local function). Remove the duplicated function, run type checks/TS compile and -unit tests to confirm no behavioral regressions. - - lib/cache/cache-metrics.ts lines 34-53 (also apply similar changes at 59-79, 103-105, 167-185): the metrics object and API are tightened to prevent accidental writes to the aggregate bucket but getMetrics currently performs only @@ -55,32 +26,7 @@ return a deep-cloned/read-only snapshot from getMetrics or clearly document the return as read-only to prevent external mutation. 
In lib/cache/cache-warming.ts around lines 113 to 126, the catch block declares -an unused named parameter (_error) causing lint/typecheck warnings; remove the +an unused named parameter (\_error) causing lint/typecheck warnings; remove the unused binding by changing the catch to a bare catch (i.e., catch { ... }) so the error is still ignored and the function behavior remains identical while satisfying the linter. - -In lib/compaction/codex-compaction.ts around lines 131 to 144, the -extractTextFromItem function duplicates logic already in -lib/utils/input-item-utils.ts; replace this local implementation by importing -and calling the centralized utility (ensuring the import path is correct), and -if needed adapt or wrap the utility call so behavior remains identical (handle -null/undefined input and array/object type checks the same way as the previous -local function). Remove the duplicated function, run type checks/TS compile and -unit tests to confirm no behavioral regressions. - -In lib/compaction/codex-compaction.ts around lines 168 to 170, the cloneRange -function duplicates logic already implemented in lib/utils/clone.ts as -cloneInputItems; replace the local implementation by importing cloneInputItems -from 'lib/utils/clone' and call it where cloneRange is used (or rename uses to -cloneInputItems), remove the duplicate function, and ensure the import is added -and TypeScript types align with InputItem[]. - -In lib/compaction/compaction-executor.ts around lines 24 to 66, wrap the -response.text() + JSON.parse(...) and subsequent payload manipulation in a -try/catch so non‑JSON or unexpected response shapes do not crash compaction; on -any parse or processing error, log or ignore the error and return the original -response object untouched. Ensure the catch block returns the original Response -(preserving status, statusText, headers, and body) so callers receive the -unmodified response when parsing fails. 
- diff --git a/eslint.config.mjs b/eslint.config.mjs index aa9a2be..e706f28 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -16,6 +16,7 @@ export default [ "assets/**", "docs/**", "spec/**", + ".worktrees/**", ], }, { diff --git a/lib/compaction/codex-compaction.ts b/lib/compaction/codex-compaction.ts deleted file mode 100644 index 21682e7..0000000 --- a/lib/compaction/codex-compaction.ts +++ /dev/null @@ -1,155 +0,0 @@ -import { CODEX_COMPACTION_PROMPT, CODEX_SUMMARY_PREFIX } from "../prompts/codex-compaction.js"; -import type { InputItem } from "../types.js"; -import { cloneInputItems, deepClone } from "../utils/clone.js"; -import { extractTextFromItem } from "../utils/input-item-utils.js"; - -const DEFAULT_TRANSCRIPT_CHAR_LIMIT = 12_000; -const COMMAND_TRIGGERS = ["codex-compact", "compact", "codexcompact", "compactnow"]; - -export interface ConversationSerialization { - transcript: string; - totalTurns: number; - droppedTurns: number; -} - -export interface CompactionBuildResult { - items: InputItem[]; - serialization: ConversationSerialization; -} - -export interface CompactionConfig { - enabled: boolean; - autoLimitTokens?: number; - autoMinMessages?: number; -} - -export function approximateTokenCount(items: InputItem[] | undefined): number { - if (!Array.isArray(items) || items.length === 0) { - return 0; - } - let chars = 0; - for (const item of items) { - chars += extractTextFromItem(item).length; - } - return Math.max(0, Math.ceil(chars / 4)); -} - -export function detectCompactionCommand(input: InputItem[] | undefined): string | null { - if (!Array.isArray(input) || input.length === 0) { - return null; - } - for (let index = input.length - 1; index >= 0; index -= 1) { - const item = input[index]; - if (!item || item.role !== "user") continue; - const content = extractTextFromItem(item).trim(); - if (!content) continue; - const normalized = normalizeCommandTrigger(content); - if (COMMAND_TRIGGERS.some((trigger) => normalized === trigger || 
normalized.startsWith(`${trigger} `))) { - return normalized; - } - break; - } - return null; -} - -export function serializeConversation( - items: InputItem[] | undefined, - limit = DEFAULT_TRANSCRIPT_CHAR_LIMIT, -): ConversationSerialization { - if (!Array.isArray(items) || items.length === 0) { - return { transcript: "", totalTurns: 0, droppedTurns: 0 }; - } - const conversation: Array<{ role: string; text: string }> = []; - for (const item of items) { - const text = extractTextFromItem(item); - if (!text) continue; - const role = formatRole(item.role); - if (!role) continue; - conversation.push({ role, text }); - } - let totalChars = 0; - const selected: Array<{ role: string; text: string }> = []; - for (let index = conversation.length - 1; index >= 0; index -= 1) { - const entry = conversation[index]; - const chunk = formatEntry(entry.role, entry.text); - selected.push(entry); - totalChars += chunk.length; - if (totalChars >= limit) { - break; - } - } - selected.reverse(); - const transcript = selected.map((entry) => formatEntry(entry.role, entry.text)).join("\n"); - const droppedTurns = Math.max(0, conversation.length - selected.length); - return { transcript, totalTurns: conversation.length, droppedTurns }; -} - -export function buildCompactionPromptItems(transcript: string): InputItem[] { - const compactionMetadata = { source: "opencode-compaction", opencodeCompaction: true }; - const developer: InputItem = { - type: "message", - role: "developer", - content: CODEX_COMPACTION_PROMPT, - metadata: compactionMetadata, - }; - const user: InputItem = { - type: "message", - role: "user", - content: transcript || "(conversation is empty)", - metadata: compactionMetadata, - }; - return [developer, user]; -} - -export function collectSystemMessages(items: InputItem[] | undefined): InputItem[] { - if (!Array.isArray(items)) return []; - return items - .filter((item) => item && (item.role === "system" || item.role === "developer")) - .map((item) => deepClone(item)); 
-} - -export function createSummaryMessage(summaryText: string): InputItem { - const normalized = summaryText?.trim() ?? "(no summary available)"; - const withPrefix = normalized.startsWith(CODEX_SUMMARY_PREFIX) - ? normalized - : `${CODEX_SUMMARY_PREFIX}\n\n${normalized}`; - return { - type: "message", - role: "user", - content: withPrefix, - }; -} - -export function extractTailAfterSummary(items: InputItem[] | undefined): InputItem[] { - if (!Array.isArray(items) || items.length === 0) return []; - for (let index = items.length - 1; index >= 0; index -= 1) { - const item = items[index]; - if (!item || item.role !== "user") continue; - const text = extractTextFromItem(item); - if (!text) continue; - return cloneInputItems(items.slice(index)); - } - return []; -} - -function normalizeCommandTrigger(value: string): string { - const trimmed = value.trim().toLowerCase(); - if (!trimmed) return ""; - if (trimmed.startsWith("/") || trimmed.startsWith("?")) { - return trimmed.slice(1).trimStart(); - } - return trimmed; -} - -function formatRole(role: string): string | null { - if (!role) return null; - const lower = role.toLowerCase(); - if (lower === "user" || lower === "assistant") { - return lower === "user" ? 
"User" : "Assistant"; - } - return null; -} - -function formatEntry(role: string, text: string): string { - return `## ${role}\n${text.trim()}\n`; -} diff --git a/lib/compaction/compaction-executor.ts b/lib/compaction/compaction-executor.ts deleted file mode 100644 index 8f3a6ab..0000000 --- a/lib/compaction/compaction-executor.ts +++ /dev/null @@ -1,99 +0,0 @@ -import type { SessionManager } from "../session/session-manager.js"; -import type { InputItem, SessionContext } from "../types.js"; -import { createSummaryMessage } from "./codex-compaction.js"; - -export interface CompactionDecision { - mode: "command" | "auto"; - reason?: string; - approxTokens?: number; - preservedSystem: InputItem[]; - serialization: { - transcript: string; - totalTurns: number; - droppedTurns: number; - }; -} - -interface FinalizeOptions { - response: Response; - decision: CompactionDecision; - sessionManager?: SessionManager; - sessionContext?: SessionContext; -} - -export async function finalizeCompactionResponse({ - response, - decision, - sessionManager, - sessionContext, -}: FinalizeOptions): Promise { - const responseClone = response.clone(); - - try { - const text = await responseClone.text(); - const payload = JSON.parse(text) as any; - const summaryText = extractFirstAssistantText(payload) ?? "(no summary provided)"; - const summaryMessage = createSummaryMessage(summaryText); - const summaryContent = typeof summaryMessage.content === "string" ? summaryMessage.content : ""; - - const metaNote = - decision.mode === "auto" - ? `Auto compaction triggered (${decision.reason ?? "context limit"}). Review the summary below, then resend your last instruction.\n\n` - : ""; - const finalText = `${metaNote}${summaryContent}`.trim(); - - rewriteAssistantOutput(payload, finalText); - payload.metadata = { - ...(payload.metadata ?? 
{}), - codex_compaction: { - mode: decision.mode, - reason: decision.reason, - dropped_turns: decision.serialization.droppedTurns, - total_turns: decision.serialization.totalTurns, - }, - }; - - if (sessionManager && sessionContext) { - sessionManager.applyCompactionSummary(sessionContext, { - baseSystem: decision.preservedSystem, - summary: summaryContent, - }); - } - - const headers = new Headers(response.headers); - return new Response(JSON.stringify(payload), { - status: response.status, - statusText: response.statusText, - headers, - }); - } catch { - return response; - } -} - -function extractFirstAssistantText(payload: any): string | null { - const output = Array.isArray(payload?.output) ? payload.output : []; - for (const item of output) { - if (item?.role !== "assistant") continue; - const content = Array.isArray(item?.content) ? item.content : []; - for (const part of content) { - if (part?.type === "output_text" && typeof part.text === "string") { - return part.text; - } - } - } - return null; -} - -function rewriteAssistantOutput(payload: any, text: string): void { - const output = Array.isArray(payload?.output) ? payload.output : []; - for (const item of output) { - if (item?.role !== "assistant") continue; - const content = Array.isArray(item?.content) ? 
item.content : []; - const firstText = content.find((part: any) => part?.type === "output_text"); - if (firstText) { - firstText.text = text; - } - break; - } -} diff --git a/lib/config.ts b/lib/config.ts index 075fcb9..677926c 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -14,6 +14,7 @@ const DEFAULT_CONFIG: PluginConfig = { enablePromptCaching: true, logging: { showWarningToasts: false, + logWarningsToConsole: false, }, }; diff --git a/lib/logger.ts b/lib/logger.ts index 4cc6d81..472cd45 100644 --- a/lib/logger.ts +++ b/lib/logger.ts @@ -13,6 +13,7 @@ const envLoggingDefaults = { loggingEnabled: process.env.ENABLE_PLUGIN_REQUEST_LOGGING === "1", debugFlagEnabled: process.env.DEBUG_CODEX_PLUGIN === "1", showWarningToasts: process.env.CODEX_SHOW_WARNING_TOASTS === "1", + logWarningsToConsole: process.env.CODEX_LOG_WARNINGS_TO_CONSOLE === "1", logRotationMaxBytes: getEnvNumber("CODEX_LOG_MAX_BYTES", 5 * 1024 * 1024), logRotationMaxFiles: getEnvNumber("CODEX_LOG_MAX_FILES", 5), logQueueMaxLength: getEnvNumber("CODEX_LOG_QUEUE_MAX", 1000), @@ -24,6 +25,7 @@ export function isLoggingEnabled(): boolean { } let DEBUG_FLAG_ENABLED = envLoggingDefaults.debugFlagEnabled; let WARN_TOASTS_ENABLED = envLoggingDefaults.showWarningToasts ?? false; +let WARN_CONSOLE_ENABLED = envLoggingDefaults.logWarningsToConsole ?? false; let LOG_ROTATION_MAX_BYTES = Math.max(1, envLoggingDefaults.logRotationMaxBytes); let LOG_ROTATION_MAX_FILES = Math.max(1, envLoggingDefaults.logRotationMaxFiles); let LOG_QUEUE_MAX_LENGTH = Math.max(1, envLoggingDefaults.logQueueMaxLength); @@ -82,6 +84,7 @@ function applyLoggingOverrides(logging?: LoggingConfig): void { LOGGING_ENABLED = logging.enableRequestLogging ?? LOGGING_ENABLED; DEBUG_FLAG_ENABLED = logging.debug ?? DEBUG_FLAG_ENABLED; WARN_TOASTS_ENABLED = logging.showWarningToasts ?? WARN_TOASTS_ENABLED; + WARN_CONSOLE_ENABLED = logging.logWarningsToConsole ?? 
WARN_CONSOLE_ENABLED; LOG_ROTATION_MAX_BYTES = ensurePositiveNumber(logging.logMaxBytes, LOG_ROTATION_MAX_BYTES); LOG_ROTATION_MAX_FILES = ensurePositiveNumber(logging.logMaxFiles, LOG_ROTATION_MAX_FILES); LOG_QUEUE_MAX_LENGTH = ensurePositiveNumber(logging.logQueueMax, LOG_QUEUE_MAX_LENGTH); @@ -181,7 +184,8 @@ function emit(level: LogLevel, message: string, extra?: Record) extra: sanitizedExtra, }; - if (LOGGING_ENABLED || DEBUG_ENABLED) { + const shouldPersist = LOGGING_ENABLED || DEBUG_ENABLED || level === "warn"; + if (shouldPersist) { appendRollingLog(entry); } @@ -204,7 +208,10 @@ function emit(level: LogLevel, message: string, extra?: Record) notifyToast(level, message, sanitizedExtra); } - const shouldLogToConsole = level !== "warn" || !warnToastEnabled; + const shouldLogToConsole = + level === "warn" + ? WARN_CONSOLE_ENABLED && !warnToastEnabled + : level === "error" || CONSOLE_LOGGING_ENABLED; if (shouldLogToConsole) { logToConsole(level, message, sanitizedExtra); } diff --git a/lib/prompts/codex-compaction.ts b/lib/prompts/codex-compaction.ts deleted file mode 100644 index 56e8f4c..0000000 --- a/lib/prompts/codex-compaction.ts +++ /dev/null @@ -1,11 +0,0 @@ -export const CODEX_COMPACTION_PROMPT = `You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task. - -Include: -- Current progress and key decisions made -- Important context, constraints, or user preferences -- What remains to be done (clear next steps) -- Any critical data, examples, or references needed to continue - -Be concise, structured, and focused on helping the next LLM seamlessly continue the work.`; - -export const CODEX_SUMMARY_PREFIX = `Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. 
Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:`; diff --git a/lib/request/compaction-helpers.ts b/lib/request/compaction-helpers.ts deleted file mode 100644 index f1c9810..0000000 --- a/lib/request/compaction-helpers.ts +++ /dev/null @@ -1,107 +0,0 @@ -/* eslint-disable no-param-reassign */ -import { - approximateTokenCount, - buildCompactionPromptItems, - collectSystemMessages, - serializeConversation, -} from "../compaction/codex-compaction.js"; -import type { CompactionDecision } from "../compaction/compaction-executor.js"; -import { filterInput } from "./input-filters.js"; -import type { InputItem, RequestBody } from "../types.js"; -import { countConversationTurns } from "../utils/input-item-utils.js"; - -export interface CompactionSettings { - enabled: boolean; - autoLimitTokens?: number; - autoMinMessages?: number; -} - -export interface CompactionOptions { - settings: CompactionSettings; - commandText: string | null; - originalInput: InputItem[]; - preserveIds?: boolean; -} - -/** - * Drop only the latest user message (e.g., a compaction command) while preserving any later assistant/tool items. - */ -function removeLastUserMessage(items: InputItem[]): InputItem[] { - for (let index = items.length - 1; index >= 0; index -= 1) { - if (items[index]?.role === "user") { - return [...items.slice(0, index), ...items.slice(index + 1)]; - } - } - return items; -} - -function maybeBuildCompactionPrompt( - originalInput: InputItem[], - commandText: string | null, - settings: CompactionSettings, -): { items: InputItem[]; decision: CompactionDecision } | null { - if (!settings.enabled) { - return null; - } - const conversationSource = commandText ? 
removeLastUserMessage(originalInput) : originalInput; - const turnCount = countConversationTurns(conversationSource); - let trigger: "command" | "auto" | null = null; - let reason: string | undefined; - let approxTokens: number | undefined; - - if (commandText) { - trigger = "command"; - } else if (settings.autoLimitTokens && settings.autoLimitTokens > 0) { - approxTokens = approximateTokenCount(conversationSource); - const minMessages = settings.autoMinMessages ?? 8; - if (approxTokens >= settings.autoLimitTokens && turnCount >= minMessages) { - trigger = "auto"; - reason = `~${approxTokens} tokens >= limit ${settings.autoLimitTokens}`; - } - } - - if (!trigger) { - return null; - } - - const serialization = serializeConversation(conversationSource); - const promptItems = buildCompactionPromptItems(serialization.transcript); - - return { - items: promptItems, - decision: { - mode: trigger, - reason, - approxTokens, - preservedSystem: collectSystemMessages(originalInput), - serialization, - }, - }; -} - -export function applyCompactionIfNeeded( - body: RequestBody, - compactionOptions?: CompactionOptions, -): CompactionDecision | undefined { - if (!compactionOptions?.settings.enabled) { - return undefined; - } - - const compactionBuild = maybeBuildCompactionPrompt( - compactionOptions.originalInput, - compactionOptions.commandText, - compactionOptions.settings, - ); - - if (!compactionBuild) { - return undefined; - } - - const preserveIds = compactionOptions.preserveIds ?? 
false; - body.input = filterInput(compactionBuild.items, { preserveIds, preserveMetadata: true }); - delete (body as any).tools; - delete (body as any).tool_choice; - delete (body as any).parallel_tool_calls; - - return compactionBuild.decision; -} diff --git a/lib/request/input-filters.ts b/lib/request/input-filters.ts index 9ad36a3..55fc9bf 100644 --- a/lib/request/input-filters.ts +++ b/lib/request/input-filters.ts @@ -69,10 +69,44 @@ export function isOpenCodeSystemPrompt(item: InputItem, cachedPrompt: string | n return contentText.startsWith("You are a coding agent running in"); } -export async function filterOpenCodeSystemPrompts( +type FilterResult = { input?: InputItem[]; envSegments: string[] }; + +function stripOpenCodeEnvBlocks(contentText: string): { + text: string; + removed: boolean; + removedBlocks: string[]; +} { + let removed = false; + let sanitized = contentText; + const removedBlocks: string[] = []; + + // Remove the standard environment header OpenCode prepends before + const envHeaderPattern = /Here is some useful information about the environment you are running in:\s*/i; + const headerStripped = sanitized.replace(envHeaderPattern, ""); + if (headerStripped !== sanitized) { + removed = true; + sanitized = headerStripped; + } + + const patterns = [/[\s\S]*?<\/env>/g, /[\s\S]*?<\/files>/g]; + + for (const pattern of patterns) { + const matches = sanitized.match(pattern); + if (matches) { + removedBlocks.push(...matches); + removed = true; + sanitized = sanitized.replace(pattern, ""); + } + } + + return { text: sanitized.trim(), removed, removedBlocks }; +} + +async function filterOpenCodeSystemPromptsInternal( input: InputItem[] | undefined, -): Promise { - if (!Array.isArray(input)) return input; + options: { captureEnv?: boolean } = {}, +): Promise { + if (!Array.isArray(input)) return input ? 
{ input, envSegments: [] } : undefined; let cachedPrompt: string | null = null; try { @@ -81,81 +115,8 @@ export async function filterOpenCodeSystemPrompts( // Fallback to text-based detection only } - const compactionInstructionPatterns: RegExp[] = [ - /(summary[ _-]?file)/i, - /(summary[ _-]?path)/i, - /summary\s+(?:has\s+been\s+)?saved\s+(?:to|at)/i, - /summary\s+(?:is\s+)?stored\s+(?:in|at|to)/i, - /summary\s+(?:is\s+)?available\s+(?:at|in)/i, - /write\s+(?:the\s+)?summary\s+(?:to|into)/i, - /save\s+(?:the\s+)?summary\s+(?:to|into)/i, - /open\s+(?:the\s+)?summary/i, - /read\s+(?:the\s+)?summary/i, - /cat\s+(?:the\s+)?summary/i, - /view\s+(?:the\s+)?summary/i, - /~\/\.opencode/i, - /\.opencode\/.*summary/i, - ]; - - const hasCompactionMetadataFlag = (item: InputItem): boolean => { - const rawMeta = (item as Record)?.metadata ?? (item as Record)?.meta; - if (!rawMeta || typeof rawMeta !== "object") return false; - const meta = rawMeta as Record; - const metaAny = meta as Record; - const source = metaAny.source as unknown; - if (typeof source === "string" && source.toLowerCase() === "opencode-compaction") { - return true; - } - if (metaAny.opencodeCompaction === true || metaAny.opencode_compaction === true) { - return true; - } - return false; - }; - - const matchesCompactionInstruction = (value: string): boolean => - compactionInstructionPatterns.some((pattern) => pattern.test(value)); - - const sanitizeOpenCodeCompactionPrompt = (item: InputItem): InputItem | null => { - const text = extractTextFromItem(item); - if (!text) return null; - const sanitizedText = text - .split(/\r?\n/) - .map((line) => line.trimEnd()) - .filter((line) => { - const trimmed = line.trim(); - if (!trimmed) { - return true; - } - return !matchesCompactionInstruction(trimmed); - }) - .join("\n") - .replace(/\n{3,}/g, "\n\n") - .trim(); - if (!sanitizedText) { - return null; - } - const originalMentionedCompaction = /\bauto[-\s]?compaction\b/i.test(text); - let finalText = sanitizedText; - 
if (originalMentionedCompaction && !/\bauto[-\s]?compaction\b/i.test(finalText)) { - finalText = `Auto-compaction summary\n\n${finalText}`; - } - return { - ...item, - content: finalText, - }; - }; - - const isOpenCodeCompactionPrompt = (item: InputItem): boolean => { - const isSystemRole = item.role === "developer" || item.role === "system"; - if (!isSystemRole) return false; - const text = extractTextFromItem(item); - if (!text) return false; - const hasCompaction = /\b(auto[-\s]?compaction|compaction|compact)\b/i.test(text); - const hasSummary = /\b(summary|summarize|summarise)\b/i.test(text); - return hasCompaction && hasSummary && matchesCompactionInstruction(text); - }; - const filteredInput: InputItem[] = []; + const envSegments: string[] = []; for (const item of input) { if (item.role === "user") { filteredInput.push(item); @@ -166,19 +127,38 @@ export async function filterOpenCodeSystemPrompts( continue; } - const compactionMetadataFlagged = hasCompactionMetadataFlag(item); - if (compactionMetadataFlagged || isOpenCodeCompactionPrompt(item)) { - const sanitized = sanitizeOpenCodeCompactionPrompt(item); - if (sanitized) { - filteredInput.push(sanitized); + const contentText = extractTextFromItem(item); + if (typeof contentText === "string" && contentText.length > 0) { + const { text, removed, removedBlocks } = stripOpenCodeEnvBlocks(contentText); + if (options.captureEnv && removedBlocks.length > 0) { + envSegments.push(...removedBlocks.map((block) => block.trim()).filter(Boolean)); + } + if (removed && text.length === 0) { + continue; + } + if (removed) { + filteredInput.push({ ...item, content: text }); + continue; } - continue; } filteredInput.push(item); } - return filteredInput; + return { input: filteredInput, envSegments }; +} + +export async function filterOpenCodeSystemPrompts( + input: InputItem[] | undefined, +): Promise { + const result = await filterOpenCodeSystemPromptsInternal(input); + return result?.input; +} + +export async function 
filterOpenCodeSystemPromptsWithEnv( + input: InputItem[] | undefined, +): Promise { + return filterOpenCodeSystemPromptsInternal(input, { captureEnv: true }); } function analyzeBridgeRequirement( diff --git a/lib/request/request-transformer.ts b/lib/request/request-transformer.ts index d6c9f4e..f61f6b7 100644 --- a/lib/request/request-transformer.ts +++ b/lib/request/request-transformer.ts @@ -6,7 +6,9 @@ import { addToolRemapMessage, filterInput, filterOpenCodeSystemPrompts, + filterOpenCodeSystemPromptsWithEnv, } from "./input-filters.js"; + import { getModelConfig, getReasoningConfig, normalizeModel } from "./model-config.js"; import { ensurePromptCacheKey, logCacheKeyDecision } from "./prompt-cache.js"; import { normalizeToolsForCodexBody } from "./tooling.js"; @@ -60,7 +62,25 @@ async function transformInputForCodex( } if (codexMode) { - workingInput = await filterOpenCodeSystemPrompts(workingInput); + const appendEnvTail = process.env.CODEX_APPEND_ENV_CONTEXT === "1"; + if (appendEnvTail) { + const result = await filterOpenCodeSystemPromptsWithEnv(workingInput); + workingInput = result?.input; + if (result?.envSegments?.length) { + workingInput = workingInput || []; + workingInput = [ + ...(workingInput || []), + { + type: "message", + role: "developer", + content: result.envSegments.join("\n"), + }, + ]; + } + } else { + workingInput = await filterOpenCodeSystemPrompts(workingInput); + } + if (!preserveIds) { workingInput = filterInput(workingInput, { preserveIds }); } diff --git a/lib/session/session-manager.ts b/lib/session/session-manager.ts index c1c641a..c616d24 100644 --- a/lib/session/session-manager.ts +++ b/lib/session/session-manager.ts @@ -3,8 +3,7 @@ import { SESSION_CONFIG } from "../constants.js"; import { logDebug, logWarn } from "../logger.js"; import { PROMPT_CACHE_FORK_KEYS } from "../request/prompt-cache.js"; import type { CodexResponsePayload, InputItem, RequestBody, SessionContext, SessionState } from "../types.js"; -import { 
cloneInputItems, deepClone } from "../utils/clone.js"; -import { isAssistantMessage, isUserMessage } from "../utils/input-item-utils.js"; +import { cloneInputItems } from "../utils/clone.js"; export interface SessionManagerOptions { enabled: boolean; @@ -20,37 +19,6 @@ function computeHash(items: InputItem[]): string { return createHash("sha1").update(JSON.stringify(items)).digest("hex"); } -function extractLatestUserSlice(items: InputItem[] | undefined): InputItem[] { - if (!Array.isArray(items) || items.length === 0) { - return []; - } - - let lastUserIndex = -1; - for (let index = items.length - 1; index >= 0; index -= 1) { - const item = items[index]; - if (item && isUserMessage(item)) { - lastUserIndex = index; - break; - } - } - - if (lastUserIndex < 0) { - return []; - } - - const tail: InputItem[] = []; - for (let index = lastUserIndex; index < items.length; index += 1) { - const item = items[index]; - if (item && (isUserMessage(item) || isAssistantMessage(item))) { - tail.push(item); - } else { - break; - } - } - - return cloneInputItems(tail); -} - function longestSharedPrefixLength(previous: InputItem[], current: InputItem[]): number { if (previous.length === 0 || current.length === 0) { return 0; @@ -134,7 +102,7 @@ function findSuffixReuseStart(previous: InputItem[], current: InputItem[]): numb return start; } -type PrefixChangeCause = "system_prompt_changed" | "history_pruned" | "unknown"; +type PrefixChangeCause = "system_prompt_changed" | "history_pruned" | "user_message_changed" | "unknown"; type PrefixChangeAnalysis = { cause: PrefixChangeCause; @@ -179,6 +147,19 @@ function analyzePrefixChange( }; } + if (firstPrevious?.role === "user" && firstIncoming?.role === "user") { + return { + cause: "user_message_changed", + details: { + mismatchIndex: sharedPrefixLength, + previousFingerprint: fingerprintInputItem(firstPrevious), + incomingFingerprint: fingerprintInputItem(firstIncoming), + previousRole: firstPrevious.role, + incomingRole: 
firstIncoming.role, + }, + }; + } + return { cause: "unknown", details: { @@ -410,12 +391,16 @@ export class SessionManager { if (sharedPrefixLength === 0) { logWarn("SessionManager: prefix mismatch detected, regenerating cache key", { sessionId: state.id, + promptCacheKey: state.promptCacheKey, sharedPrefixLength, previousItems: state.lastInput.length, incomingItems: input.length, + previousHash: state.lastPrefixHash, + incomingHash: inputHash, prefixCause: prefixAnalysis.cause, ...prefixAnalysis.details, }); + const refreshed = this.resetSessionInternal(state.id, true); if (!refreshed) { return undefined; @@ -453,20 +438,19 @@ export class SessionManager { lastUpdated: Date.now(), lastCachedTokens: state.lastCachedTokens, bridgeInjected: state.bridgeInjected, - compactionBaseSystem: state.compactionBaseSystem - ? cloneInputItems(state.compactionBaseSystem) - : undefined, - compactionSummaryItem: state.compactionSummaryItem - ? deepClone(state.compactionSummaryItem) - : undefined, }; + this.sessions.set(forkSessionId, forkState); logWarn("SessionManager: prefix mismatch detected, forking session", { sessionId: state.id, + promptCacheKey: state.promptCacheKey, forkSessionId, + forkPromptCacheKey, sharedPrefixLength, previousItems: state.lastInput.length, incomingItems: input.length, + previousHash: state.lastPrefixHash, + incomingHash: inputHash, prefixCause: prefixAnalysis.cause, ...prefixAnalysis.details, }); @@ -492,39 +476,6 @@ export class SessionManager { return context; } - public applyCompactionSummary( - context: SessionContext | undefined, - payload: { baseSystem: InputItem[]; summary: string }, - ): void { - if (!context?.enabled) return; - const state = context.state; - state.compactionBaseSystem = cloneInputItems(payload.baseSystem); - state.compactionSummaryItem = deepClone({ - type: "message", - role: "user", - content: payload.summary, - }); - } - - public applyCompactedHistory( - body: RequestBody, - context: SessionContext | undefined, - opts?: { 
skip?: boolean }, - ): void { - if (!context?.enabled || opts?.skip) { - return; - } - const baseSystem = context.state.compactionBaseSystem; - const summary = context.state.compactionSummaryItem; - if (!baseSystem || !summary) { - return; - } - const tail = extractLatestUserSlice(body.input); - const merged = [...cloneInputItems(baseSystem), deepClone(summary), ...tail]; - // eslint-disable-next-line no-param-reassign - body.input = merged; - } - public recordResponse( context: SessionContext | undefined, payload: CodexResponsePayload | undefined, diff --git a/lib/types.ts b/lib/types.ts index 8d94ebf..9567d9c 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -30,6 +30,8 @@ export interface LoggingConfig { debug?: boolean; /** Whether warning-level toasts should be shown (default: false) */ showWarningToasts?: boolean; + /** Whether warnings should also be mirrored to console (default: false) */ + logWarningsToConsole?: boolean; /** Override max bytes before rolling log rotation */ logMaxBytes?: number; /** Override number of rotated log files to keep */ @@ -193,8 +195,6 @@ export interface SessionState { lastUpdated: number; lastCachedTokens?: number; bridgeInjected?: boolean; // Track whether Codex-OpenCode bridge prompt was added - compactionBaseSystem?: InputItem[]; - compactionSummaryItem?: InputItem; } /** diff --git a/scripts/inspect-codex-logs.mjs b/scripts/inspect-codex-logs.mjs new file mode 100644 index 0000000..f5948ab --- /dev/null +++ b/scripts/inspect-codex-logs.mjs @@ -0,0 +1,139 @@ +#!/usr/bin/env node +import { readFile, readdir } from "node:fs/promises"; +import path from "node:path"; +import os from "node:os"; + +const DEFAULT_DIR = path.join(os.homedir(), ".opencode", "logs", "codex-plugin"); + +function getArg(flag, fallback) { + const idx = process.argv.indexOf(flag); + if (idx === -1) return fallback; + const value = process.argv[idx + 1]; + if (!value || value.startsWith("-")) return true; + return value; +} + +function parseFilters() { + 
return { + dir: getArg("--dir", DEFAULT_DIR), + limit: Number(getArg("--limit", 10)) || 10, + id: getArg("--id", null), + stage: getArg("--stage", null), + }; +} + +function safeRoles(input) { + if (!Array.isArray(input)) return []; + const roles = new Set(); + for (const item of input) { + if (item && typeof item.role === "string" && item.role.trim()) { + roles.add(item.role.trim()); + } + } + return Array.from(roles); +} + +function summarizeStage(stage, data) { + const body = data.body || {}; + const model = data.model || data.normalizedModel || body.model || data.originalModel; + const promptCacheKey = body.prompt_cache_key || body.promptCacheKey; + const inputLength = Array.isArray(body.input) ? body.input.length : data.inputLength; + const roles = safeRoles(body.input); + const reasoning = body.reasoning || data.reasoning || {}; + const include = body.include || data.include; + return { + stage, + timestamp: data.timestamp, + model, + originalModel: data.originalModel, + promptCacheKey, + inputLength, + roles, + reasoning, + textVerbosity: body.text?.verbosity || data.textVerbosity, + include, + usage: data.usage, + }; +} + +async function readJson(filePath) { + const raw = await readFile(filePath, "utf8"); + return JSON.parse(raw); +} + +async function collectLogs(dir, stageFilter, idFilter, limit) { + const entries = await readdir(dir); + const pattern = /^request-(\d+)-(.+)\.json$/; + const requests = new Map(); + + for (const entry of entries) { + const match = entry.match(pattern); + if (!match) continue; + const [, idStr, stage] = match; + if (stageFilter && stage !== stageFilter) continue; + if (idFilter && idStr !== String(idFilter)) continue; + const id = Number(idStr); + const filePath = path.join(dir, entry); + const data = await readJson(filePath).catch(() => null); + if (!data) continue; + if (!requests.has(id)) { + requests.set(id, []); + } + requests.get(id).push({ stage, data, filePath }); + } + + const ids = Array.from(requests.keys()) + 
.sort((a, b) => b - a) + .slice(0, limit); + return ids.map((id) => ({ id, stages: requests.get(id) || [] })); +} + +function printSummary(requests) { + for (const { id, stages } of requests) { + console.log(`\n# Request ${id}`); + for (const { stage, data, filePath } of stages.sort((a, b) => a.stage.localeCompare(b.stage))) { + const summary = summarizeStage(stage, data); + console.log(`- stage: ${summary.stage} (${filePath})`); + console.log(` timestamp: ${summary.timestamp || "n/a"}`); + console.log( + ` model: ${summary.model || "n/a"}${summary.originalModel ? ` (orig ${summary.originalModel})` : ""}`, + ); + console.log(` prompt_cache_key: ${summary.promptCacheKey || "n/a"}`); + console.log(` inputLength: ${summary.inputLength ?? "n/a"}`); + if (summary.roles.length) { + console.log(` roles: ${summary.roles.join(", ")}`); + } + if (summary.reasoning?.effort || summary.reasoning?.summary) { + console.log( + ` reasoning: effort=${summary.reasoning.effort || "?"}, summary=${summary.reasoning.summary || "?"}`, + ); + } + if (summary.textVerbosity) { + console.log(` text verbosity: ${summary.textVerbosity}`); + } + if (Array.isArray(summary.include)) { + console.log(` include: ${summary.include.join(", ")}`); + } + if (summary.usage?.cached_tokens !== undefined) { + console.log(` cached_tokens: ${summary.usage.cached_tokens}`); + } + } + } +} + +async function main() { + const { dir, limit, id, stage } = parseFilters(); + try { + const requests = await collectLogs(dir, stage, id, limit); + if (requests.length === 0) { + console.log("No request logs found."); + return; + } + printSummary(requests); + } catch (error) { + console.error(`Failed to process logs: ${error.message}`); + process.exitCode = 1; + } +} + +main(); diff --git a/spec/auto-compaction-summary.md b/spec/auto-compaction-summary.md deleted file mode 100644 index 26cf853..0000000 --- a/spec/auto-compaction-summary.md +++ /dev/null @@ -1,32 +0,0 @@ -# Auto Compaction Summary Delivery - -## Context -- 
Users report that after OpenCode auto compaction fires, Codex-based agents respond with messages like `I don’t see the “above summary” you mentioned`, meaning the summarised context never reaches the model. -- CODEX_MODE currently strips any developer/system message that matches the auto-compaction heuristic in `filterOpenCodeSystemPrompts`, so the summary payload gets dropped before the bridge prompt or user instruction runs. - -## Affected Code -- `lib/request/request-transformer.ts:539-592` — `filterOpenCodeSystemPrompts()` removes messages detected by `isOpenCodeCompactionPrompt`, with no sanitisation or pass-through, so summaries disappear altogether. -- `test/request-transformer.test.ts:505-583` — lacks coverage for compaction prompts, so regressions around summary preservation go unnoticed. - -## External Signals -- GitHub issue [sst/opencode#2945](https://github.com/sst/opencode/issues/2945) discusses context loss after compaction and gives us a user-facing reproduction. -- Direct user transcript provided in this task highlights Codex replying “I don’t see the above summary,” confirming summaries are filtered before they ever reach the agent. - -## Requirements -1. Detect OpenCode compaction prompts but **sanitize** them instead of wholesale removal: - - Keep the actual summary text in the conversation. - - Strip only noisy guidance about nonexistent summary files or paths. - - Maintain developer-role metadata so downstream logic (bridge prompt injection, etc.) still works. -2. If a compaction prompt contains nothing except invalid file instructions, drop it to avoid confusing the agent. -3. Add regression tests covering: - - Summary text survives compaction filtering while path instructions are removed. - - Pure file-instruction prompts (no summary content) are still discarded. -4. Document behaviour inline so future updates know why compaction prompts are rewritten rather than discarded. 
- -## Definition of Done -- Running `npm test` locally covers the new cases and passes. -- Auto-compaction messages in live sessions now show summaries instead of “missing summary” errors, verified by inspecting transformed input in unit tests (and optionally via manual logging). -- Spec updated with decisions (this file) and commit references once implemented. - -## Changelog -- 2025-11-16: Implemented sanitized compaction prompt handling, preserved summaries, and added regression tests covering both summary retention and pure instruction drops. diff --git a/spec/cache-analysis.md b/spec/cache-analysis.md index 15ea8e9..a10029e 100644 --- a/spec/cache-analysis.md +++ b/spec/cache-analysis.md @@ -1,9 +1,11 @@ # Cache Comparison Analysis Spec ## Objective + Summarize caching behaviors across this plugin, the upstream `openai/codex` CLI, and the `sst/opencode` runtime to identify potential cache correctness issues (prompt caching, instruction caching, session reuse) that could affect bridging Codex into OpenCode. ## Code References + - `lib/cache/session-cache.ts:32-114` – local TTL-based session cache implementation with eviction metrics hooks. - `lib/cache/cache-warming.ts:30-151` – startup warming sequence and warm-state probes. - `lib/prompts/codex.ts:20-158` – GitHub-backed Codex instruction caching (15 min TTL, release tag probes, bundled fallback). @@ -13,31 +15,36 @@ Summarize caching behaviors across this plugin, the upstream `openai/codex` CLI, - `index.ts:124-211` – how warm caches + session manager integrate into fetch flow. ### Upstream `openai/codex` + - `codex-rs/core/src/client.rs:L246-L268` – always attaches `prompt_cache_key` = conversation ID; handles `store` toggling per provider. - `codex-rs/core/src/client_common.rs:L276-L331` – payload structure includes `prompt_cache_key`, `store`, reasoning + verbosity defaults. - `codex-rs/core/tests/suite/prompt_caching.rs:L481-L640` – reference behavior for cache reuse, prefix consistency, and overrides. 
- `codex-rs/core/src/conversation_manager.rs:L96-L251` – lifecycle for sessions, fork handling, and history reuse guarantees. ### `sst/opencode` + - `packages/opencode/src/provider/transform.ts:L86-L118` – provider option shaping, automatic `promptCacheKey` assignment, and runtime-specific defaults (`include`, `reasoningSummary`). ## Existing Issues / PRs + - Issues are disabled on this repository (`gh issue list -L 5`). -- Open PRs: #2 `this is a thing` (branch `bug-fix/compaction`, opened 2025-11-11T01:50:35Z). ## Requirements + 1. Map cache responsibilities for instructions, prompts, and session state across all three runtimes. 2. Highlight behavioral gaps where this plugin diverges from Codex CLI guarantees (e.g., prompt prefix stability, `prompt_cache_key` management, TTL policies). 3. Contrast with OpenCode runtime expectations (session IDs, provider defaults) to flag integration risks. 4. Produce actionable list of potential caching issues plus validation steps. ## Definition of Done + - Written comparison covering instruction caching, prompt caching, bridge prompt deduping, and session cache key management. - At least three concrete issue hypotheses backed by file references (include upstream references where applicable). - Recommendations for instrumentation or tests to validate each hypothesis. - Proposed validation/mitigation steps align with both Codex CLI behavior and OpenCode runtime constraints. ## Plan (Phases) + 1. **Discovery** – Review this repo's cache modules, session manager, and request transformer (completed per references above). 2. **Upstream Baseline** – Document how `openai/codex` handles prompt caching/session reuse (client + tests reviewed). 3. **Runtime Contrast** – Capture relevant parts of `sst/opencode` provider transformations impacting caching. 
@@ -48,16 +55,19 @@ Summarize caching behaviors across this plugin, the upstream `openai/codex` CLI, ## Phase 3 – Runtime Contrast Findings ### Instruction Caching Responsibilities + - **Plugin (`lib/prompts/codex.ts:20-158`)** – Fetches the Codex CLI instructions straight from the latest GitHub release, writes them to `~/.opencode/cache/codex-instructions.md`, and mirrors them into an in-memory session cache under both the release-specific key (`codex:{etag}:{tag}`) and the sentinel key `"latest"`. Cache warming (`lib/cache/cache-warming.ts:30-94`) simply calls these fetchers and records the result, so the plugin is responsible for both persistence and warm-up heuristics. - **Codex CLI (`codex-rs/core/src/client.rs`, payload builder around `ResponsesApiRequest`)** – Ships the instructions with the binary and always injects them per request via `prompt.get_full_instructions()`. There is no runtime fetch, which means the CLI never risks network failures while pulling the prompt, but it also means end users must upgrade the CLI to pick up a new instruction release. - **OpenCode Runtime (`packages/opencode/src/provider/transform.ts`)** – Delegates instruction management entirely to the provider. The runtime does not attempt to cache Codex instructions; it only sets provider options (e.g., `promptCacheKey`, `include`, `reasoningSummary`). When this plugin is active, OpenCode expects the provider (us) to guarantee that instruction caching matches Codex's expectations. ### Prompt + Session Caching Responsibilities + - **Codex CLI** – Always sets `prompt_cache_key = conversation_id` on every Responses API call and keeps the entire prefix (instructions + environment context + full history) byte-identical between turns (`codex-rs/core/tests/suite/prompt_caching.rs`). This guarantees that the backend cache can reuse encrypted reasoning and prefix tokens whenever the key repeats. 
-- **OpenCode Runtime** – Uses `ProviderTransform.options()` to set `promptCacheKey = sessionID` (for both the built-in OpenCode provider and OpenAI-compatible providers) and to force `store: false`, `include: ["reasoning.encrypted_content"]`, and `reasoningSummary: "auto"` when targeting gpt-5-family models. OpenCode *assumes* that the downstream provider will faithfully reuse the prefix that corresponds to this key. +- **OpenCode Runtime** – Uses `ProviderTransform.options()` to set `promptCacheKey = sessionID` (for both the built-in OpenCode provider and OpenAI-compatible providers) and to force `store: false`, `include: ["reasoning.encrypted_content"]`, and `reasoningSummary: "auto"` when targeting gpt-5-family models. OpenCode _assumes_ that the downstream provider will faithfully reuse the prefix that corresponds to this key. - **Plugin** – Extracts host-provided keys from either `prompt_cache_key`, `promptCacheKey`, or nested metadata (`lib/request/request-transformer.ts:692-706`) and, if prompt caching is enabled, replaces them with a sanitized per-session key maintained by `SessionManager` (`lib/session/session-manager.ts:117-214`). Prefix tracking relies on the exact JSON structure of the filtered input (`filterInput` strips IDs but leaves metadata intact), and prefix mismatches trigger a new random `prompt_cache_key` via `resetSessionInternal()`. ### Cache Warm / Diagnostics Responsibilities + - `warmCachesOnStartup()` cleans expired entries, fetches Codex + OpenCode prompts, and records which cache warmed (`lib/cache/cache-warming.ts:30-94`). - `areCachesWarm()` and the `/codex-metrics` command rely on sentinel keys (`"latest"` and `"main"`) instead of TTL metadata, so a cache entry is considered warm as long as it still lives in memory, regardless of whether the underlying ETag is stale. 
- `getCacheWarmingStats()` currently re-invokes the fetchers, which can trigger additional network requests even when the caller only needs a snapshot—unlike the Codex CLI, which never has to re-fetch instructions for diagnostics. @@ -67,7 +77,8 @@ Summarize caching behaviors across this plugin, the upstream `openai/codex` CLI, ## Phase 4 – Synthesis & Issue Hypotheses ### 1. Prompt caching is opt-in, unlike Codex CLI defaults -- **Evidence**: `index.ts:123-125` instantiates `SessionManager` with `enabled = pluginConfig.enablePromptCaching ?? false`, so caching is *disabled* unless users flip a config switch. Codex CLI always attaches a `prompt_cache_key` (`codex-rs/core/src/client.rs`, `ResponsesApiRequest` builder) and therefore guarantees cache reuse. + +- **Evidence**: `index.ts:123-125` instantiates `SessionManager` with `enabled = pluginConfig.enablePromptCaching ?? false`, so caching is _disabled_ unless users flip a config switch. Codex CLI always attaches a `prompt_cache_key` (`codex-rs/core/src/client.rs`, `ResponsesApiRequest` builder) and therefore guarantees cache reuse. - **Risk**: Any OpenCode workflow that forgets to set `promptCacheKey` (custom providers, tests, future refactors) will run fully stateless through this plugin, even though we aggressively strip IDs and system prompts. That yields zero cache hits and higher token usage than either OpenCode or the Codex CLI expect. - **Mitigation / Validation**: - Default `enablePromptCaching` to `true`, or automatically fall back to `SessionManager` when no host key is present. @@ -76,6 +87,7 @@ Summarize caching behaviors across this plugin, the upstream `openai/codex` CLI, - **Mitigation status**: Implemented via `index.ts:121-130` (prompt caching default + warning) and `lib/request/request-transformer.ts:645-712` (auto-derives or generates `prompt_cache_key`), with regression tests in `test/request-transformer.test.ts:546-586`. ### 2. 
Prefix comparisons include volatile metadata, causing spurious cache resets + - **Evidence**: `sharesPrefix()` (`lib/session/session-manager.ts:38-57`) uses `JSON.stringify` over the entire filtered input. `filterInput()` (`lib/request/request-transformer.ts:389-412`) removes IDs but keeps every `metadata` object untouched. OpenCode frequently stamps messages with per-turn metadata (trace IDs, sandbox policy diffs, file lists), so two logically identical prefixes may fail the byte-for-byte comparison even though only metadata changed. Codex CLI avoids this by constructing the prefix itself (see the `prompt_caching.rs` tests verifying exact prefix reuse even when environment overrides apply). - **Risk**: Every metadata mutation forces `SessionManager` to call `resetSessionInternal(..., true)`, generating a brand-new `prompt_cache_key`. That makes cache hit rates fall toward zero, exactly the problem the CLI tests guard against. - **Mitigation / Validation**: @@ -85,6 +97,7 @@ Summarize caching behaviors across this plugin, the upstream `openai/codex` CLI, - **Mitigation status**: `filterInput()` now removes metadata when operating in stateless mode (`lib/request/request-transformer.ts:389-421`) and `test/request-transformer.test.ts:245-280` guards the behavior; IDs/metadata are only preserved when `preserveIds` is true to keep host-managed sessions stable. ### 3. Session cache never evicts, diverging from Codex conversation lifecycle + - **Evidence**: `SessionManager` stores every conversation in an in-memory `Map` with no TTL or size cap (`lib/session/session-manager.ts:108-214, 273-313`). There is no `remove` call anywhere in the plugin. In contrast, the Codex CLI `ConversationManager` exposes `remove_conversation()` (`codex-rs/core/src/conversation_manager.rs`) and reuses conversations created via CLI flows, so memory usage is bounded by active chats. 
- **Risk**: An OpenCode user who runs many short-lived sessions (e.g., multiple `opencode run` commands) will accumulate unbounded session state inside the plugin process, eventually degrading cache lookup time or exhausting memory. Worse, `/codex-metrics` will report stale "recent sessions" even after the conversations are gone, obscuring real cache health. - **Mitigation / Validation**: @@ -94,6 +107,7 @@ Summarize caching behaviors across this plugin, the upstream `openai/codex` CLI, - **Mitigation status**: `lib/session/session-manager.ts:1-215` now enforces `SESSION_IDLE_TTL_MS` + `SESSION_MAX_ENTRIES`, pruning maps on every `getContext()` call, and `test/session-manager.test.ts:152-197` verifies idle/overflow eviction scenarios. ### 4. Diagnostics may trigger unwanted network fetches + - **Evidence**: `getCacheWarmingStats()` (`lib/cache/cache-warming.ts:121-149`) calls `getCodexInstructions()` and `getOpenCodeCodexPrompt()`, both of which perform ETag-guarded network requests when TTL has expired. Codex CLI diagnostics run entirely offline because the instructions are bundled. - **Risk**: Invoking a diagnostics endpoint (or `/codex-metrics` once it exposes warm stats) could accidentally spam GitHub, undermining the "zero network" goal of the command and masking real cold-start issues. - **Mitigation / Validation**: diff --git a/spec/complexity-reduction.md b/spec/complexity-reduction.md index 0174bfd..0b02329 100644 --- a/spec/complexity-reduction.md +++ b/spec/complexity-reduction.md @@ -27,6 +27,6 @@ ## Plan (Phases) 1. **Prompt Fetchers**: Refactor `getCodexInstructions` and `getOpenCodeCodexPrompt` by extracting helper routines for cache reads/writes, freshness checks, and network fetch handling to reduce branching. -2. **Request Transformation**: Break down `transformRequestForCodex` and tool normalization into smaller helpers (e.g., compaction config, logging wrappers, tool converters) to simplify flow. +2. 
**Request Transformation**: Break down `transformRequestForCodex` and tool normalization into smaller helpers (e.g., logging wrappers, tool converters) to simplify flow. 3. **Error/Reasoning Handling**: Simplify `handleErrorResponse` and `getReasoningConfig` with helper functions and clearer rule tables; ensure messaging and rate-limit parsing stay intact. 4. **Validation**: Run targeted lint/tests to confirm complexity warnings resolved and behavior intact. diff --git a/spec/issue-triage-2025-11-20.md b/spec/issue-triage-2025-11-20.md deleted file mode 100644 index 333e4e9..0000000 --- a/spec/issue-triage-2025-11-20.md +++ /dev/null @@ -1,23 +0,0 @@ -# Issue Triage — 2025-11-20 - -Scope: Verify status of open issues #6, #23, #22, #21, #39, #24, #40 against current main branch. - -## Findings - -- #23 SessionManager fork sync — Not done. `lib/session/session-manager.ts` extractForkIdentifier only checks `forkId|fork_id|branchId|branch_id` (lines ~120-143); does not consider `parentConversationId|parent_conversation_id` used in prompt cache derivation. -- #22 Compaction metadata flag — Not done. `lib/request/input-filters.ts` uses regex heuristics only to detect OpenCode compaction prompts (lines ~82-139); no metadata flag preferred path. -- #21 Summary-aware tail extraction — Not done. `lib/compaction/codex-compaction.ts` `extractTailAfterSummary` returns slice from last `user` message (lines ~120-129); no summary marker awareness. -- #24 Tests clarify tail semantics — Not done. `test/codex-compaction.test.ts` still names test "extracts tail after the latest user summary message" and asserts last-user behavior (lines ~80-89). -- #39 README installation section missing — Not done. README links to `#installation` (e.g., line ~531) but no `## Installation` heading exists. -- #40 Model stats HTML dashboard server — Not started. No references to "dashboard"/"stats html" in repo. 
-- #6 Richer metrics/inspect commands — Still blocked by upstream; no new implementation detected. - -## Definition of Done (per issue) - -- #23: Session key fork detection matches prompt cache fork hints (`parentConversationId` variants) with tests. -- #22: Input filtering prefers explicit metadata flag for OpenCode compaction prompts, falling back to heuristics. -- #21: Tail extraction skips summary-marked items; tests updated. -- #24: Tests renamed/rewritten to reflect current semantics and cover summary-aware path once added. -- #39: README gains actual Installation section and linked anchor. -- #40: Dashboard server implemented or scoped; code/tests/docs added. -- #6: Upstream dependency resolved; enhanced metrics/inspect commands implemented and tested. diff --git a/spec/log-warnings-default-file-only.md b/spec/log-warnings-default-file-only.md new file mode 100644 index 0000000..d06001b --- /dev/null +++ b/spec/log-warnings-default-file-only.md @@ -0,0 +1,39 @@ +# Log Warnings Default to File + +## Context + +OpenCode renders console warnings inline, causing severe UI clutter (see reported screenshot). The plugin currently logs warnings to console by default (e.g., personal-use notice in `index.ts:69-73` and other `logWarn` calls), even when request logging/debug flags are off. We need the default behavior to keep warnings out of the UI while still recording them to disk/app logs as appropriate. + +## Relevant Files & Pointers + +- `lib/logger.ts:12-111` — env defaults and logging flags; `WARN_TOASTS_ENABLED` and console toggles derived here. +- `lib/logger.ts:172-211` — `emit` decides forwarding to app log, toasts, and console (warnings currently mirrored to console by default). +- `lib/logger.ts:284-319` — `logToConsole` behavior; logs warn/error unconditionally. +- `index.ts:66-118` — plugin boot emits personal-use warning via `logWarn` after logger configuration. +- `lib/types.ts:26-39` — `LoggingConfig` fields (currently no toggle for console warnings). 
+- `test/logger.test.ts:140-229` — expectations for warn behavior (console, toasts) that will need updates. +- `lib/config.ts:12-18` — default config includes `logging.showWarningToasts: false`. + +## Existing Issues / PRs + +- None identified in repo related to warning display/logging defaults. + +## Definition of Done + +- Warning logs are not sent to console/UI by default; they are recorded to file/app logs without cluttering the terminal. +- Opt-in mechanism exists to surface warning logs to console/UI when desired. +- Personal-use and other warning emissions follow the new default and do not regress logging reliability. +- Tests updated/added to cover the new default and opt-in paths. + +## Requirements + +- Default: warnings persist to disk/app logs without console output; errors remain console-visible. +- Provide a config/env switch to re-enable console warnings for debugging or when toasts are desired. +- Preserve existing toast support (`showWarningToasts`) and avoid duplicate surfaces (toast + console). +- Maintain existing log rotation/queue behaviors and non-intrusive behavior in test envs. + +## Plan (Phases) + +- **Phase 1: Analysis** — Confirm logger state derivations and warning pathways; decide switch shape (config/env) to keep warn off console by default while allowing opt-in. +- **Phase 2: Implementation** — Update logger defaults/emit logic + config schema to make warn-to-console opt-in and ensure file/app logging retains warnings. +- **Phase 3: Validation** — Refresh tests for new defaults and opt-in behavior; run targeted logger suite (and related) to ensure changes pass. diff --git a/spec/merge-conflict-resolution.md b/spec/merge-conflict-resolution.md index 55927e2..90ecd40 100644 --- a/spec/merge-conflict-resolution.md +++ b/spec/merge-conflict-resolution.md @@ -1,6 +1,7 @@ # Merge Conflict Resolution Plan (ops/release-workflow) ## Context + - Branch: `ops/release-workflow` with merge state and unmerged paths.
- Conflicted files (from `git diff --name-only --diff-filter=U`): - `.github/workflows/pr-auto-base.yml` @@ -18,17 +19,19 @@ - `test/session-manager.test.ts` ## Notable conflict locations (line references from current workspace) + - `index.ts`: bridge fetch creation formatting and indentation around ~126-148. - `lib/logger.ts`: toast/app log forwarding logic around ~142-178. - `lib/prompts/codex.ts`: cache metadata handling and ETag logic around ~177-270. - `lib/prompts/opencode-codex.ts`: cache migration/ETag fetch helpers around ~88-357. -- `lib/request/fetch-helpers.ts`: compaction settings and error enrichment around ~166-470. -- `lib/request/request-transformer.ts`: imports, compaction, prompt cache key, bridge/tool injection across file (multiple conflicts starting near top and ~620-1210). +- `lib/request/fetch-helpers.ts`: settings and error enrichment around ~166-470. +- `lib/request/request-transformer.ts`: imports, prompt cache key, bridge/tool injection across file (multiple conflicts starting near top and ~620-1210). - Workflows: `pr-auto-base.yml` trigger/permissions/checkout around ~5-53; `staging-release-prep.yml` release branch/tag creation and PR automation around ~25-296. - Config/test files: `eslint.config.mjs` test overrides (~95-100); `test/logger.test.ts` toast/console expectations (~1-190); `test/session-manager.test.ts` metrics variable naming (~159-165); `package.json` & `package-lock.json` version bump (0.3.0 vs 0.2.0). ## Definition of Done -- All merge conflicts resolved with cohesive logic that preserves newer behaviors (cache handling, logging/toast routing, compaction settings, workflow automation, version 0.3.0). + +- All merge conflicts resolved with cohesive logic that preserves newer behaviors (cache handling, logging/toast routing, workflow automation, version 0.3.0). - TypeScript sources compile conceptually (no mixed indentation or stale references). - Package metadata consistent across `package.json` and `package-lock.json`. 
- Workflow YAML passes basic syntax review. @@ -36,24 +39,29 @@ - `git status` clean of conflict markers; ready for commit. ## Plan (phased) + ### Phase 1 – Workflows & Config + - Merge `.github/workflows/pr-auto-base.yml` to include checkout + sync/reopen triggers, correct permissions, GH repo usage. - Merge `.github/workflows/staging-release-prep.yml` retaining branch/tag push and auto-merge reviewer steps. - Restore `eslint.config.mjs` test overrides for max-lines. ### Phase 2 – Core Source Merges + - Align `index.ts` fetch creator call with repository style (spaces, no tabs). - Resolve `lib/logger.ts` to avoid duplicate warn logging when toast available while still forwarding error logging. - Merge `lib/prompts/codex.ts` with unified cache metadata handling and fallback semantics. - Merge `lib/prompts/opencode-codex.ts` using fresh cache/ETag helpers and migration checks. -- Merge `lib/request/fetch-helpers.ts` compaction settings builder and enriched error handling using helper functions. -- Merge `lib/request/request-transformer.ts` (imports, prompt cache handling, compaction options, bridge/tool injection) ensuring Codex-mode defaults and logging. +- Merge `lib/request/fetch-helpers.ts` settings builder and enriched error handling using helper functions. +- Merge `lib/request/request-transformer.ts` (imports, prompt cache handling, bridge/tool injection) ensuring Codex-mode defaults and logging. ### Phase 3 – Packages & Tests + - Set version to 0.3.0 in `package.json` and `package-lock.json`; keep dependency blocks aligned. - Update `test/logger.test.ts` to match toast + logging behavior and `OpencodeClient` typing. - Fix `test/session-manager.test.ts` minor variable naming conflict. ### Phase 4 – Verification + - Run targeted tests if time allows (logger/session transformer) via `npm test -- logger` subset or full `npm test` if feasible. - Final `git status` check for cleanliness. 
diff --git a/spec/open-issues-triage.md b/spec/open-issues-triage.md deleted file mode 100644 index 869d84c..0000000 --- a/spec/open-issues-triage.md +++ /dev/null @@ -1,187 +0,0 @@ -# Open Issues Triage Analysis - -**Date**: 2025-11-19 -**Repository**: open-hax/codex -**Total Open Issues**: 10 - -## Proposed Labels - -### Topic Labels - -- `authentication` - OAuth, token management, cache file conflicts -- `session-management` - SessionManager, prompt cache keys, fork handling -- `compaction` - Conversation compaction, summary handling -- `model-support` - New model variants, normalization -- `metrics` - Request inspection, performance metrics -- `documentation` - README updates, package naming - -### Priority Labels - -- `priority-high` - Breaking bugs, critical functionality -- `priority-medium` - Important features, significant improvements -- `priority-low` - Minor enhancements, documentation fixes - -### Effort Labels - -- `effort-small` - < 4 hours, simple changes -- `effort-medium` - 4-12 hours, moderate complexity -- `effort-large` - > 12 hours, complex implementation - ---- - -## Issue Triage Details - -### #26: Feature: Add support for GPT-5.1-Codex-Max model - -**Labels**: `model-support`, `priority-medium`, `effort-small` -**Related Files**: - -- `lib/request/request-transformer.ts:217-244` - Model normalization logic -- `test/request-transformer.test.ts:50-120` - Model normalization tests - -### #25: [BUG] Plugin fails with confusing errors if started with the other oauth plugin's cache files - -**Labels**: `authentication`, `priority-high`, `effort-medium` -**Related Files**: - -- `lib/auth/auth.ts:31-69` - Token validation and refresh logic -- `lib/cache/session-cache.ts` - Cache file handling -- `lib/prompts/codex.ts:79-146` - Cache file operations - -### #24: Tests: clarify extractTailAfterSummary semantics in codex-compaction - -**Labels**: `compaction`, `priority-low`, `effort-small` -**Related Files**: - -- 
`lib/compaction/codex-compaction.ts:119` - extractTailAfterSummary function -- `test/codex-compaction.test.ts:86-93` - Related tests - -### #23: SessionManager: align fork identifier with prompt cache fork hints - -**Labels**: `session-management`, `priority-medium`, `effort-medium` -**Related Files**: - -- `lib/session/session-manager.ts:139-395` - SessionManager implementation -- `lib/request/request-transformer.ts:755-925` - Fork handling and cache key logic -- `test/session-manager.test.ts:161-181` - Fork session tests - -### #22: Compaction heuristics: prefer explicit metadata flag for OpenCode prompts - -**Labels**: `compaction`, `priority-medium`, `effort-medium` -**Related Files**: - -- `lib/request/request-transformer.ts:442-506` - OpenCode prompt filtering -- `lib/compaction/codex-compaction.ts` - Compaction logic -- `test/request-transformer.test.ts:596-624` - Compaction integration tests - -### #21: Compaction: make extractTailAfterSummary summary-aware - -**Labels**: `compaction`, `priority-medium`, `effort-medium` -**Related Files**: - -- `lib/compaction/codex-compaction.ts:119` - Core function -- `lib/compaction/compaction-executor.ts:1-45` - Compaction execution -- `test/codex-compaction.test.ts:86-93` - Function tests - -### #6: Feature: richer Codex metrics and request inspection commands - -**Labels**: `metrics`, `priority-medium`, `effort-large` -**Related Files**: - -- `lib/commands/codex-metrics.ts:1-343` - Metrics command implementation -- `lib/cache/cache-metrics.ts` - Cache metrics collection -- `test/codex-metrics-command.test.ts:1-342` - Comprehensive tests - -### #5: Feature: Codex-style conversation compaction and auto-compaction in plugin - -**Labels**: `compaction`, `priority-high`, `effort-large` -**Related Files**: - -- `lib/compaction/compaction-executor.ts:1-45` - Auto-compaction logic -- `lib/request/fetch-helpers.ts:120-185` - Compaction integration -- `lib/session/session-manager.ts:296-313` - Compaction state management -- 
`test/compaction-executor.test.ts:11-131` - Compaction tests - -### #4: Feature: fork-aware prompt_cache_key handling and overrides - -**Labels**: `session-management`, `priority-high`, `effort-large` -**Related Files**: - -- `lib/request/request-transformer.ts:755-1036` - Fork-aware cache key logic -- `lib/session/session-manager.ts:83-206` - Session ID derivation -- `test/request-transformer.test.ts:715-850` - Cache key tests -- `test/session-manager.test.ts:161-181` - Fork session tests - -### #11: Docs: Fix package name in test/README.md - -**Labels**: `documentation`, `priority-low`, `effort-small` -**Related Files**: - -- `test/README.md:1-4` - Package name reference - ---- - -## Priority Summary - -### High Priority (3 issues) - -- #25: OAuth cache file conflicts (bug) -- #5: Auto-compaction implementation (feature) -- #4: Fork-aware cache keys (feature) - -### Medium Priority (5 issues) - -- #26: GPT-5.1-Codex-Max support (feature) -- #23: SessionManager fork alignment (feature) -- #22: Compaction metadata flags (feature) -- #21: Summary-aware compaction (feature) -- #6: Enhanced metrics (feature) - -### Low Priority (2 issues) - -- #24: Test clarification (maintenance) -- #11: Documentation fix (maintenance) - -## Effort Distribution - -### Large Effort (>12 hours): 3 issues - -- #6: Enhanced metrics and inspection -- #5: Auto-compaction implementation -- #4: Fork-aware cache key handling - -### Medium Effort (4-12 hours): 5 issues - -- #25: OAuth cache file conflicts -- #23: SessionManager fork alignment -- #22: Compaction metadata flags -- #21: Summary-aware compaction -- #26: GPT-5.1-Codex-Max support - -### Small Effort (<4 hours): 2 issues - -- #24: Test clarification -- #11: Documentation fix - -## Topic Distribution - -- Session Management: 2 issues (#4, #23) -- Compaction: 4 issues (#5, #21, #22, #24) -- Authentication: 1 issue (#25) -- Model Support: 1 issue (#26) -- Metrics: 1 issue (#6) -- Documentation: 1 issue (#11) - -## Recommendations - -1. 
**Immediate Focus**: Address #25 (OAuth cache conflicts) as it's a breaking bug -2. **Strategic Features**: Prioritize #4 and #5 for core functionality improvements -3. **Quick Wins**: Complete #11 and #24 for immediate closure -4. **Incremental Development**: #21, #22, #23 can be tackled in sequence as they're related -5. **Future Enhancement**: #6 and #26 can be scheduled for future releases - -## Cross-Dependencies - -- #4 (fork-aware cache keys) enables #23 (SessionManager alignment) -- #21 and #22 both enhance compaction heuristics and should be coordinated -- #5 depends on improvements from #21 and #22 for optimal implementation diff --git a/spec/plugin-log-settings-doc.md b/spec/plugin-log-settings-doc.md new file mode 100644 index 0000000..cfe719b --- /dev/null +++ b/spec/plugin-log-settings-doc.md @@ -0,0 +1,24 @@ +# Plugin log settings docs update + +## Goal + +Add logging settings to the README Plugin-Level Settings section so users see rolling log controls alongside existing plugin config options. + +## References + +- README.md: Plugin-Level Settings section starts at ~49-63. +- docs/configuration.md: Plugin Configuration and Log file management at ~373-420. +- spec/environment-variables.md: notes logging env vars overrideable via ~/.opencode/openhax-codex-config.json (~43). + +## Definition of Done + +- README Plugin-Level Settings enumerates logging controls (max bytes/files/queue) available via plugin configuration/environment variables. +- Example updated/expanded to show logging block usage. +- Documentation consistent with docs/configuration.md values and defaults. +- No broken markdown formatting. + +## Plan + +- Phase 1: Align on messaging by pulling log setting names/defaults from docs/configuration.md. +- Phase 2: Update README Plugin-Level Settings bullet list and example to include logging settings. +- Phase 3: Self-review for clarity/consistency; no tests needed (docs-only). 
diff --git a/spec/pr-2-conflict-analysis.md b/spec/pr-2-conflict-analysis.md deleted file mode 100644 index 040c6e1..0000000 --- a/spec/pr-2-conflict-analysis.md +++ /dev/null @@ -1,24 +0,0 @@ -# PR #2 Conflict Analysis - -## Context -- Local work was done on `feature/review-automation`, then `git fetch && git merge main` was executed from that branch. -- `main` in the local worktree had not been updated since before commit `f3dd0e160cddbd2f08aa4294bd5b007d6b79d18b` ("Automate CI and review workflows"), so merging it brought in no new changes. -- `git checkout main` now shows `Your branch is behind 'origin/main' by 1 commit`, confirming that the local `main` is stale relative to `origin/main`. -- PR #2 (`bug-fix/compaction` → `main`) must merge into `origin/main`, which already contains the CI automation changes above; because `feature/review-automation` has not incorporated that commit, GitHub still flags conflicts. - -## Code References -- `.github/workflows/ci.yml:1` – workflow rewritten in commit `f3dd0e1`; PR #2 still has the previous structure. -- `scripts/detect-release-type.mjs:1` – new script created in the same commit that the feature branch is missing. -- `pnpm-lock.yaml:1` – lockfile introduced in `origin/main`; branch still tracks the removed `bun.lock` / `package-lock.json`, so GitHub reports conflicts in those files. - -## Existing Issues / PRs -- PR #2 "this is a thing" (head: `bug-fix/compaction`, base: `main`). - -## Definition of Done -- Explain why GitHub reports conflicts even though `git merge main` on the feature branch says "Already up to date". -- Provide concrete steps to sync the branch with the true base (`origin/main`) so that the PR no longer conflicts. - -## Requirements -1. Update local `main` with `git checkout main && git pull --ff-only origin main`. -2. Rebase or merge `origin/main` into `feature/review-automation` (or `bug-fix/compaction`, depending on the PR head) so that commit `f3dd0e1` and its files are present locally. -3. 
Resolve resulting conflicts locally (expect them in `.github/workflows/ci.yml`, `package-lock.json`, `.gitignore`, etc.), run tests, and push the updated branch. diff --git a/spec/pr-20-review.md b/spec/pr-20-review.md deleted file mode 100644 index 03eaa24..0000000 --- a/spec/pr-20-review.md +++ /dev/null @@ -1,28 +0,0 @@ -# PR 20 Review Tracking - -## Code files referenced - -- `test/plugin-config.test.ts:45-124` – validate that the two error-handling tests are de-duplicated, single `consoleSpy` call is scoped, and asserts match the extended default config shape (`enableCodexCompaction`, `autoCompactMinMessages`). -- `lib/request/fetch-helpers.ts:136-155` – ensure `applyCompactedHistory` is guarded by `compactionEnabled` and does not run when `pluginConfig.enableCodexCompaction === false`. -- `lib/request/request-transformer.ts:71-83` – keep `computeFallbackHashForBody` resilient to non-serializable metadata by wrapping the stringification in a `try/catch` and falling back to a stable seed (e.g., the normalized model name). -- `lib/request/request-transformer.ts:560-665` – preserve the compaction prompt sanitization heuristics while watching for future false positives (optional follow up). - -## Existing issues - -- `https://github.com/open-hax/codex/pull/20` (device/stealth) has open review comments from coderabbit.ai about the plugin-config tests, compaction gating, and hashing robustness. The `coderabbit` review thread `PRR_kwDOQJmo4M7O5BH7` is marked as TODO. - -## Existing PRs referenced - -- `https://github.com/open-hax/codex/pull/20` - -## Definition of done - -1. All actionable review comments on PR #20 are resolved (tests updated, compaction gating fixed, fallback hashing hardened, or noted as intentional). -2. `npm test` (or equivalent targeted regex) passes locally, proving the test suite is consistent with the new expectations. -3. The spec and summary explain which comments were addressed and why. 
- -## Requirements - -- Stick to the Codex CLI roadmap (no new features beyond review fixes). -- Do not revert or discard unrelated branch changes minted earlier in `device/stealth`. -- Maintain lint/format output (current `pnpm lint` steps already run by CI). Keep new tests minimal. diff --git a/spec/pr-29-review-analysis.md b/spec/pr-29-review-analysis.md index ddfb23c..4f0be0e 100644 --- a/spec/pr-29-review-analysis.md +++ b/spec/pr-29-review-analysis.md @@ -26,15 +26,12 @@ PR #29 has **1 unresolved review thread** from `coderabbitai` containing **19 ac 4. **Fix Mock Leakage** - `test/index.test.ts:22-28, 93-121` - Reset `sessionManager` instance mocks in `beforeEach` to prevent cross-test leakage -5. **Add Missing Test Case** - `test/codex-fetcher.test.ts` - - Add direct `compactionDecision` test case coverage - -6. **Fix Redundant Tests** - `test/codex-fetcher.test.ts:272-287` +5. **Fix Redundant Tests** - `test/codex-fetcher.test.ts:272-287` - Either provide distinct inputs for short/long text scenarios or remove redundant test ### 🔧 **Code Quality Improvements** -7. **Logger Hardening** - `lib/logger.ts:138-159` +6. **Logger Hardening** - `lib/logger.ts:138-159` - Add try/catch around `JSON.stringify(extra)` to prevent logging failures - Remove unused `error` parameter from `logToConsole` diff --git a/spec/pr-commit-2025-11-21.md b/spec/pr-commit-2025-11-21.md new file mode 100644 index 0000000..153f403 --- /dev/null +++ b/spec/pr-commit-2025-11-21.md @@ -0,0 +1,32 @@ +# Commit & PR Plan (2025-11-21) + +## Summary + +- Prepare commit and PR for current dev branch changes: logging controls, session cache key handling, prompt filtering, and removal of legacy compaction artifacts plus related docs updates. + +## Code Files & Line References + +- lib/config.ts:12-18,52-59 – Default config now includes logging toggles (warning toasts, console mirroring) merged with user config. 
+- lib/logger.ts:12-34,84-113,175-218,231-289 – Environment-driven logging defaults, overrides via plugin config, toast/console behaviors, and wrap logic to avoid truncation. +- lib/request/input-filters.ts:18-48,72-189 – Input sanitization, OpenCode system prompt filtering, bridge prompt injection with caching/continuity. +- lib/session/session-manager.ts:18-203,232-480 – Session cache key derivation (conversation/fork), prefix mismatch handling with reset or fork, prompt cache key reuse, and metrics. +- lib/types.ts:6-41,91-110 – Plugin/logging config shape and request/session typings. +- test/logger.test.ts:67-223 – Coverage for env vs config logging, warning toasts/console mirroring, rotation/queue overflow, and test-only console behavior. +- test/request-transformer.test.ts:121-1200 – Extensive cases for model normalization, prompt cache keys, bridge vs tool remap, ID stripping, include fields, and fallback logging expectations. +- test/session-manager.test.ts:48-189 – Session reuse, prefix mismatch warnings (system change/history prune), cache key regeneration, forks, and metrics/eviction behaviors. +- Removed: lib/compaction/\*.ts, lib/prompts/codex-compaction.ts, lib/request/compaction-helpers.ts, spec/auto-compaction-summary.md, spec/remove-plugin-compaction.md, spec/issue-triage-2025-11-20.md, spec/open-issues-triage.md, spec/pr-2-conflict-analysis.md, spec/pr-20-review.md, spec/review-pr-20-plan.md. + +## Existing Issues / PRs + +- None identified from history; last main commit a455bd1 "chore: release v0.4.3". Proceed on current dev branch. + +## Requirements + +- User request: commit all current changes and open a new PR from dev branch. +- Preserve user-authored changes; do not revert. + +## Definition of Done + +- All listed changes staged and committed on dev branch. +- `npm test` succeeds (or any failures documented). +- New PR opened targeting main with summary of logging/session/prompt updates and compaction removal. 
diff --git a/spec/readme-cleanup.md b/spec/readme-cleanup.md index a95f7ce..4ae4745 100644 --- a/spec/readme-cleanup.md +++ b/spec/readme-cleanup.md @@ -43,5 +43,5 @@ - 2025-11-21: Added Installation section, renamed Configuration Reference, removed standalone requirements block, moved TOS near bottom, and updated related anchors in docs/config README files. - 2025-11-21: Promoted minimal provider config (plugin array + single `openai/gpt-5.1-codex-max` model with provider/openai options) to top of Installation and Configuration Reference. - 2025-11-21: Removed non-functional Built-in Codex Commands section pending upstream support. -- 2025-11-21: Surfaced plugin-level settings (codexMode, caching, compaction) immediately after Installation with example JSON. +- 2025-11-21: Surfaced plugin-level settings (codexMode, caching) immediately after Installation with example JSON. - 2025-11-21: Removed duplicated plugin-level settings block from Configuration Reference; now it links back to the top settings section. diff --git a/spec/remove-plugin-compaction.md b/spec/remove-plugin-compaction.md deleted file mode 100644 index dfc7ff9..0000000 --- a/spec/remove-plugin-compaction.md +++ /dev/null @@ -1,30 +0,0 @@ -# Remove plugin compaction - -## Scope - -Remove Codex plugin-specific compaction (manual + auto) so compaction is left to OpenCode or other layers. - -## Code refs (entry points) - -- lib/request/fetch-helpers.ts: compaction settings, detectCompactionCommand, pass compaction options to transform, track compactionDecision. -- lib/request/request-transformer.ts: applyCompactionIfNeeded, skip transforms when compactionDecision present. -- lib/request/compaction-helpers.ts: builds compaction prompt and decision logic. -- lib/compaction/codex-compaction.ts and lib/prompts/codex-compaction.ts: prompt content and helpers (detect command, approximate tokens, build summary). -- lib/compaction/compaction-executor.ts: rewrites responses and stores summaries. 
-- lib/session/session-manager.ts: applyCompactionSummary/applyCompactedHistory state injections. -- lib/request/input-filters.ts: compaction heuristics and metadata flags. -- lib/types.ts: plugin config fields for compaction. -- lib/request/codex-fetcher.ts: finalizeCompactionResponse usage. -- Tests: compaction-executor.test.ts, codex-compaction.test.ts, compaction-helpers.test.ts, codex-fetcher.test.ts, fetch-helpers.test.ts (compaction section), request-transformer.test.ts (compaction metadata), session-manager.test.ts (compaction state), docs README/configuration/getting-started. - -## Definition of done - -- Plugin no longer performs or triggers compaction (manual/auto) in request/response flow. -- Plugin config no longer exposes compaction knobs, docs updated accordingly. -- Tests updated/removed to reflect lack of plugin compaction. - -## Requirements - -- Preserve prompt caching/session behavior unrelated to compaction. -- Avoid breaking tool/transform flow; codex bridge still applied. -- Keep code ASCII and minimal surgical changes. diff --git a/spec/request-transformer-refactor.md b/spec/request-transformer-refactor.md index 4c5eefc..c2767ec 100644 --- a/spec/request-transformer-refactor.md +++ b/spec/request-transformer-refactor.md @@ -8,7 +8,7 @@ ## Relevant Code References -- `lib/request/request-transformer.ts` lines 1-1094: monolithic helpers for model normalization, reasoning config, input filtering, bridge/tool messages, compaction, prompt cache keys, and `transformRequestBody` entrypoint. +- `lib/request/request-transformer.ts` lines 1-1094: monolithic helpers for model normalization, reasoning config, input filtering, bridge/tool messages, prompt cache keys, and `transformRequestBody` entrypoint. - `lib/request/tool-normalizer.ts` lines 1-158: provides `normalizeToolsForResponses` used by transformer but not imported. - Tests mirror structure under `test/` (e.g., `test/request-transformer.test.ts`). 
@@ -23,8 +23,8 @@ ### Phase 1: Extraction Design -- Identify logical groupings (model/reasoning config, input filtering/bridge, compaction helpers, prompt cache key utilities, tool normalization usage, main transform orchestration). -- Decide target helper modules under `lib/request/` to move into (e.g., `model-config.ts`, `input-filters.ts`, `prompt-cache.ts`, `compaction-helpers.ts`). +- Identify logical groupings (model/reasoning config, input filtering/bridge, prompt cache key utilities, tool normalization usage, main transform orchestration). +- Decide target helper modules under `lib/request/` to move into (e.g., `model-config.ts`, `input-filters.ts`, `prompt-cache.ts`). ### Phase 2: Implement Refactors @@ -39,11 +39,11 @@ ## Notes -- Preserve existing behavior (stateless filtering, bridge prompt caching, compaction decisions, prompt cache key derivation). +- Preserve existing behavior (stateless filtering, bridge prompt caching, prompt cache key derivation). - Avoid altering public APIs consumed by tests unless necessary; adjust tests if import paths change. ## Change Log -- Split `lib/request/request-transformer.ts` into helper modules (`model-config.ts`, `input-filters.ts`, `prompt-cache.ts`, `compaction-helpers.ts`, `tooling.ts`) and re-exported APIs to keep the transformer under 500 lines. +- Split `lib/request/request-transformer.ts` into helper modules (`model-config.ts`, `input-filters.ts`, `prompt-cache.ts`, `tooling.ts`) and re-exported APIs to keep the transformer under 500 lines. - Added missing `normalizeToolsForResponses` import via `normalizeToolsForCodexBody` helper. - Ran `pnpm build` and `pnpm lint` (lint only warning remains about legacy `.eslintignore`). 
diff --git a/spec/review-pr-20-plan.md b/spec/review-pr-20-plan.md deleted file mode 100644 index 12c352f..0000000 --- a/spec/review-pr-20-plan.md +++ /dev/null @@ -1,28 +0,0 @@ -# Review Plan for PR #20 (Device/stealth) - -## Overview -- Address coderabbitai's remaining comments on https://github.com/open-hax/codex/pull/20 before merging. -- Focus on fixing the failing `test/plugin-config.test.ts` assertions and strengthening compaction-related logic. - -## Target files and lines -1. `test/plugin-config.test.ts` (≈90‑140): Remove duplicate `it('should handle file read errors gracefully')`, keep a single error-handling test that asserts the current `PluginConfig` defaults (`codexMode`, `enablePromptCaching`, `enableCodexCompaction`, `autoCompactMinMessages`) and verifies warning logging. -2. `lib/request/fetch-helpers.ts` (≈34‑55): Guard `sessionManager?.applyCompactedHistory` behind `compactionEnabled` so `enableCodexCompaction = false` truly disables history reuse. -3. `lib/request/request-transformer.ts` (≈896‑977): Wrap `computeFallbackHashForBody` serialization in `try/catch` and fall back to hashing just the `model` string when metadata is not JSON-safe. - -## Existing references -- Open PR: open-hax/codex#20 (Device/stealth branch). Coderabbitai submitted reviews on commits f56e506e0f07… and 8757e76457dc… with blockers noted above. -- No upstream GitHub issues are cited; the actionable items come solely from the reviewer’s comments. - -## Definition of done -1. `test/plugin-config.test.ts` compiles, contains no duplicate `it` names, and asserts the current default config (includes `enableCodexCompaction` and `autoCompactMinMessages`), logging expectations remain within the test body. -2. `transformRequestForCodex` only applies compacted history when `pluginConfig.enableCodexCompaction !== false` (in addition to the existing manual command guard). -3. 
`computeFallbackHashForBody` no longer throws when metadata/input contain non-serializable values; it falls back to hashing a stable string (e.g., `model`). -4. Documented plan is shared in PR comment before implementing code. -5. Tests covering touched files pass locally (at least the relevant suites). -6. Changes committed, pushed, and the reviewer notified via response. - -## Requirements -- Must respond on PR with the plan before coding begins. -- Keep existing tests (plugin config, fetch helpers, session manager) green after modifications. -- Preserve logging expectations in relevant tests (use spies to verify warnings in failure cases). -- Push updates to the same branch once changes and tests are complete. diff --git a/spec/review-v0.3.5-fixes.md b/spec/review-v0.3.5-fixes.md index a2ba497..e416fa3 100644 --- a/spec/review-v0.3.5-fixes.md +++ b/spec/review-v0.3.5-fixes.md @@ -3,7 +3,6 @@ ## Scope - Handle null/empty cache reads in `lib/prompts/codex.ts` around readCachedInstructions caching logic -- Remove redundant cloning in `lib/request/compaction-helpers.ts` (removeLastUserMessage, maybeBuildCompactionPrompt) - Prevent duplicate tool remap injection in `lib/request/input-filters.ts` addToolRemapMessage ## Existing issues / PRs @@ -13,7 +12,6 @@ ## Definition of done - safeReadFile null results do not get cached as empty content; fallback logic remains available for caller -- Compaction helpers avoid unnecessary clones while preserving immutability semantics (original input reused unless truncated) - Tool remap message is only prepended once when tools are present; logic handles undefined/null safely - All relevant tests updated or added if behavior changes; existing suite passes locally if run diff --git a/spec/session-prefix-mismatch.md b/spec/session-prefix-mismatch.md index b4176f8..1b18c68 100644 --- a/spec/session-prefix-mismatch.md +++ b/spec/session-prefix-mismatch.md @@ -1,23 +1,28 @@ # Session cache prefix mismatch – bridge injection ## Context 
+ - Repeated log: `SessionManager: prefix mismatch detected, regenerating cache key` (e.g., sessionId `ses_5610847c3ffey8KLQaUCsUdtks`) now appears beyond the first turn, implying cache keys reset every request. - Suspect flow: `addCodexBridgeMessage` skips reinjection when `sessionContext.state.bridgeInjected` is true, so turn 1 includes the bridge, turn 2 omits it; SessionManager compares the prior bridged input to the new unbridged input and treats it as a prefix mismatch. ## Code links + - `lib/session/session-manager.ts:248-299` — prefix check and regeneration path (`sharesPrefix`, `applyRequest`). - `lib/request/request-transformer.ts:612-657` — bridge injection with session-scoped skip flag. - `lib/request/fetch-helpers.ts:119-205` — session context retrieval + transform + `applyRequest` ordering. ## Existing issues / PRs + - None found specific to this regression (branch: `chore/codex-max-release-review`). ## Definition of done + - Bridge/system prompt handling keeps the input prefix stable across sequential tool turns; no repeated prefix-mismatch warnings after the first turn of a conversation. - `prompt_cache_key` remains stable across multi-turn sessions unless the history genuinely diverges. - Automated tests cover a multi-turn tool conversation to ensure bridge injection does not trigger SessionManager resets. ## Requirements + - Add a regression test demonstrating stable caching across consecutive turns with the bridge prompt injected. - Adjust bridge injection or prefix handling so SessionManager sees a consistent prefix across turns. -- Keep existing behavior for compaction and tool normalization intact; avoid altering host-provided prompt_cache_key semantics. +- Keep existing behavior for tool normalization intact; avoid altering host-provided prompt_cache_key semantics. 
diff --git a/test/cache-e2e.test.ts b/test/cache-e2e.test.ts new file mode 100644 index 0000000..3c201ef --- /dev/null +++ b/test/cache-e2e.test.ts @@ -0,0 +1,121 @@ +import { describe, it, expect, vi, afterEach } from "vitest"; +import { transformRequestForCodex } from "../lib/request/fetch-helpers.js"; +import { SessionManager } from "../lib/session/session-manager.js"; +import * as openCodeCodex from "../lib/prompts/opencode-codex.js"; +import type { InputItem, RequestBody, UserConfig } from "../lib/types.js"; +import * as logger from "../lib/logger.js"; + +const CODEX_INSTRUCTIONS = "codex instructions"; +const USER_CONFIG: UserConfig = { global: {}, models: {} }; +const API_URL = "https://api.openai.com/v1/responses"; + +function envMessage(date: string, files: string[]): InputItem { + return { + type: "message", + role: "developer", + content: [ + { + type: "input_text", + text: [ + "Here is some useful information about the environment you are running in:", + "", + ` Today's date: ${date}`, + "", + "", + ...files.map((f) => ` ${f}`), + "", + ].join("\n"), + }, + ], + }; +} + +async function runTransform(body: RequestBody, sessionManager: SessionManager) { + const init: RequestInit = { body: JSON.stringify(body) }; + const result = await transformRequestForCodex( + init, + API_URL, + CODEX_INSTRUCTIONS, + USER_CONFIG, + true, + sessionManager, + ); + if (!result) throw new Error("transformRequestForCodex returned undefined"); + return result; +} + +describe("cache e2e without hitting Codex", () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("keeps prompt_cache_key stable when env/files churn across turns", async () => { + // Avoid network in filterOpenCodeSystemPrompts + vi.spyOn(openCodeCodex, "getOpenCodeCodexPrompt").mockResolvedValue( + "You are a coding agent running in OpenCode", + ); + + const manager = new SessionManager({ enabled: true }); + + const body1: RequestBody = { + model: "gpt-5", + metadata: { conversation_id: "conv-env-e2e" 
}, + input: [ + envMessage("Mon Jan 01 2024", ["README.md", "dist/index.js"]), + { type: "message", role: "user", content: "hello" }, + ], + }; + + const res1 = await runTransform(body1, manager); + const transformed1 = res1.body as RequestBody; + expect(transformed1.prompt_cache_key).toContain("conv-env-e2e"); + expect(transformed1.input).toHaveLength(1); + expect(transformed1.input?.[0].role).toBe("user"); + + const body2: RequestBody = { + model: "gpt-5", + metadata: { conversation_id: "conv-env-e2e" }, + input: [ + envMessage("Tue Jan 02 2024", ["README.md", "dist/main.js", "coverage/index.html"]), + { type: "message", role: "user", content: "hello" }, + ], + }; + + const res2 = await runTransform(body2, manager); + const transformed2 = res2.body as RequestBody; + expect(transformed2.prompt_cache_key).toBe(transformed1.prompt_cache_key); + expect(transformed2.input).toHaveLength(1); + expect(transformed2.input?.[0].role).toBe("user"); + }); + + it("logs user_message_changed when only user content changes", async () => { + vi.spyOn(openCodeCodex, "getOpenCodeCodexPrompt").mockResolvedValue( + "You are a coding agent running in OpenCode", + ); + const warnSpy = vi.spyOn(logger, "logWarn").mockImplementation(() => {}); + const manager = new SessionManager({ enabled: true }); + + const body1: RequestBody = { + model: "gpt-5", + metadata: { conversation_id: "conv-user-e2e" }, + input: [{ type: "message", role: "user", content: "hello" }], + }; + await runTransform(body1, manager); + + const body2: RequestBody = { + model: "gpt-5", + metadata: { conversation_id: "conv-user-e2e" }, + input: [{ type: "message", role: "user", content: "second" }], + }; + await runTransform(body2, manager); + + const warnCall = warnSpy.mock.calls.find( + ([message]) => typeof message === "string" && message.includes("prefix mismatch"), + ); + expect(warnCall?.[1]).toMatchObject({ + prefixCause: "user_message_changed", + previousRole: "user", + incomingRole: "user", + }); + }); +}); diff 
--git a/test/logger.test.ts b/test/logger.test.ts index 9929631..4736a96 100644 --- a/test/logger.test.ts +++ b/test/logger.test.ts @@ -142,14 +142,19 @@ describe("logger", () => { expect(logSpy).not.toHaveBeenCalled(); }); - it("logWarn emits to console even without env overrides", async () => { + it("logWarn writes to rolling log but stays off console by default", async () => { fsMocks.existsSync.mockReturnValue(true); const { logWarn, flushRollingLogsForTest } = await import("../lib/logger.js"); logWarn("warning"); await flushRollingLogsForTest(); - expect(warnSpy).toHaveBeenCalledWith("[openhax/codex] warning"); + expect(warnSpy).not.toHaveBeenCalled(); + expect(fsMocks.appendFile).toHaveBeenCalledTimes(1); + const [logPath, logLine, logEncoding] = fsMocks.appendFile.mock.calls[0]; + expect(logPath).toBe("/mock-home/.opencode/logs/codex-plugin/codex-plugin.log"); + expect(logEncoding).toBe("utf8"); + expect(logLine as string).toContain('"message":"warning"'); }); it("logWarn does not send warning toasts by default even when tui is available", async () => { @@ -170,7 +175,8 @@ describe("logger", () => { expect(showToast).not.toHaveBeenCalled(); expect(appLog).toHaveBeenCalledTimes(1); - expect(warnSpy).toHaveBeenCalledWith("[openhax/codex] toast-warning"); + expect(warnSpy).not.toHaveBeenCalled(); + expect(fsMocks.appendFile).toHaveBeenCalledTimes(1); }); it("logWarn sends warning toasts only when enabled via config", async () => { @@ -200,6 +206,19 @@ describe("logger", () => { expect(warnSpy).not.toHaveBeenCalled(); }); + it("logWarn mirrors to console when enabled via config", async () => { + fsMocks.existsSync.mockReturnValue(true); + const { configureLogger, logWarn, flushRollingLogsForTest } = await import("../lib/logger.js"); + + configureLogger({ pluginConfig: { logging: { logWarningsToConsole: true } } }); + + logWarn("console-warning"); + await flushRollingLogsForTest(); + + expect(warnSpy).toHaveBeenCalledWith("[openhax/codex] console-warning"); + 
expect(fsMocks.appendFile).toHaveBeenCalled(); + }); + it("wraps long toast messages to avoid truncation", async () => { fsMocks.existsSync.mockReturnValue(true); const showToast = vi.fn(); diff --git a/test/plugin-config.test.ts b/test/plugin-config.test.ts index f213f47..513b98e 100644 --- a/test/plugin-config.test.ts +++ b/test/plugin-config.test.ts @@ -52,7 +52,7 @@ describe("Plugin Configuration", () => { expect(config).toEqual({ codexMode: true, enablePromptCaching: true, - logging: { showWarningToasts: false }, + logging: { showWarningToasts: false, logWarningsToConsole: false }, }); expect(mockExistsSync).toHaveBeenCalledWith( @@ -69,7 +69,7 @@ describe("Plugin Configuration", () => { expect(config).toEqual({ codexMode: false, enablePromptCaching: true, - logging: { showWarningToasts: false }, + logging: { showWarningToasts: false, logWarningsToConsole: false }, }); }); @@ -82,7 +82,7 @@ describe("Plugin Configuration", () => { expect(config).toEqual({ codexMode: true, enablePromptCaching: true, - logging: { showWarningToasts: false }, + logging: { showWarningToasts: false, logWarningsToConsole: false }, }); }); @@ -98,6 +98,7 @@ describe("Plugin Configuration", () => { enableRequestLogging: false, logMaxFiles: 2, showWarningToasts: false, + logWarningsToConsole: false, }); }); @@ -111,8 +112,9 @@ describe("Plugin Configuration", () => { expect(config).toEqual({ codexMode: true, enablePromptCaching: true, - logging: { showWarningToasts: false }, - }); + logging: { showWarningToasts: false, logWarningsToConsole: false }, + }); + expect(logWarnSpy).toHaveBeenCalled(); logWarnSpy.mockRestore(); }); @@ -129,7 +131,7 @@ describe("Plugin Configuration", () => { expect(config).toEqual({ codexMode: true, enablePromptCaching: true, - logging: { showWarningToasts: false }, + logging: { showWarningToasts: false, logWarningsToConsole: false }, }); expect(logWarnSpy).toHaveBeenCalled(); logWarnSpy.mockRestore(); diff --git a/test/prompts-codex.test.ts 
b/test/prompts-codex.test.ts index fbfccef..b5babdf 100644 --- a/test/prompts-codex.test.ts +++ b/test/prompts-codex.test.ts @@ -112,7 +112,8 @@ describe("Codex Instructions Fetcher", () => { it("falls back to cached instructions when fetch fails", async () => { const consoleError = vi.spyOn(console, "error").mockImplementation(() => {}); - const consoleWarn = vi.spyOn(console, "warn").mockImplementation(() => {}); + const logger = await import("../lib/logger.js"); + const logWarnSpy = vi.spyOn(logger, "logWarn").mockImplementation(() => {}); const previousLastChecked = Date.now() - 20 * 60 * 1000; files.set(cacheFile, "still-good"); files.set( @@ -140,9 +141,7 @@ describe("Codex Instructions Fetcher", () => { expect(consoleError).toHaveBeenCalledWith( '[openhax/codex] Failed to fetch instructions from GitHub {"error":"HTTP 500 fetching https://raw.githubusercontent.com/openai/codex/v2/codex-rs/core/gpt_5_codex_prompt.md"}', ); - expect(consoleWarn).toHaveBeenCalledWith( - "[openhax/codex] Using cached instructions due to fetch failure", - ); + expect(logWarnSpy).toHaveBeenCalledWith("Using cached instructions due to fetch failure"); const meta = JSON.parse(files.get(cacheMeta) ?? 
"{}"); expect(meta.lastChecked).toBeGreaterThan(previousLastChecked); @@ -150,7 +149,7 @@ describe("Codex Instructions Fetcher", () => { expect(meta.url).toContain("codex-rs/core/gpt_5_codex_prompt.md"); consoleError.mockRestore(); - consoleWarn.mockRestore(); + logWarnSpy.mockRestore(); }); it("serves in-memory session cache when latest entry exists", async () => { @@ -242,7 +241,8 @@ describe("Codex Instructions Fetcher", () => { it("falls back to bundled instructions when no cache is available", async () => { const consoleError = vi.spyOn(console, "error").mockImplementation(() => {}); - const consoleWarn = vi.spyOn(console, "warn").mockImplementation(() => {}); + const logger = await import("../lib/logger.js"); + const logWarnSpy = vi.spyOn(logger, "logWarn").mockImplementation(() => {}); fetchMock .mockResolvedValueOnce( @@ -260,7 +260,7 @@ describe("Codex Instructions Fetcher", () => { expect(consoleError).toHaveBeenCalledWith( '[openhax/codex] Failed to fetch instructions from GitHub {"error":"HTTP 500 fetching https://raw.githubusercontent.com/openai/codex/v1/codex-rs/core/gpt_5_codex_prompt.md"}', ); - expect(consoleWarn).toHaveBeenCalledWith("[openhax/codex] Falling back to bundled instructions"); + expect(logWarnSpy).toHaveBeenCalledWith("Falling back to bundled instructions"); const meta = JSON.parse(files.get(cacheMeta) ?? 
"{}"); expect(meta.tag).toBe("v1"); @@ -268,6 +268,6 @@ describe("Codex Instructions Fetcher", () => { expect(meta.url).toContain("codex-rs/core/gpt_5_codex_prompt.md"); consoleError.mockRestore(); - consoleWarn.mockRestore(); + logWarnSpy.mockRestore(); }); }); diff --git a/test/request-transformer.test.ts b/test/request-transformer.test.ts index bdc733d..9b9ab6e 100644 --- a/test/request-transformer.test.ts +++ b/test/request-transformer.test.ts @@ -245,8 +245,8 @@ describe("filterInput", () => { id: "msg_456", type: "message", role: "developer", - content: "Summary saved to ~/.opencode/summary.md", - metadata: { source: "opencode-compaction" }, + content: "Custom host metadata message", + metadata: { source: "host-metadata" }, }, ]; const result = filterInput(input, { preserveMetadata: true }); @@ -587,6 +587,33 @@ describe("filterOpenCodeSystemPrompts", () => { expect(result).toHaveLength(2); }); + it("should drop env-only system messages", async () => { + const input: InputItem[] = [ + { + type: "message", + role: "system", + content: [ + { + type: "input_text", + text: [ + "Here is some useful information about the environment you are running in:", + "", + " Working directory: /tmp", + "", + "", + " tmpfile.txt", + "", + ].join("\n"), + }, + ], + }, + { type: "message", role: "user", content: "hello" }, + ]; + const result = await filterOpenCodeSystemPrompts(input); + expect(result).toHaveLength(1); + expect(result![0].role).toBe("user"); + }); + it("should keep AGENTS.md content (not filter it)", async () => { const input: InputItem[] = [ { @@ -608,7 +635,7 @@ describe("filterOpenCodeSystemPrompts", () => { expect(result![1].role).toBe("user"); }); - it("should keep environment+AGENTS.md concatenated message", async () => { + it("should strip environment blocks but keep AGENTS.md content", async () => { const input: InputItem[] = [ { type: "message", @@ -618,39 +645,134 @@ describe("filterOpenCodeSystemPrompts", () => { { type: "message", role: "developer", 
- // environment + AGENTS.md joined (like OpenCode does) - content: - "Working directory: /path/to/project\nDate: 2025-01-01\n\n# AGENTS.md\n\nCustom instructions.", + // environment + files + AGENTS.md joined (like OpenCode does) + content: [ + { + type: "input_text", + text: [ + "Here is some useful information about the environment you are running in:", + "", + " Working directory: /path/to/project", + " Is directory a git repo: yes", + "", + "", + " README.md", + "", + "\n# AGENTS.md\n\nCustom instructions.", + ].join("\n"), + }, + ], }, { type: "message", role: "user", content: "hello" }, ]; const result = await filterOpenCodeSystemPrompts(input); - // Should filter first message (codex.txt) but keep second (env+AGENTS.md) + // Should filter codex.txt, strip env/files, and keep AGENTS.md text expect(result).toHaveLength(2); expect(result![0].content).toContain("AGENTS.md"); + expect(result![0].content as string).not.toContain(""); + expect(result![0].content as string).not.toContain(""); expect(result![1].role).toBe("user"); }); - it("should use metadata flag to detect compaction prompts", async () => { - const input: InputItem[] = [ - { - type: "message", - role: "developer", - content: "Summary saved to ~/.opencode/summary.md for inspection", - metadata: { source: "opencode-compaction" }, - }, - { type: "message", role: "user", content: "continue" }, - ]; - const result = await filterOpenCodeSystemPrompts(input); - expect(result).toHaveLength(1); - expect(result![0].role).toBe("user"); - }); - it("should return undefined for undefined input", async () => { expect(await filterOpenCodeSystemPrompts(undefined)).toBeUndefined(); }); }); +describe("transformRequestBody caching stability", () => { + const CODEX_INSTRUCTIONS = "codex instructions"; + const userConfig = { global: {}, models: {} }; + + function envMessage(date: string, files: string[]): InputItem { + return { + type: "message", + role: "developer", + content: [ + { + type: "input_text", + text: [ + 
"Here is some useful information about the environment you are running in:", + "", + ` Today's date: ${date}`, + "", + "", + ...files.map((f) => ` ${f}`), + "", + ].join("\n"), + }, + ], + }; + } + + it("keeps prompt_cache_key stable when only env/files churn", async () => { + const body1: RequestBody = { + model: "gpt-5", + metadata: { conversation_id: "conv-env-stable" }, + input: [ + envMessage("Mon Jan 01 2024", ["README.md", "dist/index.js"]), + { type: "message", role: "user", content: "hello" }, + ], + }; + + const result1 = await transformRequestBody(body1, CODEX_INSTRUCTIONS, userConfig, true, {}, undefined); + expect(result1.prompt_cache_key).toContain("conv-env-stable"); + expect(result1.input).toHaveLength(1); + expect(result1.input?.[0].role).toBe("user"); + + const body2: RequestBody = { + model: "gpt-5", + metadata: { conversation_id: "conv-env-stable" }, + input: [ + envMessage("Tue Jan 02 2024", ["README.md", "dist/main.js", "coverage/index.html"]), + { type: "message", role: "user", content: "hello" }, + ], + }; + + const result2 = await transformRequestBody(body2, CODEX_INSTRUCTIONS, userConfig, true, {}, undefined); + expect(result2.prompt_cache_key).toBe(result1.prompt_cache_key); + expect(result2.input).toHaveLength(1); + expect(result2.input?.[0].role).toBe("user"); + }); + + it("can reattach env/files tail when flag enabled", async () => { + process.env.CODEX_APPEND_ENV_CONTEXT = "1"; + const body: RequestBody = { + model: "gpt-5", + metadata: { conversation_id: "conv-env-tail" }, + input: [ + { + type: "message", + role: "developer", + content: [ + { + type: "input_text", + text: [ + "Here is some useful information about the environment you are running in:", + "", + " Working directory: /tmp", + "", + "", + " tmpfile.txt", + "", + ].join("\n"), + }, + ], + }, + { type: "message", role: "user", content: "hello" }, + ], + }; + + const result = await transformRequestBody(body, CODEX_INSTRUCTIONS, userConfig, true, {}, undefined); + 
expect(result.input?.length).toBe(2); + expect(result.input?.[0].role).toBe("user"); + expect(result.input?.[1].role).toBe("developer"); + expect(result.input?.[1].content as string).toContain(""); + expect(result.input?.[1].content as string).toContain(""); + + delete process.env.CODEX_APPEND_ENV_CONTEXT; + }); +}); + describe("addCodexBridgeMessage", () => { it("should prepend bridge message when tools present", async () => { const input = [{ type: "message", role: "user", content: [{ type: "input_text", text: "test" }] }]; @@ -808,29 +930,6 @@ describe("transformRequestBody", () => { expect(result2.prompt_cache_key).toBe("cache_meta-conv-789-fork-fork-x"); }); - it("filters metadata-tagged compaction prompts and strips metadata when IDs are not preserved", async () => { - const body: RequestBody = { - model: "gpt-5", - input: [ - { - type: "message", - role: "developer", - content: "Summary saved to ~/.opencode/summary.md for inspection", - metadata: { source: "opencode-compaction" }, - }, - { type: "message", role: "user", content: "continue" }, - ], - }; - - const transformedBody = await transformRequestBody(body, codexInstructions); - expect(transformedBody).toBeDefined(); - const messages = transformedBody.input ?? 
[]; - - expect(messages.some((item) => (item as any).metadata)).toBe(false); - expect(JSON.stringify(messages)).not.toContain(".opencode/summary"); - expect(messages.some((item) => item.role === "user" && (item as any).content === "continue")).toBe(true); - }); - it("keeps bridge prompt across turns so prompt_cache_key stays stable", async () => { const sessionManager = new SessionManager({ enabled: true }); const baseInput: InputItem[] = [ diff --git a/test/session-manager.test.ts b/test/session-manager.test.ts index fa6a820..6f5d84f 100644 --- a/test/session-manager.test.ts +++ b/test/session-manager.test.ts @@ -1,9 +1,8 @@ -import { createHash } from "node:crypto"; import { describe, expect, it, vi } from "vitest"; import { SESSION_CONFIG } from "../lib/constants.js"; import { SessionManager } from "../lib/session/session-manager.js"; import * as logger from "../lib/logger.js"; -import type { InputItem, RequestBody, SessionContext } from "../lib/types.js"; +import type { RequestBody, SessionContext } from "../lib/types.js"; interface BodyOptions { forkId?: string; @@ -37,10 +36,6 @@ function createBody(conversationId: string, inputCount = 1, options: BodyOptions }; } -function hashItems(items: InputItem[]): string { - return createHash("sha1").update(JSON.stringify(items)).digest("hex"); -} - describe("SessionManager", () => { it("returns undefined when disabled", () => { const manager = new SessionManager({ enabled: false }); @@ -54,13 +49,9 @@ describe("SessionManager", () => { const manager = new SessionManager({ enabled: true }); const body = createBody("conv-123"); - let context = manager.getContext(body) as SessionContext; - expect(context.enabled).toBe(true); - expect(context.isNew).toBe(true); - expect(context.preserveIds).toBe(true); - expect(context.state.promptCacheKey).toBe("conv-123"); + const context = manager.getContext(body) as SessionContext; + manager.applyRequest(body, context); - context = manager.applyRequest(body, context) as 
SessionContext; expect(body.prompt_cache_key).toBe("conv-123"); expect(context.state.lastInput.length).toBe(1); }); @@ -83,67 +74,71 @@ describe("SessionManager", () => { }); it("regenerates cache key when prefix differs", () => { + const warnSpy = vi.spyOn(logger, "logWarn").mockImplementation(() => {}); const manager = new SessionManager({ enabled: true }); const baseBody = createBody("conv-789", 2); - let context = manager.getContext(baseBody) as SessionContext; - context = manager.applyRequest(baseBody, context) as SessionContext; + const context = manager.getContext(baseBody) as SessionContext; + manager.applyRequest(baseBody, context); - const branchBody: RequestBody = { - model: "gpt-5", - metadata: { conversation_id: "conv-789" }, + const changedBody: RequestBody = { + ...baseBody, input: [ - { - type: "message", - role: "user", - id: "new_msg", - content: "fresh-start", - }, + { type: "message", role: "system", content: "updated system prompt" }, + { type: "message", role: "user", content: "hello" }, ], }; - let branchContext = manager.getContext(branchBody) as SessionContext; - branchContext = manager.applyRequest(branchBody, branchContext) as SessionContext; + const nextContext = manager.getContext(changedBody) as SessionContext; + manager.applyRequest(changedBody, nextContext); + + const warnCall = warnSpy.mock.calls.find( + ([message]) => typeof message === "string" && message.includes("prefix mismatch"), + ); + + expect(warnCall?.[1]).toMatchObject({ + prefixCause: "system_prompt_changed", + previousRole: "user", + incomingRole: "system", + }); - expect(branchBody.prompt_cache_key).toMatch(/^cache_/); - expect(branchContext.isNew).toBe(true); - expect(branchContext.state.promptCacheKey).not.toBe(context.state.promptCacheKey); + warnSpy.mockRestore(); }); - it("logs system prompt changes when regenerating cache key", () => { + it("does not warn on user-only content changes", () => { const warnSpy = vi.spyOn(logger, "logWarn").mockImplementation(() => 
{}); const manager = new SessionManager({ enabled: true }); const baseBody: RequestBody = { model: "gpt-5", - metadata: { conversation_id: "conv-system-change" }, + metadata: { conversation_id: "conv-user-change" }, input: [ - { type: "message", role: "system", content: "initial system" }, - { type: "message", role: "user", content: "hello" }, + { type: "message", role: "system", content: "sys" }, + { type: "message", role: "user", content: "first" }, ], }; - let context = manager.getContext(baseBody) as SessionContext; - context = manager.applyRequest(baseBody, context) as SessionContext; + const context = manager.getContext(baseBody) as SessionContext; + manager.applyRequest(baseBody, context); - const changedBody: RequestBody = { + const nextBody: RequestBody = { ...baseBody, input: [ - { type: "message", role: "system", content: "updated system prompt" }, - { type: "message", role: "user", content: "hello" }, + { type: "message", role: "system", content: "sys" }, + { type: "message", role: "user", content: "second" }, ], }; - const nextContext = manager.getContext(changedBody) as SessionContext; - manager.applyRequest(changedBody, nextContext); + const nextContext = manager.getContext(nextBody) as SessionContext; + manager.applyRequest(nextBody, nextContext); const warnCall = warnSpy.mock.calls.find( ([message]) => typeof message === "string" && message.includes("prefix mismatch"), ); - expect(warnCall?.[1]).toMatchObject({ - prefixCause: "system_prompt_changed", - previousRole: "system", - incomingRole: "system", + prefixCause: "user_message_changed", + previousRole: "user", + incomingRole: "user", + sharedPrefixLength: 1, }); warnSpy.mockRestore(); @@ -169,8 +164,8 @@ describe("SessionManager", () => { ], }; - let context = manager.getContext(fullBody) as SessionContext; - context = manager.applyRequest(fullBody, context) as SessionContext; + const context = manager.getContext(fullBody) as SessionContext; + manager.applyRequest(fullBody, context); const 
prunedBody: RequestBody = { ...fullBody, @@ -193,51 +188,12 @@ describe("SessionManager", () => { warnSpy.mockRestore(); }); - it("forks session when prefix matches partially and reuses compaction state", () => { - const manager = new SessionManager({ enabled: true }); - const baseBody = createBody("conv-prefix-fork", 3); - - let baseContext = manager.getContext(baseBody) as SessionContext; - baseContext = manager.applyRequest(baseBody, baseContext) as SessionContext; - - const systemMessage: InputItem = { type: "message", role: "system", content: "env vars" }; - manager.applyCompactionSummary(baseContext, { - baseSystem: [systemMessage], - summary: "Base summary", - }); - - const branchBody = createBody("conv-prefix-fork", 3); - branchBody.input = [ - { type: "message", role: "user", id: "msg_1", content: "message-1" }, - { type: "message", role: "user", id: "msg_2", content: "message-2" }, - { type: "message", role: "assistant", id: "msg_3", content: "diverged" }, - ]; - - let branchContext = manager.getContext(branchBody) as SessionContext; - branchContext = manager.applyRequest(branchBody, branchContext) as SessionContext; - - const sharedPrefix = branchBody.input.slice(0, 2) as InputItem[]; - const expectedSuffix = hashItems(sharedPrefix).slice(0, 8); - expect(branchBody.prompt_cache_key).toBe(`conv-prefix-fork::prefix::${expectedSuffix}`); - expect(branchContext.state.promptCacheKey).toBe(`conv-prefix-fork::prefix::${expectedSuffix}`); - expect(branchContext.isNew).toBe(true); - - const followUp = createBody("conv-prefix-fork", 1); - followUp.input = [{ type: "message", role: "user", content: "follow-up" }]; - manager.applyCompactedHistory(followUp, branchContext); - - expect(followUp.input).toHaveLength(3); - expect(followUp.input?.[0].role).toBe("system"); - expect(followUp.input?.[1].content).toContain("Base summary"); - expect(followUp.input?.[2].content).toBe("follow-up"); - }); - it("records cached token usage from response payload", () => { const 
manager = new SessionManager({ enabled: true }); const body = createBody("conv-usage"); - let context = manager.getContext(body) as SessionContext; - context = manager.applyRequest(body, context) as SessionContext; + const context = manager.getContext(body) as SessionContext; + manager.applyRequest(body, context); manager.recordResponse(context, { usage: { cached_tokens: 42 } }); @@ -247,8 +203,8 @@ describe("SessionManager", () => { it("reports metrics snapshot with recent sessions", () => { const manager = new SessionManager({ enabled: true }); const body = createBody("conv-metrics"); - let context = manager.getContext(body) as SessionContext; - context = manager.applyRequest(body, context) as SessionContext; + const context = manager.getContext(body) as SessionContext; + manager.applyRequest(body, context); const metrics = manager.getMetrics(); expect(metrics.enabled).toBe(true); @@ -317,7 +273,7 @@ describe("SessionManager", () => { it("derives fork ids from parent conversation hints", () => { const manager = new SessionManager({ enabled: true }); const parentBody = createBody("conv-fork-parent", 1, { parentConversationId: "parent-conv" }); - let parentContext = manager.getContext(parentBody) as SessionContext; + const parentContext = manager.getContext(parentBody) as SessionContext; expect(parentContext.isNew).toBe(true); expect(parentContext.state.promptCacheKey).toBe("conv-fork-parent::fork::parent-conv"); manager.applyRequest(parentBody, parentContext); @@ -331,46 +287,6 @@ describe("SessionManager", () => { expect(snakeParentContext.state.promptCacheKey).toBe("conv-fork-parent::fork::parent-snake"); }); - it("scopes compaction summaries per fork session", () => { - const manager = new SessionManager({ enabled: true }); - const alphaBody = createBody("conv-fork-summary", 1, { forkId: "alpha" }); - let alphaContext = manager.getContext(alphaBody) as SessionContext; - alphaContext = manager.applyRequest(alphaBody, alphaContext) as SessionContext; - - const 
systemMessage: InputItem = { type: "message", role: "system", content: "env vars" }; - manager.applyCompactionSummary(alphaContext, { - baseSystem: [systemMessage], - summary: "Alpha summary", - }); - - const alphaNext = createBody("conv-fork-summary", 1, { forkId: "alpha" }); - alphaNext.input = [{ type: "message", role: "user", content: "alpha task" }]; - manager.applyCompactedHistory(alphaNext, alphaContext); - expect(alphaNext.input).toHaveLength(3); - expect(alphaNext.input?.[1].content).toContain("Alpha summary"); - - const betaBody = createBody("conv-fork-summary", 1, { forkId: "beta" }); - let betaContext = manager.getContext(betaBody) as SessionContext; - betaContext = manager.applyRequest(betaBody, betaContext) as SessionContext; - - const betaNext = createBody("conv-fork-summary", 1, { forkId: "beta" }); - betaNext.input = [{ type: "message", role: "user", content: "beta task" }]; - manager.applyCompactedHistory(betaNext, betaContext); - expect(betaNext.input).toHaveLength(1); - - manager.applyCompactionSummary(betaContext, { - baseSystem: [], - summary: "Beta summary", - }); - - const betaFollowUp = createBody("conv-fork-summary", 1, { forkId: "beta" }); - betaFollowUp.input = [{ type: "message", role: "user", content: "beta follow-up" }]; - manager.applyCompactedHistory(betaFollowUp, betaContext); - expect(betaFollowUp.input).toHaveLength(2); - expect(betaFollowUp.input?.[0].content).toContain("Beta summary"); - expect(betaFollowUp.input?.[1].content).toBe("beta follow-up"); - }); - it("evicts sessions that exceed idle TTL", () => { const manager = new SessionManager({ enabled: true }); const body = createBody("conv-expire"); @@ -390,8 +306,9 @@ describe("SessionManager", () => { const totalSessions = SESSION_CONFIG.MAX_ENTRIES + 5; for (let index = 0; index < totalSessions; index += 1) { const body = createBody(`conv-cap-${index}`); - let context = manager.getContext(body) as SessionContext; - context = manager.applyRequest(body, context) as 
SessionContext; + const context = manager.getContext(body) as SessionContext; + manager.applyRequest(body, context); + context.state.lastUpdated -= index; // ensure ordering } @@ -399,27 +316,4 @@ describe("SessionManager", () => { expect(metrics.totalSessions).toBe(SESSION_CONFIG.MAX_ENTRIES); expect(metrics.recentSessions.length).toBeLessThanOrEqual(SESSION_CONFIG.MAX_ENTRIES); }); - - it("applies compacted history when summary stored", () => { - const manager = new SessionManager({ enabled: true }); - const body = createBody("conv-compaction"); - let context = manager.getContext(body) as SessionContext; - context = manager.applyRequest(body, context) as SessionContext; - - const systemMessage: InputItem = { type: "message", role: "system", content: "env" }; - manager.applyCompactionSummary(context, { - baseSystem: [systemMessage], - summary: "Auto-compaction summary", - }); - - const nextBody = createBody("conv-compaction"); - nextBody.input = [{ type: "message", role: "user", content: "new task" }]; - manager.applyCompactedHistory(nextBody, context); - - expect(nextBody.input).toHaveLength(3); - expect(nextBody.input?.[0].role).toBe("system"); - expect(nextBody.input?.[1].role).toBe("user"); - expect(nextBody.input?.[1].content).toContain("Auto-compaction summary"); - expect(nextBody.input?.[2].content).toBe("new task"); - }); });