coder
diff --git a/.github/workflows/chromatic.yml b/.github/workflows/chromatic.yml
Lines changed: 0 additions & 1 deletion
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 6 additions & 0 deletions
diff --git a/.github/workflows/nightly-terminal-bench.yml b/.github/workflows/nightly-terminal-bench.yml
Lines changed: 5 additions & 5 deletions
diff --git a/.github/workflows/publish-npm.yml b/.github/workflows/publish-npm.yml
Lines changed: 10 additions & 10 deletions
diff --git a/.github/workflows/terminal-bench.yml b/.github/workflows/terminal-bench.yml
Lines changed: 18 additions & 19 deletions
diff --git a/.storybook/main.ts b/.storybook/main.ts
Lines changed: 1 addition & 5 deletions
diff --git a/.storybook/mocks/version.ts b/.storybook/mocks/version.ts
Lines changed: 0 additions & 1 deletion
diff --git a/Makefile b/Makefile
Lines changed: 3 additions & 3 deletions
diff --git a/benchmarks/terminal_bench/README.md b/benchmarks/terminal_bench/README.md
Lines changed: 1 addition & 0 deletions
@@ -36,4 +36,3 @@ jobs:
           projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
           exitZeroOnChanges: true
           onlyChanged: true
-
@@ -74,6 +74,9 @@ jobs:
 
       - uses: ./.github/actions/setup-cmux
 
+      - name: Build worker files
+        run: make build-main
+
       - name: Run tests with coverage
         run: bun test --coverage --coverage-reporter=lcov ${{ github.event.inputs.test_filter || 'src' }}
 
@@ -96,6 +99,9 @@ jobs:
 
       - uses: ./.github/actions/setup-cmux
 
+      - name: Build worker files
+        run: make build-main
+
       - name: Run integration tests with coverage
         # --silent suppresses per-test output (17 test files × 32 workers = overwhelming logs)
         run: TEST_INTEGRATION=1 bun x jest --coverage --maxWorkers=100% --silent ${{ github.event.inputs.test_filter || 'tests' }}
 
@@ -3,13 +3,13 @@ name: Nightly Terminal-Bench
 on:
   schedule:
     # Run full benchmark suite (~80 tasks) every night at midnight UTC
-    - cron: '0 0 * * *'
+    - cron: "0 0 * * *"
   workflow_dispatch:
     inputs:
       models:
         description: 'Models to test (comma-separated, or "all" for both)'
         required: false
-        default: 'all'
+        default: "all"
         type: string
 
 jobs:
@@ -41,9 +41,9 @@ jobs:
     uses: ./.github/workflows/terminal-bench.yml
     with:
       model_name: ${{ matrix.model }}
-      thinking_level: 'high'
-      dataset: 'terminal-bench-core==0.1.1'
-      concurrency: '4'
+      thinking_level: "high"
+      dataset: "terminal-bench-core==0.1.1"
+      concurrency: "4"
       livestream: true
     secrets:
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
 
@@ -5,7 +5,7 @@ on:
     branches:
       - main
     tags:
-      - 'v*'
+      - "v*"
   workflow_dispatch:
 
 permissions:
@@ -24,12 +24,12 @@ jobs:
 
       - uses: ./.github/actions/setup-cmux
         with:
-          install-imagemagick: 'true'
+          install-imagemagick: "true"
 
       # Sets up .npmrc with the auth token
       - uses: actions/setup-node@v4
         with:
-          registry-url: 'https://registry.npmjs.org'
+          registry-url: "https://registry.npmjs.org"
 
       - run: sudo npm i -g npm@latest
 
@@ -38,10 +38,10 @@ jobs:
         run: |
           # Get base version from package.json
           BASE_VERSION=$(node -p "require('./package.json').version")
-          
+
           # Generate git describe version
           GIT_DESCRIBE=$(git describe --tags --always --dirty 2>/dev/null || echo "unknown")
-          
+
           if [[ $GITHUB_REF == refs/tags/* ]]; then
             # For tags, use the base version as-is (stable release)
             NPM_VERSION="${BASE_VERSION}"
@@ -56,13 +56,13 @@ jobs:
             NPM_TAG="next"
             echo "Publishing pre-release: ${NPM_VERSION}"
           fi
-          
+
           echo "version=${NPM_VERSION}" >> $GITHUB_OUTPUT
           echo "tag=${NPM_TAG}" >> $GITHUB_OUTPUT
-          
+
           # Update package.json with the new version
           node -e "const fs = require('fs'); const pkg = JSON.parse(fs.readFileSync('package.json')); pkg.version = '${NPM_VERSION}'; fs.writeFileSync('package.json', JSON.stringify(pkg, null, 2) + '\n');"
-          
+
           echo "Updated package.json to version ${NPM_VERSION}"
 
       - name: Generate version file
@@ -76,7 +76,7 @@ jobs:
         run: |
           PACKAGE_NAME=$(node -p "require('./package.json').name")
           VERSION="${{ steps.version.outputs.version }}"
-          
+
           if npm view "${PACKAGE_NAME}@${VERSION}" version &>/dev/null; then
             echo "exists=true" >> $GITHUB_OUTPUT
             echo "Version ${VERSION} already exists on npm"
@@ -95,7 +95,7 @@ jobs:
           PACKAGE_NAME=$(node -p "require('./package.json').name")
           VERSION="${{ steps.version.outputs.version }}"
           TAG="${{ steps.version.outputs.tag }}"
-          
+
           echo "Version ${VERSION} already published, updating dist-tag to ${TAG}"
           npm dist-tag add "${PACKAGE_NAME}@${VERSION}" "${TAG}"
 
 
@@ -4,34 +4,34 @@ on:
   workflow_call:
     inputs:
       model_name:
-        description: 'Model to use (e.g., anthropic:claude-sonnet-4-5)'
+        description: "Model to use (e.g., anthropic:claude-sonnet-4-5)"
         required: false
         type: string
       thinking_level:
-        description: 'Thinking level (off, low, medium, high)'
+        description: "Thinking level (off, low, medium, high)"
         required: false
         type: string
       dataset:
-        description: 'Terminal-Bench dataset to use'
+        description: "Terminal-Bench dataset to use"
         required: false
         type: string
-        default: 'terminal-bench-core==0.1.1'
+        default: "terminal-bench-core==0.1.1"
       concurrency:
-        description: 'Number of concurrent tasks (--n-concurrent)'
+        description: "Number of concurrent tasks (--n-concurrent)"
         required: false
         type: string
-        default: '4'
+        default: "4"
       livestream:
-        description: 'Enable livestream mode'
+        description: "Enable livestream mode"
         required: false
         type: boolean
         default: true
       sample_size:
-        description: 'Number of random tasks to run (empty = all tasks)'
+        description: "Number of random tasks to run (empty = all tasks)"
         required: false
         type: string
       extra_args:
-        description: 'Additional arguments to pass to terminal-bench'
+        description: "Additional arguments to pass to terminal-bench"
         required: false
         type: string
     secrets:
@@ -42,34 +42,34 @@ on:
   workflow_dispatch:
     inputs:
       dataset:
-        description: 'Terminal-Bench dataset to use'
+        description: "Terminal-Bench dataset to use"
         required: false
-        default: 'terminal-bench-core==0.1.1'
+        default: "terminal-bench-core==0.1.1"
         type: string
       concurrency:
-        description: 'Number of concurrent tasks (--n-concurrent)'
+        description: "Number of concurrent tasks (--n-concurrent)"
         required: false
-        default: '4'
+        default: "4"
         type: string
       livestream:
-        description: 'Enable livestream mode'
+        description: "Enable livestream mode"
         required: false
         default: true
         type: boolean
       sample_size:
-        description: 'Number of random tasks to run (empty = all tasks)'
+        description: "Number of random tasks to run (empty = all tasks)"
         required: false
         type: string
       model_name:
-        description: 'Model to use (e.g., anthropic:claude-sonnet-4-5, openai:gpt-5-codex)'
+        description: "Model to use (e.g., anthropic:claude-sonnet-4-5, openai:gpt-5-codex)"
         required: false
         type: string
       thinking_level:
-        description: 'Thinking level (off, low, medium, high)'
+        description: "Thinking level (off, low, medium, high)"
         required: false
         type: string
       extra_args:
-        description: 'Additional arguments to pass to terminal-bench'
+        description: "Additional arguments to pass to terminal-bench"
         required: false
         type: string
 
@@ -148,4 +148,3 @@ jobs:
             runs/
           if-no-files-found: warn
           retention-days: 30
-
@@ -4,11 +4,7 @@ import path from "path";
 
 const config: StorybookConfig = {
   stories: ["../src/**/*.stories.@(ts|tsx)"],
-  addons: [
-    "@storybook/addon-links",
-    "@storybook/addon-docs",
-    "@storybook/addon-interactions",
-  ],
+  addons: ["@storybook/addon-links", "@storybook/addon-docs", "@storybook/addon-interactions"],
   framework: {
     name: "@storybook/react-vite",
     options: {},
 
@@ -6,4 +6,3 @@ export const VERSION = {
   git_describe: "v1.0.0",
   buildTime: "2024-01-24T17:41:00Z", // 9:41 AM PST
 };
-
@@ -203,11 +203,11 @@ check-deadcode: node_modules/.installed ## Check for potential dead code (manual
 		|| echo "✓ No obvious dead code found"
 
 ## Testing
-test-integration: node_modules/.installed ## Run all tests (unit + integration)
+test-integration: node_modules/.installed build-main ## Run all tests (unit + integration)
 	@bun test src
 	@TEST_INTEGRATION=1 bun x jest tests
 
-test-unit: node_modules/.installed ## Run unit tests
+test-unit: node_modules/.installed build-main ## Run unit tests
 	@bun test src
 
 test: test-unit ## Alias for test-unit
@@ -220,7 +220,7 @@ test-coverage: ## Run tests with coverage
 
 test-e2e: ## Run end-to-end tests
 	@$(MAKE) build
-	@CMUX_E2E_LOAD_DIST=1 CMUX_E2E_SKIP_BUILD=1 PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 bun x playwright test --project=electron
+	@CMUX_E2E_LOAD_DIST=1 CMUX_E2E_SKIP_BUILD=1 PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 bun x playwright test --project=electron $(PLAYWRIGHT_ARGS)
 
 ## Distribution
 dist: build ## Build distributable packages
 
@@ -36,6 +36,7 @@ The benchmark uses a **global timeout** applied to all tasks. The default is **3
 **Design Rationale:**
 
 Based on analysis of Oct 30, 2025 nightly runs:
+
 - Longest successful task: `blind-maze-explorer-algorithm.hard` at 20 minutes
 - 95th percentile: ~15 minutes
 - Mean duration: ~6 minutes