Skip to content

Commit d86727b

Browse files
authored
perf: async tokenization with worker threads (#465)
1 parent e8aaa77 commit d86727b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

66 files changed

+1803
-1848
lines changed

.github/workflows/chromatic.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,3 @@ jobs:
3636
projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
3737
exitZeroOnChanges: true
3838
onlyChanged: true
39-

.github/workflows/ci.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ jobs:
7474

7575
- uses: ./.github/actions/setup-cmux
7676

77+
- name: Build worker files
78+
run: make build-main
79+
7780
- name: Run tests with coverage
7881
run: bun test --coverage --coverage-reporter=lcov ${{ github.event.inputs.test_filter || 'src' }}
7982

@@ -96,6 +99,9 @@ jobs:
9699

97100
- uses: ./.github/actions/setup-cmux
98101

102+
- name: Build worker files
103+
run: make build-main
104+
99105
- name: Run integration tests with coverage
100106
# --silent suppresses per-test output (17 test files × 32 workers = overwhelming logs)
101107
run: TEST_INTEGRATION=1 bun x jest --coverage --maxWorkers=100% --silent ${{ github.event.inputs.test_filter || 'tests' }}

.github/workflows/nightly-terminal-bench.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@ name: Nightly Terminal-Bench
33
on:
44
schedule:
55
# Run full benchmark suite (~80 tasks) every night at midnight UTC
6-
- cron: '0 0 * * *'
6+
- cron: "0 0 * * *"
77
workflow_dispatch:
88
inputs:
99
models:
1010
description: 'Models to test (comma-separated, or "all" for both)'
1111
required: false
12-
default: 'all'
12+
default: "all"
1313
type: string
1414

1515
jobs:
@@ -41,9 +41,9 @@ jobs:
4141
uses: ./.github/workflows/terminal-bench.yml
4242
with:
4343
model_name: ${{ matrix.model }}
44-
thinking_level: 'high'
45-
dataset: 'terminal-bench-core==0.1.1'
46-
concurrency: '4'
44+
thinking_level: "high"
45+
dataset: "terminal-bench-core==0.1.1"
46+
concurrency: "4"
4747
livestream: true
4848
secrets:
4949
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

.github/workflows/publish-npm.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ on:
55
branches:
66
- main
77
tags:
8-
- 'v*'
8+
- "v*"
99
workflow_dispatch:
1010

1111
permissions:
@@ -24,12 +24,12 @@ jobs:
2424

2525
- uses: ./.github/actions/setup-cmux
2626
with:
27-
install-imagemagick: 'true'
27+
install-imagemagick: "true"
2828

2929
# Sets up .npmrc with the auth token
3030
- uses: actions/setup-node@v4
3131
with:
32-
registry-url: 'https://registry.npmjs.org'
32+
registry-url: "https://registry.npmjs.org"
3333

3434
- run: sudo npm i -g npm@latest
3535

@@ -38,10 +38,10 @@ jobs:
3838
run: |
3939
# Get base version from package.json
4040
BASE_VERSION=$(node -p "require('./package.json').version")
41-
41+
4242
# Generate git describe version
4343
GIT_DESCRIBE=$(git describe --tags --always --dirty 2>/dev/null || echo "unknown")
44-
44+
4545
if [[ $GITHUB_REF == refs/tags/* ]]; then
4646
# For tags, use the base version as-is (stable release)
4747
NPM_VERSION="${BASE_VERSION}"
@@ -56,13 +56,13 @@ jobs:
5656
NPM_TAG="next"
5757
echo "Publishing pre-release: ${NPM_VERSION}"
5858
fi
59-
59+
6060
echo "version=${NPM_VERSION}" >> $GITHUB_OUTPUT
6161
echo "tag=${NPM_TAG}" >> $GITHUB_OUTPUT
62-
62+
6363
# Update package.json with the new version
6464
node -e "const fs = require('fs'); const pkg = JSON.parse(fs.readFileSync('package.json')); pkg.version = '${NPM_VERSION}'; fs.writeFileSync('package.json', JSON.stringify(pkg, null, 2) + '\n');"
65-
65+
6666
echo "Updated package.json to version ${NPM_VERSION}"
6767
6868
- name: Generate version file
@@ -76,7 +76,7 @@ jobs:
7676
run: |
7777
PACKAGE_NAME=$(node -p "require('./package.json').name")
7878
VERSION="${{ steps.version.outputs.version }}"
79-
79+
8080
if npm view "${PACKAGE_NAME}@${VERSION}" version &>/dev/null; then
8181
echo "exists=true" >> $GITHUB_OUTPUT
8282
echo "Version ${VERSION} already exists on npm"
@@ -95,7 +95,7 @@ jobs:
9595
PACKAGE_NAME=$(node -p "require('./package.json').name")
9696
VERSION="${{ steps.version.outputs.version }}"
9797
TAG="${{ steps.version.outputs.tag }}"
98-
98+
9999
echo "Version ${VERSION} already published, updating dist-tag to ${TAG}"
100100
npm dist-tag add "${PACKAGE_NAME}@${VERSION}" "${TAG}"
101101

.github/workflows/terminal-bench.yml

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,34 +4,34 @@ on:
44
workflow_call:
55
inputs:
66
model_name:
7-
description: 'Model to use (e.g., anthropic:claude-sonnet-4-5)'
7+
description: "Model to use (e.g., anthropic:claude-sonnet-4-5)"
88
required: false
99
type: string
1010
thinking_level:
11-
description: 'Thinking level (off, low, medium, high)'
11+
description: "Thinking level (off, low, medium, high)"
1212
required: false
1313
type: string
1414
dataset:
15-
description: 'Terminal-Bench dataset to use'
15+
description: "Terminal-Bench dataset to use"
1616
required: false
1717
type: string
18-
default: 'terminal-bench-core==0.1.1'
18+
default: "terminal-bench-core==0.1.1"
1919
concurrency:
20-
description: 'Number of concurrent tasks (--n-concurrent)'
20+
description: "Number of concurrent tasks (--n-concurrent)"
2121
required: false
2222
type: string
23-
default: '4'
23+
default: "4"
2424
livestream:
25-
description: 'Enable livestream mode'
25+
description: "Enable livestream mode"
2626
required: false
2727
type: boolean
2828
default: true
2929
sample_size:
30-
description: 'Number of random tasks to run (empty = all tasks)'
30+
description: "Number of random tasks to run (empty = all tasks)"
3131
required: false
3232
type: string
3333
extra_args:
34-
description: 'Additional arguments to pass to terminal-bench'
34+
description: "Additional arguments to pass to terminal-bench"
3535
required: false
3636
type: string
3737
secrets:
@@ -42,34 +42,34 @@ on:
4242
workflow_dispatch:
4343
inputs:
4444
dataset:
45-
description: 'Terminal-Bench dataset to use'
45+
description: "Terminal-Bench dataset to use"
4646
required: false
47-
default: 'terminal-bench-core==0.1.1'
47+
default: "terminal-bench-core==0.1.1"
4848
type: string
4949
concurrency:
50-
description: 'Number of concurrent tasks (--n-concurrent)'
50+
description: "Number of concurrent tasks (--n-concurrent)"
5151
required: false
52-
default: '4'
52+
default: "4"
5353
type: string
5454
livestream:
55-
description: 'Enable livestream mode'
55+
description: "Enable livestream mode"
5656
required: false
5757
default: true
5858
type: boolean
5959
sample_size:
60-
description: 'Number of random tasks to run (empty = all tasks)'
60+
description: "Number of random tasks to run (empty = all tasks)"
6161
required: false
6262
type: string
6363
model_name:
64-
description: 'Model to use (e.g., anthropic:claude-sonnet-4-5, openai:gpt-5-codex)'
64+
description: "Model to use (e.g., anthropic:claude-sonnet-4-5, openai:gpt-5-codex)"
6565
required: false
6666
type: string
6767
thinking_level:
68-
description: 'Thinking level (off, low, medium, high)'
68+
description: "Thinking level (off, low, medium, high)"
6969
required: false
7070
type: string
7171
extra_args:
72-
description: 'Additional arguments to pass to terminal-bench'
72+
description: "Additional arguments to pass to terminal-bench"
7373
required: false
7474
type: string
7575

@@ -148,4 +148,3 @@ jobs:
148148
runs/
149149
if-no-files-found: warn
150150
retention-days: 30
151-

.storybook/main.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,7 @@ import path from "path";
44

55
const config: StorybookConfig = {
66
stories: ["../src/**/*.stories.@(ts|tsx)"],
7-
addons: [
8-
"@storybook/addon-links",
9-
"@storybook/addon-docs",
10-
"@storybook/addon-interactions",
11-
],
7+
addons: ["@storybook/addon-links", "@storybook/addon-docs", "@storybook/addon-interactions"],
128
framework: {
139
name: "@storybook/react-vite",
1410
options: {},

.storybook/mocks/version.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,3 @@ export const VERSION = {
66
git_describe: "v1.0.0",
77
buildTime: "2024-01-24T17:41:00Z", // 9:41 AM PST
88
};
9-

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,11 +203,11 @@ check-deadcode: node_modules/.installed ## Check for potential dead code (manual
203203
|| echo "✓ No obvious dead code found"
204204

205205
## Testing
206-
test-integration: node_modules/.installed ## Run all tests (unit + integration)
206+
test-integration: node_modules/.installed build-main ## Run all tests (unit + integration)
207207
@bun test src
208208
@TEST_INTEGRATION=1 bun x jest tests
209209

210-
test-unit: node_modules/.installed ## Run unit tests
210+
test-unit: node_modules/.installed build-main ## Run unit tests
211211
@bun test src
212212

213213
test: test-unit ## Alias for test-unit
@@ -220,7 +220,7 @@ test-coverage: ## Run tests with coverage
220220

221221
test-e2e: ## Run end-to-end tests
222222
@$(MAKE) build
223-
@CMUX_E2E_LOAD_DIST=1 CMUX_E2E_SKIP_BUILD=1 PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 bun x playwright test --project=electron
223+
@CMUX_E2E_LOAD_DIST=1 CMUX_E2E_SKIP_BUILD=1 PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 bun x playwright test --project=electron $(PLAYWRIGHT_ARGS)
224224

225225
## Distribution
226226
dist: build ## Build distributable packages

benchmarks/terminal_bench/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ The benchmark uses a **global timeout** applied to all tasks. The default is **3
3636
**Design Rationale:**
3737

3838
Based on analysis of Oct 30, 2025 nightly runs:
39+
3940
- Longest successful task: `blind-maze-explorer-algorithm.hard` at 20 minutes
4041
- 95th percentile: ~15 minutes
4142
- Mean duration: ~6 minutes

0 commit comments

Comments
 (0)