Skip to content

Commit 348dc9d

Browse files
committed
Add nightly throughput stress
1 parent 6fed7d4 commit 348dc9d

File tree

1 file changed

+187
-0
lines changed

1 file changed

+187
-0
lines changed
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
# Workflow: runs the throughput stress scenario every night and on demand.
name: Nightly Throughput Stress

on:
  # Scheduled trigger: 11:00 UTC daily (3 AM PST), offset from the existing
  # nightly workflow.
  schedule:
    - cron: '00 11 * * *'

  # Manual trigger; both inputs are optional and have defaults.
  workflow_dispatch:
    inputs:
      # How long the scenario should run.
      duration:
        type: string
        required: false
        default: '45m'
        description: 'Test duration (e.g., 45m, 1h)'
      # Scenario timeout; per the description, keep it 30m above `duration`.
      timeout:
        type: string
        required: false
        default: '75m'
        description: 'Scenario timeout (should always be 30m more than duration)'
# Environment shared by every step of the workflow.
env:
  # Omes checkout coordinates (repository and git ref)
  OMES_REPO: temporalio/omes
  OMES_REF: main

  # Scenario parameters: taken from the workflow_dispatch inputs when they
  # are set, otherwise the literals after `||` are used.
  TEST_DURATION: ${{ inputs.duration || '45m' }}
  TEST_TIMEOUT: ${{ inputs.timeout || '75m' }}

  # Run identifier passed to the scenario, derived from the workflow run id
  RUN_ID: ${{ github.run_id }}-throughput-stress

  # Directory that collects server and scenario logs for artifact upload
  WORKER_LOG_DIR: /tmp/throughput-stress-logs
jobs:
  # Builds the SDK from this checkout, starts a local Temporal dev server and
  # runs the Omes `throughput_stress` scenario against it. On failure the
  # log directory is uploaded as an artifact and a Slack message is sent.
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # Upper bound for the whole job. Kept well above the default 75m scenario
    # timeout (TEST_TIMEOUT) so that checkout/build time does not eat into the
    # scenario's budget and the scenario's own --timeout can fire first.
    # Raise this if you dispatch manually with a timeout close to 2h.
    timeout-minutes: 120

    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      # The SDK is checked out into the workspace root (no `path` given).
      - name: Checkout SDK
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Omes is checked out into the `omes` subdirectory of the workspace.
      - name: Checkout OMES
        uses: actions/checkout@v4
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes

      # Go version and module cache key both come from the Omes checkout.
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: 22

      # Expose npm's cache directory as a step output for the restore step.
      - name: Get NPM cache directory
        id: npm-cache-dir
        run: echo "dir=$(npm config get cache)" >> "$GITHUB_OUTPUT"

      # Restore only; this workflow never saves the npm cache.
      - name: Restore NPM cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ steps.npm-cache-dir.outputs.dir }}
          key: npm-main-linux-x64-${{ hashFiles('./package-lock.json') }}
          restore-keys: |
            npm-main-linux-x64-

      - name: Install protoc
        uses: arduino/setup-protoc@v3
        with:
          version: '23.x'
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Upgrade Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: packages/core-bridge -> target
          prefix-key: corebridge-buildcache
          shared-key: linux-intel
          env-vars: ''

      # `npm ci` is attempted up to three times; the step fails only if all
      # three attempts fail.
      - name: Install SDK dependencies
        run: |
          npm ci --ignore-scripts --verbose || \
          npm ci --ignore-scripts --verbose || \
          npm ci --ignore-scripts --verbose

      - name: Build SDK
        run: npm run build
        env:
          BUILD_CORE_RELEASE: true

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # Runs the dev server in the background; stdout and stderr both go to
      # the log directory so they are part of the uploaded artifact.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      - name: Run throughput stress scenario with local SDK
        working-directory: omes
        run: |
          set +e # Don't fail immediately on error; the exit code is recorded below

          # Use run-scenario-with-worker to build and run in one step
          # Pass the SDK directory as --version for local testing
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          #   to give CI a bit more time for visibility consistency
          #
          # NOTE(review): the SDK is checked out into the workspace root, so
          # from this working directory it is "$(pwd)/..". The path below
          # resolves to <workspace>/sdk-typescript - confirm that directory
          # exists, otherwise pass "$GITHUB_WORKSPACE" instead.
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language typescript \
            --version "$(pwd)/../sdk-typescript" \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"

          # Take the exit status of `go run`, not of `tee`: without pipefail
          # `$?` would report the last command of the pipeline and hide
          # scenario failures. Must be read immediately after the pipeline.
          SCENARIO_EXIT_CODE=${PIPESTATUS[0]}
          echo "SCENARIO_EXIT_CODE=$SCENARIO_EXIT_CODE" >> "$GITHUB_ENV"
          exit "$SCENARIO_EXIT_CODE"

      - name: Upload logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      - name: Notify Slack on failure
        if: failure()
        uses: slackapi/slack-github-action@v1
        with:
          payload: |
            {
              "text": "Nightly TypeScript throughput stress test failed",
              "blocks": [{
                "type": "section",
                "text": {
                  "type": "mrkdwn",
                  "text": "*Nightly Throughput Stress Failed*\n\n*Run*: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>"
                }
              }]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}

      # Final summary. Runs even when earlier steps failed; a missing
      # SCENARIO_EXIT_CODE (scenario step never recorded one) counts as failure.
      - name: Fail if scenario failed
        if: always()
        run: |
          if [ "${SCENARIO_EXIT_CODE:-1}" != "0" ]; then
            echo "❌ Throughput stress test failed with exit code ${SCENARIO_EXIT_CODE:-unknown (scenario step did not complete)}"
            echo "Check the artifacts for detailed logs and state"
            exit 1
          else
            echo "✅ Throughput stress test completed successfully"
          fi

0 commit comments

Comments
 (0)