Skip to content

Commit 7ecb6f0

Browse files
committed
Clone workers in streams
1 parent b96795f commit 7ecb6f0

File tree

8 files changed

+229
-68
lines changed

8 files changed

+229
-68
lines changed

chaos/db.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ export type DbChaosConfig = {
55
minLockTime: number;
66
maxLockTime: number;
77
lockInterval: number;
8+
impactedWorkerPercentage: number; // number between 0 and 100 for what % of workers to lock on each run
89
};
910

1011
export class DbChaos implements ChaosProvider {
@@ -17,14 +18,18 @@ export class DbChaos implements ChaosProvider {
1718
}
1819

1920
start(workers: WorkerManager): Promise<void> {
20-
const { minLockTime, maxLockTime, lockInterval } = this.config;
21+
const { minLockTime, maxLockTime, lockInterval, impactedWorkerPercentage } =
22+
this.config;
2123
console.log(
2224
`Starting DB Chaos:
2325
Locking for ${minLockTime}ms - ${maxLockTime}ms
2426
Interval: ${lockInterval}ms`,
2527
);
2628
this.interval = setInterval(() => {
2729
for (const worker of workers.getAll()) {
30+
if (Math.random() * 100 > impactedWorkerPercentage) {
31+
continue;
32+
}
2833
const duration = Math.floor(
2934
minLockTime + Math.random() * (maxLockTime - minLockTime),
3035
);

chaos/streams.ts

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,38 @@
11
import type { ChaosProvider } from "@chaos/provider";
2-
import { typeofStream } from "@workers/main";
2+
import { typeofStream, type WorkerClient } from "@workers/main";
33
import type { WorkerManager } from "@workers/manager";
44

5+
export type StreamsConfig = {
6+
cloned: boolean;
7+
};
8+
59
export class StreamsChaos implements ChaosProvider {
6-
workers?: WorkerManager;
7-
start(workers: WorkerManager) {
10+
workers?: WorkerClient[];
11+
config: StreamsConfig;
12+
13+
constructor(config: StreamsConfig) {
14+
this.config = config;
15+
}
16+
17+
async start(workers: WorkerManager) {
818
console.log("Starting StreamsChaos");
9-
this.workers = workers;
10-
for (const worker of workers.getAll()) {
11-
worker.worker.startStream(typeofStream.Message);
19+
let allWorkers = workers.getAll().map((w) => w.worker);
20+
if (this.config.cloned) {
21+
allWorkers = await Promise.all(allWorkers.map((w) => w.clone()));
22+
}
23+
24+
this.workers = allWorkers;
25+
for (const worker of allWorkers) {
26+
worker.startStream(typeofStream.Message);
1227
}
1328

1429
return Promise.resolve();
1530
}
1631

1732
stop() {
1833
if (this.workers) {
19-
for (const worker of this.workers.getAll()) {
20-
worker.worker.stopStreams();
34+
for (const worker of this.workers) {
35+
worker.stopStreams();
2136
}
2237
}
2338

forks/README.md

Lines changed: 104 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,15 @@ XMTP_ENV=production
2525
### Running locally
2626
Before running this suite locally you _must_ run `yarn gen update:local` to pre-populate the database with inboxes to add and remove from the group. Otherwise add/remove member operations will fail, which will not increase the epoch or trigger forks.
2727

28-
### Fork generation through send testing
28+
### Fork generation through parallel operations
2929

3030
The main approach creates intentional conflicts by running parallel operations on shared groups:
3131

3232
- Create X groups in parallel
3333
- Add X workers as super admins to each group
3434
- Loop each group until epoch Y:
3535
- Choose random worker and syncAll conversations
36-
- Run between 2 random operations:
36+
- Run parallel operations:
3737
- Update group name
3838
- Send message (random message)
3939
- Add member (random inboxId)
@@ -45,23 +45,25 @@ The main approach creates intentional conflicts by running parallel operations o
4545

4646
### Parameters
4747

48-
- **groupCount**: `5` - Number of groups to create in parallel
49-
- **nodeBindings**: `3.x.x` - Node SDK version to use
50-
- **parallelOperations**: `1` - How many operations to perform in parallel
51-
- **enabledOperations**: - Operations configuration - enable/disable specific operations
52-
- `updateName`: true, // updates the name of the group
53-
- `sendMessage`: false, // sends a message to the group
54-
- `addMember`: true, // adds a random member to the group
55-
- `removeMember`: true, // removes a random member from the group
56-
- `createInstallation`: true, // creates a new installation for a random worker
57-
- **workerNames**: Random workers (`random1`, `random2`, ..., `random10`)
58-
- **targetEpoch**: `100n` - The target epoch to stop the test (epochs are when performing forks to the group)
59-
- **network**: `process.env.XMTP_ENV` - Network environment setting
60-
- **randomInboxIdsCount**: `30` - How many inboxIds to use randomly in the add/remove operations
61-
- **installationCount**: `5` - How many installations to use randomly in the createInstallation operations
62-
- **typeofStreamForTest**: `typeofStream.None` - No streams started by default (configured on-demand)
63-
- **typeOfSyncForTest**: `typeOfSync.None` - No automatic syncing (configured on-demand)
64-
- **streams**: `false` - Enable message streams on all workers (configured via `--streams` CLI flag)
48+
Default configuration values (can be overridden via CLI flags):
49+
50+
- **groupCount**: `5` - Number of groups to create in parallel (override with `--group-count`)
51+
- **nodeBindings**: Latest version - Node SDK version to use
52+
- **parallelOperations**: `5` - How many operations to perform in parallel (override with `--parallel-operations`)
53+
- **epochRotationOperations**: Operations that rotate epochs:
54+
- `updateName`: true - Updates the name of the group
55+
- `addMember`: true - Adds a random member to the group
56+
- `removeMember`: true - Removes a random member from the group
57+
- **otherOperations**: Additional operations:
58+
- `sendMessage`: true - Sends a message to the group
59+
- `createInstallation`: false - Creates a new installation for a random worker
60+
- `sync`: false - Syncs the group
61+
- **workerNames**: Random workers (`random1`, `random2`, `random3`, `random4`, `random5`)
62+
- **targetEpoch**: `20` - The target epoch to stop the test (override with `--target-epoch`)
63+
- **network**: `dev` - Network environment setting (override with `--env`)
64+
- **randomInboxIdsCount**: `50` - How many inboxIds to use randomly in the add/remove operations
65+
- **installationCount**: `2` - How many installations to use randomly in the createInstallation operations
66+
- **backgroundStreams**: `false` - Enable message streams on all workers (enable with `--with-background-streams`)
6567

6668
### Test setup in local network
6769

@@ -73,7 +75,7 @@ The main approach creates intentional conflicts by running parallel operations o
7375
yarn local-update
7476

7577
# Process that runs the test 100 times and exports forks logs
76-
yarn test forks --attempts 100 --env local --log warn --file --forks
78+
yarn fork --count 100 --env local
7779
```
7880

7981
## CLI Usage
@@ -90,21 +92,50 @@ yarn fork --count 50
9092
# Clean all raw logs before starting
9193
yarn fork --clean-all
9294

95+
# Keep logs that don't contain fork content
96+
yarn fork --no-remove-non-matching
97+
9398
# Run on a specific environment
9499
yarn fork --count 200 --env local
95100

96101
# Enable message streams on all workers
97-
yarn fork --streams
102+
yarn fork --with-background-streams
103+
104+
# Configure test parameters
105+
yarn fork --group-count 10 --parallel-operations 3 --target-epoch 50
106+
107+
# Set log level for test runner
108+
yarn fork --log-level debug
98109

99110
# Show help
100111
yarn fork --help
101112
```
102113

114+
### CLI Options
115+
116+
- `--count`: Number of times to run the fork detection process (default: 100)
117+
- `--clean-all`: Clean all raw logs before starting (default: false)
118+
- `--remove-non-matching`: Remove logs that don't contain fork content (default: true)
119+
- `--no-remove-non-matching`: Keep logs that don't contain fork content
120+
- `--env`: XMTP environment - `local`, `dev`, or `production` (default: `dev` or `XMTP_ENV`)
121+
- `--network-chaos-level`: Network chaos level - `none`, `low`, `medium`, or `high` (default: `none`)
122+
- `--db-chaos-level`: Database chaos level - `none`, `low`, `medium`, or `high` (default: `none`)
123+
- `--with-background-streams`: Enable message streams on all workers (default: false)
124+
- `--log-level`: Log level for test runner - `debug`, `info`, `warn`, `error` (default: `warn`)
125+
- `--group-count`: Number of groups to run the test against (default: 5)
126+
- `--parallel-operations`: Number of parallel operations run on each group (default: 5)
127+
- `--target-epoch`: Target epoch to stop the test at (default: 20)
128+
129+
### Statistics Output
130+
103131
The CLI provides statistics including:
104132

105133
- Total runs and forks detected
134+
- Runs with forks vs. runs without forks
135+
- Runs with errors
106136
- Fork detection rate
107137
- Average forks per run
138+
- Average forks per run (with forks only)
108139

109140
### Network Chaos Testing
110141

@@ -113,7 +144,7 @@ The fork test can inject network chaos (latency, jitter, packet loss) to simulat
113144
**Requirements:**
114145
- Network chaos requires `--env local`
115146
- Multinode Docker containers must be running (`./multinode/up`)
116-
- Must be run on linux with `tc` and `iptables` commands available. Will not work on MacOS.
147+
- Must be run on Linux with `tc` and `iptables` commands available. Will not work on MacOS.
117148
- Requires `sudo` access
118149

119150
**Chaos Levels:**
@@ -122,19 +153,19 @@ The fork test can inject network chaos (latency, jitter, packet loss) to simulat
122153
|--------|-------------|--------------|-------------|----------|
123154
| low | 50-150ms | 0-50ms | 0-2% | 15s |
124155
| medium | 100-300ms | 0-75ms | 0-3.5% | 10s |
125-
| high | 100-500ms | 0-100ms | 0-5% | 10s |
156+
| high | 0-500ms | 50-200ms | 0-25% | 10s |
126157

127158
**Usage:**
128159

129160
```bash
130-
# Run with default (medium) chaos
131-
yarn fork --env local --chaos-enabled
161+
# Run with low network chaos
162+
yarn fork --env local --network-chaos-level low
132163

133-
# Run with high chaos level
134-
yarn fork --env local --chaos-enabled --chaos-level high
164+
# Run with high network chaos level
165+
yarn fork --env local --network-chaos-level high
135166

136-
# Run 50 iterations with low chaos
137-
yarn fork --count 50 --env local --chaos-enabled --chaos-level low
167+
# Run 50 iterations with medium network chaos
168+
yarn fork --count 50 --env local --network-chaos-level medium
138169
```
139170

140171
**How it works:**
@@ -146,10 +177,49 @@ yarn fork --count 50 --env local --chaos-enabled --chaos-level low
146177
**Example output:**
147178
```
148179
NETWORK CHAOS PARAMETERS
149-
chaosEnabled: true
150-
chaosLevel: high
151-
delay: 100-500ms
152-
jitter: 0-100ms
153-
packetLoss: 0-5%
180+
delay: 0-500ms
181+
jitter: 50-200ms
182+
packetLoss: 0-25%
154183
interval: 10000ms
155184
```
185+
186+
### Database Chaos Testing
187+
188+
The fork test can inject database chaos by temporarily locking database files to simulate database contention and I/O issues.
189+
190+
**Chaos Levels:**
191+
192+
| Level | Lock Duration | Interval |
193+
|--------|---------------|----------|
194+
| low | 50-250ms | 10s |
195+
| medium | 100-2000ms | 15s |
196+
| high | 500-2000ms | 5s |
197+
198+
**Usage:**
199+
200+
```bash
201+
# Run with low database chaos
202+
yarn fork --db-chaos-level low
203+
204+
# Run with high database chaos level
205+
yarn fork --db-chaos-level high
206+
207+
# Run 50 iterations with medium database chaos
208+
yarn fork --count 50 --db-chaos-level medium
209+
210+
# Combine network and database chaos
211+
yarn fork --env local --network-chaos-level medium --db-chaos-level medium
212+
```
213+
214+
**How it works:**
215+
1. Periodically locks database files for each worker
216+
2. Lock duration is randomized within the preset range
217+
3. Workers experience database busy/locked errors during operations
218+
4. Cleans up and waits for all locks to complete when test finishes
219+
220+
**Example output:**
221+
```
222+
DATABASE CHAOS PARAMETERS
223+
lockDuration: 500-2000ms
224+
interval: 5000ms
225+
```

forks/cli.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,11 @@ function buildRuntimeConfig(options: ForkOptions): RuntimeConfig {
5050
network: (options.env || "dev") as "local" | "dev" | "production",
5151
networkChaos: resolveNetworkChaosConfig(options.networkChaosLevel),
5252
dbChaos: resolveDbChaosConfig(options.dbChaosLevel),
53-
backgroundStreams: options.withBackgroundStreams,
53+
backgroundStreams: options.withBackgroundStreams
54+
? {
55+
cloned: true,
56+
}
57+
: null,
5458
};
5559
}
5660

@@ -129,7 +133,7 @@ async function runForkDetection(options: ForkOptions): Promise<void> {
129133
const success = runForkTest(options, runtimeConfig);
130134
if (!success) {
131135
stats.runsWithErrors++;
132-
console.log(`❌ Error in run ${i}/${options.count}`);
136+
console.info(`❌ Error in run ${i}/${options.count}`);
133137
}
134138

135139
// Clean and analyze fork logs after the test (suppress output)

forks/config.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { type DbChaosConfig } from "@chaos/db";
22
import type { NetworkChaosConfig } from "@chaos/network";
3+
import type { StreamsConfig } from "@chaos/streams";
34
import { getActiveVersion, type XmtpEnv } from "@helpers/versions";
45

56
export const NODE_VERSION = getActiveVersion().nodeBindings; // default to latest version, can be overridden with --nodeBindings=3.1.1
@@ -22,7 +23,7 @@ export const epochRotationOperations = {
2223
export const otherOperations = {
2324
createInstallation: false, // creates a new installation for a random worker
2425
sendMessage: true, // sends a message to the group
25-
sync: true, // syncs the group
26+
sync: false, // syncs the group
2627
};
2728
export const randomInboxIdsCount = 50; // How many inboxIds to use randomly in the add/remove operations
2829
export const installationCount = 2; // How many installations to use randomly in the createInstallation operations
@@ -74,17 +75,20 @@ export const dbChaosPresets: Record<
7475
low: {
7576
minLockTime: 50,
7677
maxLockTime: 250,
77-
lockInterval: 10000, // 20 seconds
78+
lockInterval: 10000, // 10 seconds
79+
impactedWorkerPercentage: 20,
7880
},
7981
medium: {
8082
minLockTime: 100,
8183
maxLockTime: 2000,
8284
lockInterval: 15000, // 15 seconds
85+
impactedWorkerPercentage: 40,
8386
},
8487
high: {
8588
minLockTime: 500,
8689
maxLockTime: 2000,
8790
lockInterval: 5000, // 5 seconds
91+
impactedWorkerPercentage: 60,
8892
},
8993
};
9094

@@ -130,7 +134,7 @@ export type RuntimeConfig = {
130134
network: XmtpEnv; // XMTP network
131135
networkChaos: NetworkChaosConfig | null; // Network chaos configuration
132136
dbChaos: DbChaosConfig | null; // Database chaos configuration
133-
backgroundStreams: boolean; //
137+
backgroundStreams: StreamsConfig | null; //
134138
};
135139

136140
export function getConfigFromEnv(): RuntimeConfig {
@@ -161,7 +165,9 @@ export function printConfig(config: RuntimeConfig): void {
161165
console.info(`randomInboxIdsCount: ${randomInboxIdsCount}`);
162166
console.info(`installationCount: ${installationCount}`);
163167
console.info(`testName: ${testName}`);
164-
console.info(`backgroundStreams: ${config.backgroundStreams}`);
168+
console.info(
169+
`backgroundStreams: ${config.backgroundStreams ? "enabled" : "disabled"}. From separate client instances: ${config.backgroundStreams?.cloned}`,
170+
);
165171

166172
if (config.networkChaos) {
167173
console.info("\nNETWORK CHAOS PARAMETERS");

0 commit comments

Comments
 (0)