Skip to content

Commit 02d407e

Browse files
committed
Set back to 20 epochs
1 parent 1a49ed3 commit 02d407e

File tree

12 files changed

+123
-128
lines changed

12 files changed

+123
-128
lines changed

cli/gen.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ EXAMPLES:
6262
yarn gen --help
6363
6464
PRESET COMMANDS:
65-
yarn update:local Update 500 inboxes for local testing
66-
yarn update:prod Update inboxes for production testing
67-
yarn restart:prod Restart production installations (force recreate)
65+
yarn gen update:local Update 500 inboxes for local testing
66+
yarn gen update:prod Update inboxes for production testing
67+
yarn gen restart:prod Restart production installations (force recreate)
6868
6969
For more information, see: cli/readme.md
7070
`);

forks/README.md

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ LOG_LEVEL=debug
2222
XMTP_ENV=production
2323
```
2424

25+
### Running locally
26+
Before running this suite locally, you _must_ run `yarn gen update:local` to pre-populate the database with the inboxes that are added to and removed from the group. Otherwise, add/remove member operations will fail, so the epoch will not advance and no forks will be triggered.
27+
2528
### Fork generation through send testing
2629

2730
The main approach creates intentional conflicts by running parallel operations on shared groups:
@@ -105,8 +108,9 @@ The fork test can inject network chaos (latency, jitter, packet loss) to simulat
105108

106109
**Requirements:**
107110
- Network chaos requires `--env local`
108-
- Multinode Docker containers must be running (`./dev/up`)
109-
- Requires `sudo` access for `tc` and `iptables` commands
111+
- Multinode Docker containers must be running (`./multinode/up`)
112+
- Must be run on Linux with the `tc` and `iptables` commands available. Will not work on macOS.
113+
- Requires `sudo` access
110114

111115
**Chaos Levels:**
112116

@@ -145,11 +149,3 @@ chaosLevel: high
145149
packetLoss: 0-5%
146150
interval: 10000ms
147151
```
148-
149-
### Log processing features
150-
151-
- **Clean slate**: Removes old logs and data before starting
152-
- **Continuous capture**: Each iteration captures debug logs
153-
- **ANSI cleaning**: Strips escape codes for analysis
154-
- **Fork counting**: Automatically counts detected conflicts
155-
- **Graceful interruption**: Ctrl+C exits cleanly

forks/cli.ts

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import {
88
epochRotationOperations,
99
groupCount,
1010
installationCount,
11-
network,
1211
NODE_VERSION,
1312
otherOperations,
1413
parallelOperations,
@@ -89,8 +88,8 @@ function runForkTest(options: ForkOptions): void {
8988
CHAOS_LEVEL: options.chaosLevel,
9089
},
9190
});
92-
} catch {
93-
console.log("Error running fork test");
91+
} catch (e) {
92+
console.error("Error running fork test", e);
9493
// Test may fail if forks are detected, that's expected
9594
// We'll analyze the logs afterward
9695
}
@@ -111,7 +110,7 @@ function logForkMatrixParameters(options: ForkOptions): void {
111110
);
112111
console.info(`otherOperations: ${JSON.stringify(otherOperations)}`);
113112
console.info(`targetEpoch: ${targetEpoch}`);
114-
console.info(`network: ${network || "undefined"}`);
113+
console.info(`network: ${options.env || "undefined"}`);
115114
console.info(`randomInboxIdsCount: ${randomInboxIdsCount}`);
116115
console.info(`installationCount: ${installationCount}`);
117116
console.info(`testName: ${testName}`);
@@ -192,15 +191,6 @@ async function runForkDetection(options: ForkOptions): Promise<void> {
192191
stats.runsWithoutForks++;
193192
console.info(`Run ${i}/${options.count}: ⚪ No forks`);
194193
}
195-
196-
// Clean up empty cleaned directory if it exists
197-
const logsDir = path.join(process.cwd(), "logs", "cleaned");
198-
if (fs.existsSync(logsDir)) {
199-
const files = fs.readdirSync(logsDir);
200-
if (files.length === 0) {
201-
fs.rmdirSync(logsDir);
202-
}
203-
}
204194
}
205195

206196
// Display final statistics

forks/config.ts

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ export const epochRotationOperations = {
2121
removeMember: true, // removes a random member from the group
2222
};
2323
export const otherOperations = {
24-
createInstallation: true, // creates a new installation for a random worker
24+
createInstallation: false, // creates a new installation for a random worker
2525
sendMessage: true, // sends a message to the group
2626
};
27-
export const targetEpoch = 150n; // The target epoch to stop the test (epochs are when performing forks to the group)
27+
export const targetEpoch = 30n; // The target epoch to stop the test (epochs are when performing forks to the group)
2828
export const network = process.env.XMTP_ENV; // Network environment setting
2929
export const randomInboxIdsCount = 10; // How many inboxIds to use randomly in the add/remove operations
3030
export const installationCount = 2; // How many installations to use randomly in the createInstallation operations
@@ -63,12 +63,12 @@ export const chaosPresets: Record<ChaosLevel, ChaosPreset> = {
6363
interval: 10000, // 10 seconds
6464
},
6565
high: {
66-
delayMin: 100,
66+
delayMin: 0,
6767
delayMax: 500,
68-
jitterMin: 0,
69-
jitterMax: 100,
68+
jitterMin: 50,
69+
jitterMax: 200,
7070
lossMin: 0,
71-
lossMax: 5,
71+
lossMax: 10,
7272
interval: 10000, // 10 seconds
7373
},
7474
};
@@ -90,4 +90,6 @@ export const multinodeContainers = [
9090
"multinode-node2-1",
9191
"multinode-node3-1",
9292
"multinode-node4-1",
93+
// Include the MLS validation service to add some additional chaos
94+
"multinode-validation-1",
9395
];

forks/constants.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export const forkDetectedString = "[FORK DETECTED]";

forks/forks.test.ts

Lines changed: 77 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,61 @@ import {
2020
targetEpoch,
2121
testName,
2222
workerNames,
23+
type ChaosPreset,
2324
} from "./config";
2425

26+
const startChaos = (
27+
allNodes: DockerContainer[],
28+
preset: ChaosPreset,
29+
): NodeJS.Timeout => {
30+
console.log(`[chaos] Initialized ${allNodes.length} Docker containers`);
31+
32+
// Validate containers are running
33+
for (const node of allNodes) {
34+
try {
35+
// Test if container exists by trying to get its IP
36+
if (!node.ip) {
37+
throw new Error(`Container ${node.name} has no IP address`);
38+
}
39+
} catch {
40+
throw new Error(
41+
`Docker container ${node.name} is not running. Network chaos requires local multinode setup (./dev/up).`,
42+
);
43+
}
44+
}
45+
console.log("[chaos] All Docker containers validated");
46+
47+
// Function to apply chaos to all nodes
48+
const applyChaos = () => {
49+
console.log(
50+
"[chaos] Applying jitter, delay, and drop rules to all nodes...",
51+
);
52+
for (const node of allNodes) {
53+
const delay = Math.floor(
54+
preset.delayMin + Math.random() * (preset.delayMax - preset.delayMin),
55+
);
56+
const jitter = Math.floor(
57+
preset.jitterMin +
58+
Math.random() * (preset.jitterMax - preset.jitterMin),
59+
);
60+
const loss =
61+
preset.lossMin + Math.random() * (preset.lossMax - preset.lossMin);
62+
63+
try {
64+
node.addJitter(delay, jitter);
65+
if (Math.random() < 0.5) node.addLoss(loss);
66+
} catch (err) {
67+
console.warn(`[chaos] Error applying netem on ${node.name}:`, err);
68+
}
69+
}
70+
};
71+
72+
// Apply chaos immediately
73+
applyChaos();
74+
75+
return setInterval(applyChaos, preset.interval);
76+
};
77+
2578
describe(testName, () => {
2679
setupDurationTracking({ testName });
2780

@@ -91,79 +144,29 @@ describe(testName, () => {
91144

92145
// Initialize Docker containers for multinode setup
93146
allNodes = multinodeContainers.map((name) => new DockerContainer(name));
94-
console.log(`[chaos] Initialized ${allNodes.length} Docker containers`);
95-
96-
// Validate containers are running
97-
for (const node of allNodes) {
98-
try {
99-
// Test if container exists by trying to get its IP
100-
if (!node.ip) {
101-
throw new Error(`Container ${node.name} has no IP address`);
102-
}
103-
} catch (_err) {
104-
throw new Error(
105-
`Docker container ${node.name} is not running. Network chaos requires local multinode setup (./dev/up).`,
106-
);
107-
}
108-
}
109-
console.log("[chaos] All Docker containers validated");
110-
111147
const preset = chaosPresets[chaosConfig.level];
112-
113-
// Function to apply chaos to all nodes
114-
const applyChaos = () => {
115-
console.log(
116-
"[chaos] Applying jitter, delay, and drop rules to all nodes...",
117-
);
118-
for (const node of allNodes) {
119-
const delay = Math.floor(
120-
preset.delayMin +
121-
Math.random() * (preset.delayMax - preset.delayMin),
122-
);
123-
const jitter = Math.floor(
124-
preset.jitterMin +
125-
Math.random() * (preset.jitterMax - preset.jitterMin),
126-
);
127-
const loss =
128-
preset.lossMin +
129-
Math.random() * (preset.lossMax - preset.lossMin);
130-
131-
try {
132-
node.addJitter(delay, jitter);
133-
if (Math.random() < 0.5) node.addLoss(loss);
134-
} catch (err) {
135-
console.warn(
136-
`[chaos] Error applying netem on ${node.name}:`,
137-
err,
138-
);
139-
}
140-
}
141-
};
142-
143-
// Apply chaos immediately
144-
applyChaos();
145-
146148
// Then set interval for continued chaos
147-
chaosInterval = setInterval(applyChaos, preset.interval);
149+
chaosInterval = startChaos(allNodes, preset);
148150
console.log(`[chaos] Started chaos interval (${preset.interval}ms)`);
151+
}
149152

150-
// Start periodic verification during chaos
151-
const verifyLoop = () => {
152-
verifyInterval = setInterval(() => {
153-
void (async () => {
154-
try {
155-
console.log("[verify] Checking forks under chaos");
156-
await workers.checkForks();
157-
} catch (e) {
158-
console.warn("[verify] Skipping check due to exception:", e);
159-
}
160-
})();
161-
}, 10 * 1000);
162-
};
153+
// Start periodic verification during chaos
154+
const verifyLoop = () => {
155+
verifyInterval = setInterval(() => {
156+
void (async () => {
157+
try {
158+
console.log("[verify] Checking forks under chaos");
159+
await workers.checkForks();
160+
} catch (e) {
161+
console.warn("[verify] Skipping check due to exception:", e);
162+
throw e;
163+
}
164+
})();
165+
}, 10 * 1000);
166+
};
163167

164-
verifyLoop();
165-
console.log("[chaos] Started verification interval (10000ms)");
166-
}
168+
verifyLoop();
169+
console.log("Started verification interval (10000ms)");
167170

168171
// Create groups
169172
const groupOperationPromises = Array.from(
@@ -228,7 +231,13 @@ describe(testName, () => {
228231
);
229232

230233
await Promise.all(groupOperationPromises);
234+
await workers.checkForks();
235+
} catch (e) {
236+
console.error("Error during fork testing:", e);
231237
} finally {
238+
if (verifyInterval) {
239+
clearInterval(verifyInterval);
240+
}
232241
// Clean up chaos if it was enabled
233242
if (chaosConfig.enabled) {
234243
console.log("[chaos] Cleaning up network chaos...");
@@ -237,9 +246,6 @@ describe(testName, () => {
237246
if (chaosInterval) {
238247
clearInterval(chaosInterval);
239248
}
240-
if (verifyInterval) {
241-
clearInterval(verifyInterval);
242-
}
243249

244250
// Clear network rules
245251
for (const node of allNodes) {
@@ -253,10 +259,6 @@ describe(testName, () => {
253259
}
254260
}
255261

256-
// Cooldown period to allow in-flight messages to be processed
257-
console.log("[chaos] Waiting 5s cooldown before final validation");
258-
await new Promise((r) => setTimeout(r, 5000));
259-
260262
console.log("[chaos] Cleanup complete");
261263
}
262264
}

helpers/analyzer.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import fs from "fs";
22
import path from "path";
3+
import { forkDetectedString } from "forks/constants";
34
import { processLogFile, stripAnsi } from "./logger";
45

56
// Known test issues for tracking
@@ -180,11 +181,6 @@ export async function cleanForksLogs(
180181
const logsDir = path.join(process.cwd(), "logs");
181182
const outputDir = path.join(logsDir, "cleaned");
182183

183-
if (!fs.existsSync(logsDir)) {
184-
console.debug("No logs directory found");
185-
return;
186-
}
187-
188184
if (!fs.existsSync(outputDir)) {
189185
await fs.promises.mkdir(outputDir, { recursive: true });
190186
}
@@ -214,7 +210,7 @@ export async function cleanForksLogs(
214210
// Check if the file contains fork-related content
215211
const containsForkContent = await fileContainsString(
216212
rawFilePath,
217-
"may have forked",
213+
forkDetectedString,
218214
);
219215

220216
// Always preserve raw logs for debugging/analysis

helpers/logger.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import fs from "fs";
22
import path from "path";
33
import winston from "winston";
44
import "dotenv/config";
5+
import { forkDetectedString } from "forks/constants";
56

67
// Consolidated ANSI escape code regex
78
// eslint-disable-next-line no-control-regex
@@ -44,7 +45,7 @@ export async function processLogFile(
4445

4546
let buffer = "";
4647
let foundForkLine = false;
47-
const targetString = "may be fork";
48+
const targetString = forkDetectedString;
4849

4950
readStream.on("data", (chunk: string | Buffer) => {
5051
if (foundForkLine) {

0 commit comments

Comments
 (0)