From 386385a7377b9a9a06effd936af6ee96e4e97287 Mon Sep 17 00:00:00 2001 From: Tony Murphy Date: Thu, 3 Mar 2022 16:33:59 -0800 Subject: [PATCH 1/6] MergeTree: Add stress bug farm for LocalReferences --- .../merge-tree/src/test/testClientLogger.ts | 52 +++++++++++++------ 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/packages/dds/merge-tree/src/test/testClientLogger.ts b/packages/dds/merge-tree/src/test/testClientLogger.ts index 00f510f31dc1..64dad1461d7a 100644 --- a/packages/dds/merge-tree/src/test/testClientLogger.ts +++ b/packages/dds/merge-tree/src/test/testClientLogger.ts @@ -54,6 +54,16 @@ export function createClientsAtInitialState( return {...clients, all}; } export class TestClientLogger { + + public static toString(clients: readonly TestClient[]){ + + return clients.map((c)=>this.getSegString(c)).reduce<[string,string]>((pv,cv)=>{ + pv[0]+=`|${cv.acked.padEnd(cv.local.length,"")}`; + pv[1]+=`|${cv.local.padEnd(cv.acked.length,"")}`; + return pv; + },["",""]).join("\n"); + } + private readonly incrementalLog = false; private readonly paddings: number[] = []; @@ -80,7 +90,7 @@ export class TestClientLogger { const clientLogIndex = i*2 this.ackedLine[clientLogIndex]=getOpString(op.sequencedMessage ?? 
c.makeOpMessage(op.op)) - const segStrings = this.getSegString(c); + const segStrings = TestClientLogger.getSegString(c); this.ackedLine[clientLogIndex + 1] = segStrings.acked; this.localLine[clientLogIndex +1] = segStrings.local; @@ -109,14 +119,18 @@ export class TestClientLogger { } private addNewLogLine() { - if (this.incrementalLog) { - console.log(this.ackedLine.map((v, i) => v.padEnd(this.paddings[i])).join(" | ")); - console.log(this.ackedLine.map((v, i) => v.padEnd(this.paddings[i])).join(" | ")); + if(this.incrementalLog){ + while(this.roundLogLines.length > 0){ + const logLine = this.roundLogLines.shift(); + if(logLine.some((c)=>c.trim().length >0)){ + console.log(logLine.map((v, i) => v.padEnd(this.paddings[i])).join(" | ")); + } + } } this.ackedLine = []; this.localLine = []; this.clients.forEach((cc, clientLogIndex)=>{ - const segStrings = this.getSegString(cc); + const segStrings = TestClientLogger.getSegString(cc); this.ackedLine.push("", segStrings.acked); this.localLine.push("", segStrings.local); @@ -153,17 +167,21 @@ export class TestClientLogger { return baseText; } - public toString() { - let str = - `_: Local State\n` - + `-: Deleted\n` - + `*: Unacked Insert and Delete\n` - + `${this.clients[0].getCollabWindow().minSeq}: msn/offset\n` - + `Op format ::@,\n` - + `sequence number represented as offset from msn. L means local.\n` - + `op types: 0) insert 1) remove 2) annotate\n`; - if (this.title) { - str += `${this.title}\n`; + public toString(excludeHeader: boolean = false) { + let str = ""; + if(!excludeHeader){ + str += + `_: Local State\n` + + `-: Deleted\n` + + `*: Unacked Insert and Delete\n` + + `${this.clients[0].getCollabWindow().minSeq}: msn/offset\n` + + `Op format ::@,\n` + + `sequence number represented as offset from msn. 
L means local.\n` + + `op types: 0) insert 1) remove 2) annotate\n`; + + if (this.title) { + str += `${this.title}\n`; + } } str += this.roundLogLines .filter((line)=>line.some((c)=>c.trim().length >0)) @@ -172,7 +190,7 @@ export class TestClientLogger { return str; } - private getSegString(client: TestClient): { acked: string, local: string } { + private static getSegString(client: TestClient): { acked: string, local: string } { let acked: string = ""; let local: string = ""; const nodes = [...client.mergeTree.root.children]; From 163af727c638f4e8961f68d7a5082c84be1ea050 Mon Sep 17 00:00:00 2001 From: Tony Murphy Date: Thu, 3 Mar 2022 16:38:03 -0800 Subject: [PATCH 2/6] add the new file --- .../test/client.localReferenceFarm.spec.ts | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts diff --git a/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts b/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts new file mode 100644 index 000000000000..973730d68890 --- /dev/null +++ b/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts @@ -0,0 +1,117 @@ +/*! + * Copyright (c) Microsoft Corporation and contributors. All rights reserved. + * Licensed under the MIT License. 
+ */ + +import { strict as assert } from "assert"; +import random from "random-js"; +import { doOverRange } from "."; +import { LocalReference, ReferenceType } from ".."; +import { + IMergeTreeOperationRunnerConfig, + removeRange, + runMergeTreeOperationRunner, + generateClientNames, + IConfigRange, +} from "./mergeTreeOperationRunner"; +import { TestClient } from "./testClient"; +import { TestClientLogger } from "./testClientLogger"; + + const defaultOptions: Record<"initLen" | "modLen", IConfigRange> & IMergeTreeOperationRunnerConfig = { + initLen: {min: 2, max: 4}, + modLen: {min: 1, max: 8}, + opsPerRoundRange: { min: 10, max: 10 }, + rounds: 10, + operations: [removeRange], + growthFunc: (input: number) => input * 2, +}; + + +describe("MergeTree.Client", () => { + // Generate a list of single character client names, support up to 69 clients + const clientNames = generateClientNames(); + + doOverRange(defaultOptions.initLen, defaultOptions.growthFunc, (initLen)=>{ + doOverRange(defaultOptions.modLen, defaultOptions.growthFunc, (modLen)=>{ + + it(`LocalReferenceFarm_${initLen}_${modLen}`, async () => { + const mt = random.engines.mt19937(); + mt.seedWithArray([0xDEADBEEF, 0xFEEDBED, initLen, modLen]); + + const clients: TestClient[] = new Array(3).fill(0).map(()=> new TestClient()); + clients.forEach( + (c, i) => c.startOrUpdateCollaboration(clientNames[i])); + + let seq = 0; + // init with random values + seq = runMergeTreeOperationRunner( + mt, + seq, + clients, + initLen, + defaultOptions + ); + // add local references + const refs: LocalReference[][]=[]; + + const validateRefs = (reason: string, workload:()=>void)=>{ + const preWorkload = TestClientLogger.toString(clients); + workload(); + for(let c=1;c{ + clients.forEach((c,i)=>{ + refs.push([]); + for(let t = 0;t{ + //trigger zamboni multiple times as it is incremental + for(let i = clients[0].getCollabWindow().minSeq;i<=seq;i++){ + clients.forEach((c)=>c.updateMinSeq(i)); + } + }); + + validateRefs("After 
More Ops", ()=>{ + // init with random values + seq = runMergeTreeOperationRunner( + mt, + seq, + clients, + modLen, + defaultOptions, + ); + }); + + + validateRefs("After Final Zamboni",()=>{ + //trigger zamboni multiple times as it is incremental + for(let i = clients[0].getCollabWindow().minSeq;i<=seq;i++){ + clients.forEach((c)=>c.updateMinSeq(i)); + } + }); + + }) + }); + }); +}); From 73377e6c5405e72cdb3a8fac3cd0676e8d5fc8c6 Mon Sep 17 00:00:00 2001 From: Tony Murphy Date: Mon, 7 Mar 2022 09:46:11 -0800 Subject: [PATCH 3/6] Add some dev notes on zamboni --- packages/dds/merge-tree/DEV.md | 9 +++++++++ .../src/test/client.localReferenceFarm.spec.ts | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/dds/merge-tree/DEV.md b/packages/dds/merge-tree/DEV.md index 9fb7ceb46f60..a579b2e2dfb7 100644 --- a/packages/dds/merge-tree/DEV.md +++ b/packages/dds/merge-tree/DEV.md @@ -8,3 +8,12 @@ Ths distinction is important, as a removed segment with undefined length may not However a not yet visible segment with 0 length may already exist, or will eventually exits on all clients. These have implications for eventually consistent conflict resolution. Generally, we ignore removed segments, and special case invisible segments, like in the case of conflicting insert as handled in the `breakTie` function + +### Zamboni +Zamboni is the garbage collection process in the merge tree. As segment change due to inserts and deletes, we add them to a heap which keeps the segment with the lowest sequence number at the head. These segments drive the zamboni process which is also run on every change. The zamboni process peak at the heap to determine if the head is below the min sequence, then the segment is eligible. The minimum sequence number is important here, as the minium sequence number is a sequence seen by all clients, and all clients will specify their reference sequence number as above the minium sequence number. 
This mean that no new operations can come in that reference anything at or below the minimum sequence number, so we are safe to clean up anything we would need to applying incoming. Eligible segments are collected, and then a few different operations are done, superficially, merge, remove, and tree rebalance. Zamboni is incremental, and only collects a constant number of segments at each change so as not to introduce performance issues. + +Merge is done if two adjacent segments are of the same type like text, that type is mergable (markers are not), neither are deleted, and all the properties match. The merge process reduces the number of segments, which are leaf nodes of the merge tree. For instance a user may type `c`, `a`, and `t` with each character being it's own operation therefore segment. The user could then highlight that range, and set a property on on all the characters indicating that they are bold, `{bold: true}`. At some later point, these segments would move to the top of th heap, and their sequence numbers would move below the minium sequence number. At that point zamboni could take those individual segments, and merge the into a single segment, `cat` with the property `{bold: true}` + +Remove is a bit simpler. On removal of a segment, we track its removed sequence number. When the segment's removed sequence number drops below the minimum sequence number, it can be safely removed from the tree. + +Rebalance is a bit different from merge and remove, as it has to do with maintaining the tree itself. After merge or removal there are fewer segments, aka leaf nodes, in the tree. This allows us to more efficiently pack the non-leaf nodes of the tree, and potentially remove layers from the tree. This keeps the tree compact, which has both memory and CPU performance implications.
diff --git a/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts b/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts index 973730d68890..5c082c317ad7 100644 --- a/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts +++ b/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts @@ -20,7 +20,7 @@ import { TestClientLogger } from "./testClientLogger"; const defaultOptions: Record<"initLen" | "modLen", IConfigRange> & IMergeTreeOperationRunnerConfig = { initLen: {min: 2, max: 4}, modLen: {min: 1, max: 8}, - opsPerRoundRange: { min: 10, max: 10 }, + opsPerRoundRange: {min: 10, max: 10}, rounds: 10, operations: [removeRange], growthFunc: (input: number) => input * 2, From 1164c37513b14538bd1b7c5a0d2866067c1fb64b Mon Sep 17 00:00:00 2001 From: Tony Murphy Date: Tue, 8 Mar 2022 13:31:56 -0800 Subject: [PATCH 4/6] fix typo --- packages/dds/merge-tree/DEV.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/dds/merge-tree/DEV.md b/packages/dds/merge-tree/DEV.md index a579b2e2dfb7..8cdf8c54f49d 100644 --- a/packages/dds/merge-tree/DEV.md +++ b/packages/dds/merge-tree/DEV.md @@ -10,7 +10,7 @@ These have implications for eventually consistent conflict resolution. Generally of conflicting insert as handled in the `breakTie` function ### Zamboni -Zamboni is the garbage collection process in the merge tree. As segment change due to inserts and deletes, we add them to a heap which keeps the segment with the lowest sequence number at the head. These segments drive the zamboni process which is also run on every change. The zamboni process peak at the heap to determine if the head is below the min sequence, then the segment is eligible. The minimum sequence number is important here, as the minium sequence number is a sequence seen by all clients, and all clients will specify their reference sequence number as above the minium sequence number. 
This mean that no new operations can come in that reference anything at or below the minimum sequence number, so we are safe to clean up anything we would need to applying incoming. Eligible segments are collected, and then a few different operations are done, superficially, merge, remove, and tree rebalance. Zamboni is incremental, and only collects a constant number of segments at each change so as not to introduce performance issues. +Zamboni is the garbage collection process in the merge tree. As segment change due to inserts and deletes, we add them to a heap which keeps the segment with the lowest sequence number at the head. These segments drive the zamboni process which is also run on every change. The zamboni process peeks at the heap to determine if the head is below the min sequence, then the segment is eligible. The minimum sequence number is important here, as the minium sequence number is a sequence seen by all clients, and all clients will specify their reference sequence number as above the minium sequence number. This mean that no new operations can come in that reference anything at or below the minimum sequence number, so we are safe to clean up anything we would need to applying incoming. Eligible segments are collected, and then a few different operations are done, superficially, merge, remove, and tree rebalance. Zamboni is incremental, and only collects a constant number of segments at each change so as not to introduce performance issues. Merge is done if two adjacent segments are of the same type like text, that type is mergable (markers are not), neither are deleted, and all the properties match. The merge process reduces the number of segments, which are leaf nodes of the merge tree. For instance a user may type `c`, `a`, and `t` with each character being it's own operation therefore segment. The user could then highlight that range, and set a property on on all the characters indicating that they are bold, `{bold: true}`. 
At some later point, these segments would move to the top of th heap, and their sequence numbers would move below the minium sequence number. At that point zamboni could take those individual segments, and merge the into a single segment, `cat` with the property `{bold: true}` From 11a903a90ed286674106f6a2b2b8a5bf01a02e10 Mon Sep 17 00:00:00 2001 From: Tony Murphy Date: Tue, 8 Mar 2022 13:58:39 -0800 Subject: [PATCH 5/6] fix lints --- .../test/client.localReferenceFarm.spec.ts | 31 ++++++++----------- .../merge-tree/src/test/testClientLogger.ts | 18 +++++------ packages/test/snapshots/content | 2 +- 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts b/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts index 5c082c317ad7..5b06e8fe37f9 100644 --- a/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts +++ b/packages/dds/merge-tree/src/test/client.localReferenceFarm.spec.ts @@ -5,7 +5,6 @@ import { strict as assert } from "assert"; import random from "random-js"; -import { doOverRange } from "."; import { LocalReference, ReferenceType } from ".."; import { IMergeTreeOperationRunnerConfig, @@ -16,6 +15,7 @@ import { } from "./mergeTreeOperationRunner"; import { TestClient } from "./testClient"; import { TestClientLogger } from "./testClientLogger"; +import { doOverRange } from "."; const defaultOptions: Record<"initLen" | "modLen", IConfigRange> & IMergeTreeOperationRunnerConfig = { initLen: {min: 2, max: 4}, @@ -26,14 +26,12 @@ import { TestClientLogger } from "./testClientLogger"; growthFunc: (input: number) => input * 2, }; - describe("MergeTree.Client", () => { // Generate a list of single character client names, support up to 69 clients const clientNames = generateClientNames(); doOverRange(defaultOptions.initLen, defaultOptions.growthFunc, (initLen)=>{ doOverRange(defaultOptions.modLen, defaultOptions.growthFunc, (modLen)=>{ - 
it(`LocalReferenceFarm_${initLen}_${modLen}`, async () => { const mt = random.engines.mt19937(); mt.seedWithArray([0xDEADBEEF, 0xFEEDBED, initLen, modLen]); @@ -49,19 +47,19 @@ describe("MergeTree.Client", () => { seq, clients, initLen, - defaultOptions + defaultOptions, ); // add local references - const refs: LocalReference[][]=[]; + const refs: LocalReference[][] = []; - const validateRefs = (reason: string, workload:()=>void)=>{ + const validateRefs = (reason: string, workload: () => void)=>{ const preWorkload = TestClientLogger.toString(clients); workload(); - for(let c=1;c { validateRefs("Initialize", ()=>{ clients.forEach((c,i)=>{ refs.push([]); - for(let t = 0;t { }); }); - validateRefs("After Init Zamboni",()=>{ - //trigger zamboni multiple times as it is incremental - for(let i = clients[0].getCollabWindow().minSeq;i<=seq;i++){ + // trigger zamboni multiple times as it is incremental + for(let i = clients[0].getCollabWindow().minSeq; i <= seq; i++) { clients.forEach((c)=>c.updateMinSeq(i)); } }); @@ -103,15 +100,13 @@ describe("MergeTree.Client", () => { ); }); - validateRefs("After Final Zamboni",()=>{ - //trigger zamboni multiple times as it is incremental - for(let i = clients[0].getCollabWindow().minSeq;i<=seq;i++){ + // trigger zamboni multiple times as it is incremental + for(let i = clients[0].getCollabWindow().minSeq; i <= seq; i++) { clients.forEach((c)=>c.updateMinSeq(i)); } }); - - }) + }); }); }); }); diff --git a/packages/dds/merge-tree/src/test/testClientLogger.ts b/packages/dds/merge-tree/src/test/testClientLogger.ts index 4c893ed9bfb2..48a207070c4d 100644 --- a/packages/dds/merge-tree/src/test/testClientLogger.ts +++ b/packages/dds/merge-tree/src/test/testClientLogger.ts @@ -54,12 +54,10 @@ export function createClientsAtInitialState( return {...clients, all}; } export class TestClientLogger { - - public static toString(clients: readonly TestClient[]){ - + public static toString(clients: readonly TestClient[]) { return 
clients.map((c)=>this.getSegString(c)).reduce<[string,string]>((pv,cv)=>{ - pv[0]+=`|${cv.acked.padEnd(cv.local.length,"")}`; - pv[1]+=`|${cv.local.padEnd(cv.acked.length,"")}`; + pv[0] += `|${cv.acked.padEnd(cv.local.length,"")}`; + pv[1] += `|${cv.local.padEnd(cv.acked.length,"")}`; return pv; },["",""]).join("\n"); } @@ -89,7 +87,7 @@ export class TestClientLogger { } const clientLogIndex = i * 2; - this.ackedLine[clientLogIndex]=getOpString(op.sequencedMessage ?? c.makeOpMessage(op.op)) + this.ackedLine[clientLogIndex] = getOpString(op.sequencedMessage ?? c.makeOpMessage(op.op)); const segStrings = TestClientLogger.getSegString(c); this.ackedLine[clientLogIndex + 1] = segStrings.acked; this.localLine[clientLogIndex + 1] = segStrings.local; @@ -119,10 +117,10 @@ export class TestClientLogger { } private addNewLogLine() { - if(this.incrementalLog){ - while(this.roundLogLines.length > 0){ + if(this.incrementalLog) { + while(this.roundLogLines.length > 0) { const logLine = this.roundLogLines.shift(); - if(logLine.some((c)=>c.trim().length >0)){ + if(logLine.some((c)=>c.trim().length > 0)) { console.log(logLine.map((v, i) => v.padEnd(this.paddings[i])).join(" | ")); } } @@ -169,7 +167,7 @@ export class TestClientLogger { public toString(excludeHeader: boolean = false) { let str = ""; - if(!excludeHeader){ + if(!excludeHeader) { str += `_: Local State\n` + `-: Deleted\n` diff --git a/packages/test/snapshots/content b/packages/test/snapshots/content index 89a87761cf17..96453d2babab 160000 --- a/packages/test/snapshots/content +++ b/packages/test/snapshots/content @@ -1 +1 @@ -Subproject commit 89a87761cf17b6ed64c6e2be0c5b9cf52d3d3635 +Subproject commit 96453d2babab667c92e5bcb32532c3159af2153b From b3428c3231f78c088ebd25563da927c3d4578f26 Mon Sep 17 00:00:00 2001 From: Tony Murphy Date: Tue, 8 Mar 2022 14:36:49 -0800 Subject: [PATCH 6/6] fix submodule pointer --- packages/test/snapshots/content | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/packages/test/snapshots/content b/packages/test/snapshots/content index 96453d2babab..89a87761cf17 160000 --- a/packages/test/snapshots/content +++ b/packages/test/snapshots/content @@ -1 +1 @@ -Subproject commit 96453d2babab667c92e5bcb32532c3159af2153b +Subproject commit 89a87761cf17b6ed64c6e2be0c5b9cf52d3d3635