From 65d45e0f8ccbfbac10947224ebe381d300fc05ed Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Thu, 8 Aug 2024 11:53:18 +0700 Subject: [PATCH 1/2] feat(developer): output new TrieModel format when compiling --- developer/src/kmc-model/src/build-trie.ts | 9 ++---- .../test-compile-model-with-pseudoclosure.ts | 9 +++--- .../src/kmc-model/test/test-compile-trie.ts | 30 +++++++++++++++---- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/developer/src/kmc-model/src/build-trie.ts b/developer/src/kmc-model/src/build-trie.ts index 9e34cd54691..dd8429a0e7f 100644 --- a/developer/src/kmc-model/src/build-trie.ts +++ b/developer/src/kmc-model/src/build-trie.ts @@ -32,7 +32,7 @@ export function createTrieDataStructure(filenames: string[], searchTermToKey?: ( filenames.forEach(filename => parseWordListFromFilename(wordlist, filename)); let trie = buildTrie(wordlist, searchTermToKey as SearchTermToKey); - return JSON.stringify(trie); + return `{"data":${JSON.stringify(trie.compress())},"totalWeight":${trie.getTotalWeight()}}`; } /** @@ -212,14 +212,11 @@ export interface SearchTermToKey { * @param keyFunction Function that converts word forms into indexed search keys * @returns A JSON-serialiable object that can be given to the TrieModel constructor. 
*/ -export function buildTrie(wordlist: WordList, keyFunction: SearchTermToKey): object { +export function buildTrie(wordlist: WordList, keyFunction: SearchTermToKey): TrieBuilder { let collater = new TrieBuilder(keyFunction); buildFromWordList(collater, wordlist); - return { - totalWeight: collater.getTotalWeight(), - root: collater.getRoot() - } + return collater; } /** diff --git a/developer/src/kmc-model/test/test-compile-model-with-pseudoclosure.ts b/developer/src/kmc-model/test/test-compile-model-with-pseudoclosure.ts index 08247e17c1d..e9848e84668 100644 --- a/developer/src/kmc-model/test/test-compile-model-with-pseudoclosure.ts +++ b/developer/src/kmc-model/test/test-compile-model-with-pseudoclosure.ts @@ -59,7 +59,8 @@ describe('LexicalModelCompiler - pseudoclosure compilation + use', function () { assert.match(code, /'-'/); assert.match(code, /'\+'/); assert.match(code, /'\^'/); - assert.match(code, /§/); + // From searchTermToKey: + assert.match(code, /'§'/); let modelInitIndex = code.indexOf('LMLayerWorker.loadModel'); let modelInitCode = code.substring(modelInitIndex); @@ -73,7 +74,7 @@ describe('LexicalModelCompiler - pseudoclosure compilation + use', function () { // Instead, our custom keyer should ensure that the following symbol DOES appear. // Verifies that the compiler uses the custom searchTermToKey definition. - assert.match(modelInitCode, /['"]§['"]/); + assert.match(modelInitCode, /[^ ]§/); // Make sure it compiles! let compilation = compileModelSourceCode(code); @@ -118,7 +119,7 @@ describe('LexicalModelCompiler - pseudoclosure compilation + use', function () { // Check that the prepended lowercase "-" DOES appear within the Trie, as keying // does not remove it in this variant. Verifies that the compiler actually // used the custom applyCasing definition! - assert.match(modelInitCode, /['"]-['"]/); + assert.match(modelInitCode, /[^ ]-/); // ' -' is indicative of a compressed number // Make sure it compiles! 
let compilation = compileModelSourceCode(code); @@ -157,7 +158,7 @@ describe('LexicalModelCompiler - pseudoclosure compilation + use', function () { // Check that the prepended lowercase "-" DOES appear within the Trie, as keying // does not remove it in this variant. Verifies that the compiler actually // used the custom applyCasing definition! - assert.match(modelInitCode, /['"]-['"]/); + assert.match(modelInitCode, /[^ ]-/); // ' -' is indicative of a compressed number // Make sure it compiles! let compilation = compileModelSourceCode(code); diff --git a/developer/src/kmc-model/test/test-compile-trie.ts b/developer/src/kmc-model/test/test-compile-trie.ts index a9829568370..78b121ceee1 100644 --- a/developer/src/kmc-model/test/test-compile-trie.ts +++ b/developer/src/kmc-model/test/test-compile-trie.ts @@ -154,14 +154,34 @@ describe('createTrieDataStructure()', function () { let lowercaseSourceCode = createTrieDataStructure([WORDLIST_FILENAME], (wf) => { return wf.toLowerCase() }) - assert.match(lowercaseSourceCode, /"key":\s*"turtles"/); - assert.notMatch(lowercaseSourceCode, /"key":\s*"TURTLES"/); + + // 'I' should be keyed to 'i', which should appear here. + assert.match(lowercaseSourceCode, /[^ ]i/); + // It's a sparse data set, so only the first letter of each word should appear + // in keyed form when compressed. + const lowerKeyCharMatches = ['L', 'T'] + .map((char) => lowercaseSourceCode.indexOf(char)) + .filter((entry) => entry > -1); + + // At least one letter of 'L' and 'T' should be missing; ideally none, + // but it's possible for one to appear as the encoding for a number. + assert.isAtMost(lowerKeyCharMatches.length, 1); + // 0 assumes that none of the chars appears in the encoded form. + // Fortunately... it's actually true for this fixture as-is. 
+ assert.equal(lowerKeyCharMatches.length, 0); let uppercaseSourceCode = createTrieDataStructure([WORDLIST_FILENAME], (wf) => { return wf.toUpperCase() - }) - assert.match(uppercaseSourceCode, /"key":\s*"TURTLES"/); - assert.notMatch(uppercaseSourceCode, /"key":\s*"turtles"/); + }); + + // We don't do a check for 'i' here because it appears both within the + // wordlist-word 'like' and within the property name 'totalWeight'. + const upperKeyCharMatches = ['I', 'L', 'T'] + .map((char) => uppercaseSourceCode.indexOf(char)) + .filter((entry) => entry > -1); + + // All first letters should appear in keyed form. + assert.equal(upperKeyCharMatches.length, 3); }); it('does not create `null`/"undefined"-keyed children', function () { From 83c696b2a9d6643ed7ed718d29c478a215739703 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Mon, 26 Aug 2024 14:44:57 +0700 Subject: [PATCH 2/2] chore(developer): Apply suggestions from code review Co-authored-by: Marc Durdin --- developer/src/kmc-model/src/build-trie.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/developer/src/kmc-model/src/build-trie.ts b/developer/src/kmc-model/src/build-trie.ts index 235e4a26d1b..a886c230642 100644 --- a/developer/src/kmc-model/src/build-trie.ts +++ b/developer/src/kmc-model/src/build-trie.ts @@ -32,7 +32,10 @@ export function createTrieDataStructure(filenames: string[], searchTermToKey?: ( filenames.forEach(filename => parseWordListFromFilename(wordlist, filename)); let trie = buildTrie(wordlist, searchTermToKey as SearchTermToKey); - return `{"data":${JSON.stringify(trie.compress())},"totalWeight":${trie.getTotalWeight()}}`; + return JSON.stringify({ + data: trie.compress(), + totalWeight: trie.getTotalWeight() + }); } /**