Skip to content

Commit

Permalink
fix: Improve the speed of checking text. (#6004)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S authored Jul 30, 2024
1 parent 1029c4a commit 6327e44
Show file tree
Hide file tree
Showing 30 changed files with 2,145 additions and 60 deletions.
1 change: 1 addition & 0 deletions cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
".pnp.{js,cjs}",
".prettierignore",
".yarn",
"__snapshots__",
"*.{png,jpg,pdf,svg}",
"*.cpuprofile",
"*.heapprofile",
Expand Down
2 changes: 1 addition & 1 deletion integration-tests/src/sh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ export function execAsync(command: string, options: ExecOptions = {}): Promise<S
return new Promise<Shell.ExecOutputReturnValue>((resolve) => {
Shell.exec(
command /* lgtm[js/shell-command-injection-from-environment] */,
{ silent: !echo, fatal: bail },
{ silent: !echo, fatal: bail, env: { ...process.env } },
(code, stdout, stderr) => resolve({ code, stdout, stderr }),
);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ let dictionaryCounter = 0;

const DefaultAutoCacheSize = 1000;

// Module-wide switch, changed through enableLogging(). CachedDict reads it while initializing its `has` member.
let logRequests = false;
// Records of logged `has` lookups, in the order they were pushed; handed out by getLog().
const log: LogEntry[] = [];

/**
* Caching Dictionary remembers method calls to increase performance.
*/
Expand All @@ -30,6 +33,22 @@ export interface CachingDictionary {
getPreferredSuggestions(word: string): PreferredSuggestion[] | undefined;
}

/**
 * Fields shared by every logged dictionary request.
 * Inherits the fields of SearchOptions through `extends`.
 */
interface LogEntryBase extends SearchOptions {
// Milliseconds between `startTime` (taken when this module was evaluated) and the start of the request.
time: number;
// Name of the dictionary method that was called; `'has'` is the only method logged.
method: 'has';
// The word that was passed to the method.
word: string;
// The value the method returned.
value?: unknown;
}

/** Log record for a `has` lookup; narrows `value` to the boolean result of the lookup. */
interface LogEntryHas extends LogEntryBase {
method: 'has';
value: boolean;
}

// Reference point for log entry `time` values: performance.now() at the moment this module is evaluated.
const startTime = performance.now();

/** All log record shapes; currently only `has` lookups are recorded. */
export type LogEntry = LogEntryHas;

class CachedDict implements CachingDictionary {
readonly name: string;
readonly id = ++dictionaryCounter;
Expand All @@ -41,7 +60,16 @@ class CachedDict implements CachingDictionary {
// console.log(`CachedDict for ${this.name}`);
}

readonly has = autoCache((word: string) => this.dict.has(word, this.options), DefaultAutoCacheSize);
#has = autoCache((word: string) => this.dict.has(word, this.options), DefaultAutoCacheSize);
has = logRequests
? (word: string): boolean => {
const time = performance.now() - startTime;
const value = this.#has(word);
log.push({ time, method: 'has', word, value });
return value;
}
: this.#has;

readonly isNoSuggestWord = autoCache(
(word: string) => this.dict.isNoSuggestWord(word, this.options),
DefaultAutoCacheSize,
Expand All @@ -56,7 +84,7 @@ class CachedDict implements CachingDictionary {
return {
name: this.name,
id: this.id,
has: extractStats(this.has),
has: extractStats(this.#has),
isNoSuggestWord: extractStats(this.isNoSuggestWord),
isForbidden: extractStats(this.isForbidden),
getPreferredSuggestions: extractStats(this.getPreferredSuggestions),
Expand Down Expand Up @@ -90,3 +118,11 @@ export function createCachingDictionary(
knownOptions.set(dict, cached);
return cached;
}

/**
 * Turn request logging on or off.
 *
 * The flag is consulted when a `CachedDict` instance initializes its `has`
 * member, so instances that already exist keep their current behavior.
 *
 * @param enabled - the new state; when omitted (or `undefined`) the current state is toggled.
 */
export function enableLogging(enabled?: boolean): void {
    logRequests = enabled === undefined ? !logRequests : enabled;
}

/**
 * Get the recorded request log.
 * @returns the module's log array itself (not a copy), so later entries show up in it as well.
 */
export function getLog(): LogEntryBase[] {
return log;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

exports[`index > verify api 1`] = `
[
"_debug",
"createCachingDictionary",
"createCollection",
"createFailedToLoadDictionary",
Expand Down
12 changes: 12 additions & 0 deletions packages/cspell-dictionary/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import {
enableLogging as cacheDictionaryEnableLogging,
getLog as cacheDictionaryGetLog,
} from './SpellingDictionary/CachingDictionary.js';
export type {
CachingDictionary,
FindOptions,
Expand All @@ -24,3 +28,11 @@ export {
createSuggestDictionary,
createSuggestOptions,
} from './SpellingDictionary/index.js';

/**
 * Debugging utilities.
 *
 * - `cacheDictionaryEnableLogging` is `enableLogging` from `CachingDictionary.js`.
 * - `cacheDictionaryGetLog` is `getLog` from `CachingDictionary.js`.
 */
export const _debug = {
cacheDictionaryEnableLogging,
cacheDictionaryGetLog,
};
157 changes: 154 additions & 3 deletions packages/cspell-dictionary/src/perf/has.perf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,34 @@ import { buildITrieFromWords } from 'cspell-trie-lib';
import { loremIpsum } from 'lorem-ipsum';
import { suite } from 'perf-insight';

import { createCachingDictionary } from '../SpellingDictionary/CachingDictionary.js';
import { createSpellingDictionary } from '../SpellingDictionary/createSpellingDictionary.js';
import { createCollection } from '../SpellingDictionary/SpellingDictionaryCollection.js';

suite('dictionary has', async (test) => {
const words = genWords(10_000);
const words1 = genWords(10_000);
const words2 = genWords(1000);
const words3 = genWords(1000);

const iTrie = buildITrieFromWords(words);
const dict = createSpellingDictionary(words, 'test', import.meta.url);
const words = words1;

const iTrie = buildITrieFromWords(words1);
const dict = createSpellingDictionary(words1, 'test', import.meta.url);
const dict2 = createSpellingDictionary(words2, 'test2', import.meta.url);
const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);

const dictCol = createCollection([dict, dict2, dict3], 'test-collection');
const dictColRev = createCollection([dict3, dict2, dict], 'test-collection-reverse');

const cacheDictSingle = createCachingDictionary(dict, {});
const cacheDictCol = createCachingDictionary(dictCol, {});

const dictSet = new Set(words);

test('Set has 100k words', () => {
checkWords(dictSet, words);
});

test('dictionary has 100k words', () => {
checkWords(dict, words);
});
Expand All @@ -32,6 +44,14 @@ suite('dictionary has', async (test) => {
checkWords(dictColRev, words);
});

test('cache dictionary has 100k words', () => {
checkWords(cacheDictSingle, words);
});

test('cache collection has 100k words', () => {
checkWords(cacheDictCol, words);
});

test('iTrie has 100k words', () => {
checkWords(iTrie, words);
});
Expand All @@ -58,6 +78,12 @@ suite('dictionary has Not', async (test) => {
const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);
const dictCol = createCollection([dict, dict2, dict3], 'test-collection');

const dictSet = new Set(words);

test('Set has not 100k words', () => {
checkWords(dictSet, missingWords, false);
});

test('dictionary has not 100k words', () => {
checkWords(dict, missingWords, false);
});
Expand All @@ -80,6 +106,104 @@ suite('dictionary has Not', async (test) => {
});
});

suite('dictionary has sampling', async (test) => {
    // Three generated word lists: one large primary list and two smaller ones.
    const words1 = genWords(10_000);
    const words2 = genWords(1000);
    const words3 = genWords(1000);

    // Lookup workload: 100_000 entries of words1, chosen by the indices genSamples produces.
    const wordsSample = genSamples(100_000, words1.length).map((idx) => words1[idx]);

    // Lookup targets, all built from the same primary word list.
    const iTrie = buildITrieFromWords(words1);
    const dict = createSpellingDictionary(words1, 'test', import.meta.url);
    const dict2 = createSpellingDictionary(words2, 'test2', import.meta.url);
    const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);

    // The same three dictionaries, with the primary dictionary first and last respectively.
    const dictCol = createCollection([dict, dict2, dict3], 'test-collection');
    const dictColRev = createCollection([dict3, dict2, dict], 'test-collection-reverse');

    const cacheDictSingle = createCachingDictionary(dict, {});
    const cacheDictCol = createCachingDictionary(dictCol, {});

    // Plain Set over the same words, measured alongside the dictionaries.
    const dictSet = new Set(words1);

    // Targets whose own `has` method is exercised directly, in registration order.
    const directTargets: [string, { has: (word: string) => boolean }][] = [
        ['Set has 100k words', dictSet],
        ['dictionary has 100k words', dict],
        ['collection has 100k words', dictCol],
        ['collection reverse has 100k words', dictColRev],
        ['cache dictionary has 100k words', cacheDictSingle],
        ['cache collection has 100k words', cacheDictCol],
        ['iTrie has 100k words', iTrie],
    ];

    for (const [title, target] of directTargets) {
        test(title, () => {
            checkWords(target, wordsSample);
        });
    }

    test('iTrie.hasWord has 100k words', () => {
        // The adapter is built inside the test body, as part of the measured work.
        const viaHasWord = { has: (word: string) => iTrie.hasWord(word, true) };
        checkWords(viaHasWord, wordsSample);
    });

    test('iTrie.data has 100k words', () => {
        checkWords(iTrie.data, wordsSample);
    });
});

suite('dictionary isForbidden sampling', async (test) => {
    // Three generated word lists: one large primary list and two smaller ones.
    const words1 = genWords(10_000);
    const words2 = genWords(1000);
    const words3 = genWords(1000);

    // Lookup workload: 100_000 entries of words1, chosen by the indices genSamples produces.
    const wordsSample = genSamples(100_000, words1.length).map((idx) => words1[idx]);

    const dict = createSpellingDictionary(words1, 'test', import.meta.url);
    const dict2 = createSpellingDictionary(words2, 'test2', import.meta.url);
    const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);

    // The same three dictionaries, with the primary dictionary first and last respectively.
    const dictCol = createCollection([dict, dict2, dict3], 'test-collection');
    const dictColRev = createCollection([dict3, dict2, dict], 'test-collection-reverse');

    const cacheDictSingle = createCachingDictionary(dict, {});
    const cacheDictCol = createCachingDictionary(dictCol, {});

    // Every target is measured with the same workload, in registration order.
    const targets: [string, { isForbidden: (word: string) => boolean }][] = [
        ['dictionary isForbidden 100k words', dict],
        ['collection isForbidden 100k words', dictCol],
        ['collection reverse isForbidden 100k words', dictColRev],
        ['cache dictionary isForbidden 100k words', cacheDictSingle],
        ['cache collection isForbidden 100k words', cacheDictCol],
    ];

    for (const [title, target] of targets) {
        test(title, () => {
            checkForForbiddenWords(target, wordsSample);
        });
    }
});

function checkWords(dict: { has: (word: string) => boolean }, words: string[], expected = true, totalChecks = 100_000) {
let has = true;
const len = words.length;
Expand All @@ -94,6 +218,21 @@ function checkWords(dict: { has: (word: string) => boolean }, words: string[], e
assert(has, 'All words should be found in the dictionary');
}

/**
 * Call `dict.isForbidden` `totalChecks` times, cycling through `words`, and
 * assert that no call reported a forbidden word.
 *
 * Every check is performed even after a forbidden word has been seen; the
 * assertion is made once, after the loop.
 *
 * @param dict - object exposing the `isForbidden` lookup to exercise.
 * @param words - words to look up; reused from the start when exhausted.
 * @param totalChecks - number of lookups to perform.
 */
function checkForForbiddenWords(
    dict: { isForbidden: (word: string) => boolean },
    words: string[],
    totalChecks = 100_000,
) {
    const wordCount = words.length;
    let forbiddenHits = 0;
    let checkNo = 0;
    while (checkNo < totalChecks) {
        if (dict.isForbidden(words[checkNo % wordCount])) {
            forbiddenHits += 1;
        }
        checkNo += 1;
    }
    assert(forbiddenHits === 0, 'All words should not be forbidden');
}

function genWords(count: number, includeForbidden = true): string[] {
const setOfWords = new Set(loremIpsum({ count }).split(' '));

Expand Down Expand Up @@ -122,3 +261,15 @@ function genWords(count: number, includeForbidden = true): string[] {

return [...setOfWords];
}

/**
 * Generate `count` random integer samples derived from `max`.
 *
 * Each sample starts from `max` and is scaled by a fresh `Math.random()`
 * value `depth` times (always at least once) before being floored, which
 * skews the samples toward the low end of the range.
 *
 * @param count - number of samples to generate.
 * @param max - starting upper bound that is scaled down by the random draws.
 * @param depth - number of random scalings applied per sample.
 * @returns an array of `count` floored samples.
 */
function genSamples(count: number, max: number, depth = 3) {
    const samples = new Array<number>(count);
    let slot = 0;
    while (slot < count) {
        let scaled = max;
        let draws = 0;
        // One draw is always made; further draws continue until `depth` is reached.
        do {
            scaled = Math.random() * scaled;
            draws += 1;
        } while (draws < depth);
        samples[slot] = Math.floor(scaled);
        slot += 1;
    }
    return samples;
}
12 changes: 8 additions & 4 deletions packages/cspell-pipe/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -99,18 +99,21 @@
"!**/*.tsbuildInfo",
"!**/__mocks__",
"!**/*.spec.*",
"!**/*.perf.*",
"!**/*.test.*",
"!**/perf/**",
"!**/test/**",
"!**/*.map"
],
"scripts": {
"build": "tsc -b . -f",
"watch": "tsc -b . -w -f",
"build": "tsc -p .",
"watch": "tsc -p . -w",
"clean": "shx rm -rf dist temp coverage \"*.tsbuildInfo\"",
"clean-build": "pnpm run clean && pnpm run build",
"coverage": "vitest run --coverage",
"test-watch": "vitest",
"test": "vitest run"
"test": "vitest run",
"test:perf": "NODE_ENV=production insight --register ts-node/esm --file \"**/*.perf.{mts,ts}\""
},
"repository": {
"type": "git",
Expand All @@ -124,6 +127,7 @@
"node": ">=18"
},
"devDependencies": {
"globby": "^14.0.2"
"globby": "^14.0.2",
"perf-insight": "^1.2.0"
}
}
8 changes: 4 additions & 4 deletions packages/cspell-pipe/src/operators/append.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ import type { PipeFn } from '../internalTypes.js';
export function opAppendAsync<T>(
    ...iterablesToAppend: (AsyncIterable<T> | Iterable<T>)[]
): (iter: AsyncIterable<T> | Iterable<T>) => AsyncIterable<T> {
    // Returns an operator: given a source, it yields every source item first and
    // then every item of each appended iterable, in the order they were passed.
    // The generator is named (rather than a generic `fn`) so the name identifies the operator.
    async function* fnAppend(iter: AsyncIterable<T> | Iterable<T>) {
        yield* iter;
        for (const i of iterablesToAppend) {
            // `yield*` in an async generator accepts both sync and async iterables.
            yield* i;
        }
    }

    return fnAppend;
}

/**
Expand All @@ -25,14 +25,14 @@ export function opAppendAsync<T>(
* @returns
*/
export function opAppendSync<T>(...iterablesToAppend: Iterable<T>[]): (iter: Iterable<T>) => Iterable<T> {
    // Returns an operator: given a source, it yields every source item first and
    // then every item of each appended iterable, in the order they were passed.
    // The generator is named (rather than a generic `fn`) so the name identifies the operator.
    function* fnAppend(iter: Iterable<T>) {
        yield* iter;
        for (const i of iterablesToAppend) {
            yield* i;
        }
    }

    return fnAppend;
}

export function opAppend<T>(...iterablesToAppend: Iterable<T>[]): PipeFn<T, T> {
Expand Down
Loading

0 comments on commit 6327e44

Please sign in to comment.