Skip to content

Commit

Permalink
fix: Add option to not auto stem during split (#4310)
Browse files: browse the repository at this point in the history
  • Loading branch information
Jason3S committed Mar 13, 2023
1 parent fae4975 commit 23059da
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 33 deletions.
34 changes: 17 additions & 17 deletions packages/cspell-tools/src/compiler/legacyLineToWords.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,23 @@ describe('Validate legacyLineToWords', () => {
});

test.each`
line | expectedResult
${'hello'} | ${['hello']}
${'AppendIterator::getArrayIterator'} | ${['append', 'iterator', 'get', 'array']}
${'Austin Martin'} | ${['austin', 'martin']}
${'JPEGsBLOBs'} | ${['jpegs', 'blobs']}
${'CURLs CURLing' /* Sadly we cannot do this one correctly */} | ${['curls', 'curling']}
${'DNSTable Lookup'} | ${['dns', 'table', 'lookup']}
${'OUTRing'} | ${['outring']}
${'OUTRings'} | ${['outrings']}
${'DIRs'} | ${['dirs']}
${'AVGAspect'} | ${['avg', 'aspect']}
${'New York'} | ${['new', 'york']}
${'Namespace DNSLookup'} | ${['namespace', 'dns', 'lookup']}
${'well-educated'} | ${['well', 'educated']}
${'CURLcode'} | ${['cur', 'lcode']}
${'kDNSServiceErr_BadSig'} | ${['k', 'dns', 'service', 'err', 'bad', 'sig']}
${'apd_get_active_symbols'} | ${['apd', 'get', 'active', 'symbols']}
line | expectedResult
${'hello'} | ${['hello']}
${'AppendIterator::getArrayIterator'} | ${['append', 'iterator', 'get', 'array']}
${'Austin Martin'} | ${['austin', 'martin']}
${'JPEGSBlobs'} | ${['jpegs', 'blobs']}
${'CURLS Curling'} | ${['curls', 'curling']}
${'DNSTable Lookup'} | ${['dns', 'table', 'lookup']}
${'OUTRing'} | ${['out', 'ring']}
${'OUTRings'} | ${['out', 'rings']}
${'DIRs'} | ${['di', 'rs']}
${'AVGAspect'} | ${['avg', 'aspect']}
${'New York'} | ${['new', 'york']}
${'Namespace DNSLookup'} | ${['namespace', 'dns', 'lookup']}
${'well-educated'} | ${['well', 'educated']}
${'CURLcode'} | ${['cur', 'lcode']}
${'kDNSServiceErr_BadSig'} | ${['k', 'dns', 'service', 'err', 'bad', 'sig']}
${'apd_get_active_symbols'} | ${['apd', 'get', 'active', 'symbols']}
`('legacy splitting lines $line', ({ line, expectedResult }: { line: string; expectedResult: string[] }) => {
expect([...pipe(legacyLineToWords(line, false, allowed), opFilter(distinct()))]).toEqual(expectedResult);
});
Expand Down
24 changes: 13 additions & 11 deletions packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@ import { splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed';

describe('splitCamelCaseIfAllowed', () => {
test.each`
text | keepCase | allowed | expected
${''} | ${false} | ${undefined} | ${[]}
${'hello'} | ${false} | ${undefined} | ${['hello']}
${'helloThere'} | ${false} | ${['hello', 'there']} | ${['hello', 'there']}
${'helloThere'} | ${false} | ${['hello', 'There']} | ${['hello', 'There']}
${'helloThere'} | ${true} | ${['hello', 'There']} | ${['hello', 'There']}
${'ERRORCode'} | ${false} | ${['error', 'code']} | ${['error', 'code']}
${'ERRORCode'} | ${true} | ${['error', 'code']} | ${['ERROR', 'code']}
${'ERRORCode'} | ${true} | ${['code']} | ${['ERRORCode']}
${'ERRORCode'} | ${false} | ${['code']} | ${['ERRORCode']}
${'ErrorCode'} | ${true} | ${['error', 'code']} | ${['error', 'code']}
text | keepCase | allowed | expected
${''} | ${false} | ${undefined} | ${[]}
${'hello'} | ${false} | ${undefined} | ${['hello']}
${'helloThere'} | ${false} | ${['hello', 'there']} | ${['hello', 'there']}
${'helloThere'} | ${false} | ${['hello', 'There']} | ${['hello', 'There']}
${'helloThere'} | ${true} | ${['hello', 'There']} | ${['hello', 'There']}
${'ERRORCode'} | ${false} | ${['error', 'code']} | ${['error', 'code']}
${'ERRORCode'} | ${true} | ${['error', 'code']} | ${['ERROR', 'code']}
${'ERRORCode'} | ${true} | ${['code']} | ${['ERRORCode']}
${'ERRORCode'} | ${false} | ${['code']} | ${['ERRORCode']}
${'ErrorCode'} | ${true} | ${['error', 'code']} | ${['error', 'code']}
${'xmlUCSIsCatZ'} | ${true} | ${['xml', 'UCS', 'is', 'cat', 'z']} | ${['xml', 'UCS', 'is', 'cat', 'z']}
${'ADP_ConnectionStateMsg_Closed'} | ${true} | ${undefined} | ${['ADP', 'connection', 'state', 'msg', 'closed']}
`('splitCamelCaseIfAllowed $text $keepCase $allowed', ({ text, keepCase, allowed, expected }) => {
allowed = createAllowedSplitWords(allowed);
expect(splitCamelCaseIfAllowed(text, allowed, keepCase)).toEqual(expected);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ function isUnknown(word: string, allowedWords: AllowedSplitWordsCollection): boo
}

function splitCamelCase(word: string): Iterable<string> {
const splitWords = Text.splitCamelCaseWord(word).filter((word) => !regExpIsNumber.test(word));
const splitWords = Text.splitCamelCaseWord(word, false).filter((word) => !regExpIsNumber.test(word));
// We only want to preserve this: "New York" and not "Namespace DNSLookup"
if (splitWords.length > 1 && regExpSpaceOrDash.test(word)) {
return splitWords.flatMap((w) => w.split(regExpSpaceOrDash));
Expand Down
4 changes: 2 additions & 2 deletions packages/cspell-tools/src/compiler/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ const regExSplitWords2 = /(\p{Lu})(\p{Lu}\p{Ll})/gu;
/**
* Split camelCase words into an array of strings.
*/
export function splitCamelCaseWord(word: string): string[] {
const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase());
export function splitCamelCaseWord(word: string, autoStem = true): string[] {
const wPrime = autoStem ? word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase()) : word;
const pass1 = wPrime.replace(regExSplitWords, '$1|$2');
const pass2 = pass1.replace(regExSplitWords2, '$1|$2');
const pass3 = pass2.replace(/[\d_]+/g, '|');
Expand Down
14 changes: 12 additions & 2 deletions packages/cspell-tools/src/compiler/wordListCompiler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,23 @@ export async function compileWordList(
destFilename: string,
options: CompileOptions
): Promise<void> {
const filter = normalizeTargetWords(options);
const finalLines = normalize(lines, options);

const finalSeq = pipe(wordListHeaderLines, opAppend(pipe(lines, filter)));
const finalSeq = pipe(wordListHeaderLines, opAppend(finalLines));

return createWordListTarget(destFilename)(finalSeq);
}

/**
 * Pass the lines through the target-word filter built from `options` and,
 * when `options.sort` is set, return them de-duplicated and sorted.
 *
 * @param lines - the lines to normalize.
 * @param options - compile options; forwarded to `normalizeTargetWords`, and
 *   `options.sort` selects between the two return shapes below.
 * @returns the filtered iterable unchanged when `options.sort` is falsy;
 *   otherwise a new array of the unique filtered lines in default
 *   `Array.prototype.sort` order.
 */
function normalize(lines: Iterable<string>, options: CompileOptions): Iterable<string> {
    const normalized = pipe(lines, normalizeTargetWords(options));

    if (options.sort) {
        // Collecting into a Set drops duplicate entries before sorting.
        const unique = Array.from(new Set(normalized));
        unique.sort();
        return unique;
    }

    return normalized;
}

function createWordListTarget(destFilename: string): (seq: Iterable<string>) => Promise<void> {
const target = createTarget(destFilename);
return (seq: Iterable<string>) =>
Expand Down

0 comments on commit 23059da

Please sign in to comment.