Skip to content

Commit

Permalink
feat: add full width characters (#36)
Browse files Browse the repository at this point in the history
* perf: improve regex perf. by fixing catastrophic backtracking

* test: polish tests for kana/kanji fns

* feat: add full width characters

* docs(readme.md): add link to the website containing demos
  • Loading branch information
arjunvegda authored Feb 20, 2022
1 parent bfd1b2a commit 13c162b
Show file tree
Hide file tree
Showing 15 changed files with 218 additions and 39 deletions.
22 changes: 19 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
<p align="center">
<img alt="Japanese Moji Logo" src="./docs/logo-light.svg">
<a href="https://japanese-moji.vercel.app">
<img alt="Japanese Moji Logo" src="./docs/logo-light.svg">
</a>
</p>
<p align="center">
<a href="https://www.npmjs.com/package/japanese-moji">
Expand All @@ -21,7 +23,7 @@

The word "Moji" translates to "Character" in English

## 🚀 Features
## Features

- Very small footprint with zero dependencies
- Supports strict and threshold-based validation for Kanji, Kana, and all the Japanese characters
Expand All @@ -31,6 +33,11 @@ The word "Moji" translates to "Character" in English
- Supports custom single Unicode and Unicode ranges out-of-the-box
- 100% TypeScript friendly
- ESM, CJS, and UMD builds
- Only pay for what you use: this library is fully tree-shakable

## 🚀 Demo

Live demos are on this [website](https://japanese-moji.vercel.app)

## 📦 Install

Expand Down Expand Up @@ -85,7 +92,8 @@ const howMuchKanaIsPresentResult = howMuchKanaIsPresent('some string here');
### Japanese

Validates CJK punctuations, Hiragana, Katakana, Katakana phonetic extensions, Rare Kanji, Common and
Uncommon Kanji, Kanji compatibility ideographs, and Half-width Katakana forms
Uncommon Kanji, Kanji compatibility ideographs, Half-width Katakana forms, and Full-width (roman)
forms including punctuations

```ts
import { isValidJapanese, isJapanesePresent, howMuchJapaneseIsPresent } from 'japanese-moji';
Expand All @@ -112,6 +120,10 @@ enum CharacterSet {
KanjiCompatibilityIdeographs = 'KanjiCompatibilityIdeographs',
CommonUncommonKanji = 'CommonUncommonKanji',
HalfWidthKatakana = 'HalfWidthKatakana',
FullWidthUpperCase = 'FullWidthUpperCase',
FullWidthLowerCase = 'FullWidthLowerCase',
FullWidthNumbers = 'FullWidthNumbers',
FullWidthPunctuations = 'FullWidthPunctuations',
}

interface UnicodeRange {
Expand Down Expand Up @@ -172,6 +184,10 @@ const options: CreateValidatorOptions = {
CharacterSet.CommonUncommonKanji,
CharacterSet.KanjiCompatibilityIdeographs,
CharacterSet.HalfWidthKatakana,
CharacterSet.FullWidthUpperCase,
CharacterSet.FullWidthLowerCase,
CharacterSet.FullWidthNumbers,
CharacterSet.FullWidthPunctuations,
],
customRanges, // Optional
customUnicodes, // Optional
Expand Down
42 changes: 42 additions & 0 deletions __tests__/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,45 @@ export const MockHalfWidthKatakana: UnicodeRange = {
start: '\uff66',
end: '\uff9f',
};

// Test fixture: full-width upper-case letters, U+FF21 through U+FF3A (inclusive).
// Uses real characters (single backslash escape) so tests can generate the string directly.
export const MockFullWidthUpperCaseRange: UnicodeRange = {
start: '\uff21',
end: '\uff3a',
};

// Test fixture: full-width lower-case letters, U+FF41 through U+FF5A (inclusive).
export const MockFullWidthLowerCaseRange: UnicodeRange = {
start: '\uff41',
end: '\uff5a',
};

// Test fixture: full-width digits, U+FF10 through U+FF19 (inclusive).
export const MockFullWidthNumbersRange: UnicodeRange = {
start: '\uff10',
end: '\uff19',
};

// Test fixture: first run of full-width punctuation, U+FF00 through U+FF0F (inclusive).
// NOTE(review): U+FF00 appears to be unassigned in Unicode (the first full-width
// punctuation mark is U+FF01) - confirm whether the range should start at '\uff01'.
export const MockFullWidthPunctuations: UnicodeRange = {
start: '\uff00',
end: '\uff0f',
};

// Punctuation that sits between the full-width digits and the upper-case letters (U+FF1A-U+FF20).
const MockFullWidthPunctuationsExtensionA: UnicodeRange = {
start: '\uff1a',
end: '\uff20',
};

// Punctuation that sits between the upper-case and lower-case letters (U+FF3B-U+FF40).
const MockFullWidthPunctuationsExtensionB: UnicodeRange = {
start: '\uff3b',
end: '\uff40',
};

// Punctuation that follows the lower-case letters (U+FF5B-U+FF65).
const MockFullWidthPunctuationsExtensionC: UnicodeRange = {
start: '\uff5b',
end: '\uff65',
};

// All four punctuation sub-ranges in code-point order. Full-width punctuation is not
// contiguous (digits and letters are interleaved), hence an array rather than one range.
export const MockFullWidthPunctuationsRange: UnicodeRange[] = [
MockFullWidthPunctuations,
MockFullWidthPunctuationsExtensionA,
MockFullWidthPunctuationsExtensionB,
MockFullWidthPunctuationsExtensionC,
];
27 changes: 23 additions & 4 deletions __tests__/utils.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,35 @@
import type { UnicodeRange } from '../src';
import { makeString } from '../src/utils';

export const generateCharactersFromRange = (start: string, end: string): string => {
/**
 * Shortens `str` by keeping its head and tail and dropping the middle.
 *
 * Useful when a string is huge and only both ends matter: for example, testing
 * `abc...xyz` instead of the full alphabet, since only the start/end are of interest.
 *
 * @param str - The string to shorten.
 * @param width - Number of characters to keep in total. When it is odd, the head
 *   keeps the extra character.
 * @returns `str` unchanged when it already fits in `width`, otherwise the first
 *   `ceil(width / 2)` characters followed by the last `floor(width / 2)` characters.
 */
const truncateMiddle = (str: string, width: number): string => {
  if (str.length <= width) {
    return str;
  }

  const headLength = Math.ceil(width / 2);
  const tailLength = Math.floor(width / 2);

  return str.substring(0, headLength) + str.substring(str.length - tailLength);
};

/**
 * Builds a string containing every UTF-16 code unit from `start` to `end`, inclusive.
 *
 * Only the first code unit of `start` and `end` is read.
 *
 * @param start - Character marking the beginning of the range.
 * @param end - Character marking the end of the range.
 * @param width - Optional cap on the result; when given, the middle of the generated
 *   string is dropped so that only both ends of the range remain.
 * @returns The generated characters, truncated in the middle when `width` is a number.
 */
export const generateCharactersFromRange = (start: string, end: string, width?: number): string => {
  const startCode = start.charCodeAt(0);
  const endCode = end.charCodeAt(0);
  const length = endCode - startCode + 1;
  const fullStr = Array.from({ length }, (_, i) => String.fromCharCode(startCode + i)).join('');

  // The truncation must run on the fully generated string; returning before this
  // point would silently ignore `width`.
  if (typeof width === 'number') {
    return truncateMiddle(fullStr, width);
  }

  return fullStr;
};

/**
 * Concatenates the characters of several ranges, in the order the ranges are given.
 *
 * @param ranges - Ranges to expand; each contributes its `start`..`end` characters.
 * @param width - Optional cap forwarded to `generateCharactersFromRange`, so it is
 *   applied to each range individually rather than to the combined string.
 * @returns The joined characters of all ranges, or an empty string for an empty list.
 */
export const generateCharactersFromRanges = (ranges: UnicodeRange[], width?: number): string => {
  return ranges.reduce((acc, range) => {
    return acc + generateCharactersFromRange(range.start, range.end, width);
  }, '');
};

Expand Down
29 changes: 19 additions & 10 deletions __tests__/validators/japanese.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ import {
MockKatakanaRange,
MockRareKanjiRange,
MockHalfWidthKatakana,
MockFullWidthUpperCaseRange,
MockFullWidthLowerCaseRange,
MockFullWidthNumbersRange,
MockFullWidthPunctuationsRange,
} from '../constants';

describe('validator - Japanese', () => {
Expand All @@ -21,24 +25,28 @@ describe('validator - Japanese', () => {
MockHiraganaRange,
MockCJKPunctuationsRange,
MockHalfWidthKatakana,
MockFullWidthUpperCaseRange,
MockFullWidthLowerCaseRange,
MockFullWidthNumbersRange,
...MockFullWidthPunctuationsRange,
]);

const invalidString = getRandomString(500);

describe('isValidJapanese', () => {
test('should return true when only Kanji characters are supplied', () => {
test('should return true when only Japanese characters are supplied', () => {
const result = isValidJapanese(fullJapaneseString);
expect(result).toBe(true);
});

test('should return false when only Kanji and non Kanji characters are supplied', () => {
test('should return false when only Japanese and non Japanese characters are supplied', () => {
const result = isValidJapanese(fullJapaneseString + invalidString);
expect(result).toBe(false);
});
});

describe('isValidJapanesePresent', () => {
test('should return true when only Kanji characters are supplied', () => {
test('should return true when only Japanese characters are supplied', () => {
const result = isJapanesePresent(fullJapaneseString);
expect(result).toBe(true);
});
Expand All @@ -54,19 +62,20 @@ describe('validator - Japanese', () => {
});
});

describe('howMuchKanjiPresent', () => {
test('should return 100 when only Kanji characters are supplied', () => {
describe('howMuchJapanesePresent', () => {
test('should return 100 when only Japanese characters are supplied', () => {
const result = howMuchJapaneseIsPresent(fullJapaneseString);
expect(result).toBe(100);
});

// Mixed input: non-Japanese noise is prepended to the full Japanese string, so the
// share of Japanese characters is high but below 100.
test('should return 98.276 when Japanese and non Japanese characters are supplied', () => {
  const result = howMuchJapaneseIsPresent(invalidString + fullJapaneseString);
  // Precision is important here since the length of the string is huge
  expect(toFixedNumber(result, 3)).toBe(98.276);
});

test('should return 0 when no Kanji characters are supplied', () => {
const result = +howMuchJapaneseIsPresent(invalidString);
test('should return 0 when no Japanese characters are supplied', () => {
const result = howMuchJapaneseIsPresent(invalidString);
expect(toFixedNumber(result)).toBe(0);
});
});
Expand Down
11 changes: 6 additions & 5 deletions __tests__/validators/kana.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { generateCharactersFromRanges, toFixedNumber } from '../utils';
import { generateCharactersFromRanges, getRandomString, toFixedNumber } from '../utils';
import { howMuchKanaIsPresent, isKanaPresent, isValidKana } from '../../src';
import {
MockHalfWidthKatakana,
Expand All @@ -13,7 +13,7 @@ describe('validator - kana', () => {
MockHalfWidthKatakana,
]);

const invalidString = 'abcdefghijklmnopqrstuvwxyz';
const invalidString = getRandomString(27);

describe('isValidKana', () => {
test('should return true when only kana characters are supplied', () => {
Expand Down Expand Up @@ -51,12 +51,13 @@ describe('validator - kana', () => {
});

// Mixed input: the full kana string followed by non-kana noise, so the share of
// kana characters is below 100.
test('should return 86.294 when kana and non kana characters are supplied', () => {
  const result = howMuchKanaIsPresent(fullKanaString + invalidString);
  // Precision is important here due to the length of the string
  expect(toFixedNumber(result, 3)).toBe(86.294);
});

test('should return 0 when no kana characters are supplied', () => {
const result = +howMuchKanaIsPresent(invalidString);
const result = howMuchKanaIsPresent(invalidString);
expect(toFixedNumber(result)).toBe(0);
});
});
Expand Down
3 changes: 2 additions & 1 deletion __tests__/validators/kanji.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ describe('validator - Kanji', () => {

// Mixed input: non-Kanji noise is prepended to the rare Kanji string, so the share
// of Kanji characters is below 100.
test('should return 95.647 when Kanji and non Kanji characters are supplied', () => {
  const result = howMuchKanjiIsPresent(invalidString + rareKanjiRange);
  // Precision is important here due to the length of the string
  expect(toFixedNumber(result, 3)).toBe(95.647);
});

test('should return 0 when no Kanji characters are supplied', () => {
Expand Down
18 changes: 18 additions & 0 deletions src/constants/built-ranges.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,13 @@ import {
KatakanaRange,
RareKanjiRange,
KanjiCompatibilityIdeographsRange,
FullWidthPunctuationsRange,
FullWidthUpperCaseRange,
FullWidthNumbersRange,
FullWidthLowerCaseRange,
} from './raw-ranges';
import { createRange } from '../utils/create-range';
import { createRanges } from '../utils/create-ranges';

export const CJKPunctuations = createRange(CJKPunctuationsRange.start, CJKPunctuationsRange.end);
export const Hiragana = createRange(HiraganaRange.start, HiraganaRange.end);
Expand All @@ -32,3 +37,16 @@ export const HalfWidthKatakana = createRange(
HalfWidthKatakanaRange.start,
HalfWidthKatakanaRange.end,
);

// Built range for full-width upper-case letters, derived from the raw start/end pair.
export const FullWidthUpperCase = createRange(
FullWidthUpperCaseRange.start,
FullWidthUpperCaseRange.end,
);

// Built range for full-width lower-case letters, derived from the raw start/end pair.
export const FullWidthLowerCase = createRange(
FullWidthLowerCaseRange.start,
FullWidthLowerCaseRange.end,
);

// Built range for full-width digits.
export const FullWidthNumbers = createRange(FullWidthNumbersRange.start, FullWidthNumbersRange.end);
// Full-width punctuation is described by several raw ranges, so it goes through
// createRanges with the whole list instead of a single start/end pair.
// NOTE(review): presumably this yields one built range per raw range - confirm against createRanges.
export const FullWidthPunctuations = createRanges(FullWidthPunctuationsRange);
8 changes: 8 additions & 0 deletions src/constants/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ import {
KatakanaPhoneticExtension,
RareKanji,
KanjiCompatibilityIdeographs,
FullWidthPunctuations,
FullWidthLowerCase,
FullWidthUpperCase,
FullWidthNumbers,
} from './built-ranges';

export const characterSetMap: Readonly<CharacterDict> = Object.freeze({
Expand All @@ -19,6 +23,10 @@ export const characterSetMap: Readonly<CharacterDict> = Object.freeze({
[CharacterSet.KanjiCompatibilityIdeographs]: KanjiCompatibilityIdeographs,
[CharacterSet.CommonUncommonKanji]: CommonUncommonKanji,
[CharacterSet.HalfWidthKatakana]: HalfWidthKatakana,
[CharacterSet.FullWidthUpperCase]: FullWidthUpperCase,
[CharacterSet.FullWidthLowerCase]: FullWidthLowerCase,
[CharacterSet.FullWidthNumbers]: FullWidthNumbers,
[CharacterSet.FullWidthPunctuations]: FullWidthPunctuations,
});

export const defaultValidationThreshold = 85;
Expand Down
56 changes: 56 additions & 0 deletions src/constants/raw-ranges.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,59 @@ export const HalfWidthKatakanaRange: UnicodeRange = {
start: '\\uff66',
end: '\\uff9f',
};

// Roman full-width forms: upper-case letters, U+FF21 through U+FF3A (inclusive).
// Example - ABCD...Z
// The backslash is escaped so the value is the literal text of a Unicode escape.
export const FullWidthUpperCaseRange: UnicodeRange = {
start: '\\uff21',
end: '\\uff3a',
};

// Roman full-width forms: lower-case letters, U+FF41 through U+FF5A (inclusive).
// Example - abcd...z
export const FullWidthLowerCaseRange: UnicodeRange = {
start: '\\uff41',
end: '\\uff5a',
};

// Roman full-width forms: digits, U+FF10 through U+FF19 (inclusive).
// Example - 0123...9
export const FullWidthNumbersRange: UnicodeRange = {
start: '\\uff10',
end: '\\uff19',
};

// Roman full-width forms: first run of punctuation, U+FF00 through U+FF0F (inclusive).
// List - !"#$%&'()*+,-./
// NOTE(review): the list above starts at the exclamation mark, which is U+FF01;
// U+FF00 appears to be unassigned in Unicode - confirm whether `start` should be '\\uff01'
// (the test mocks mirror this value and would need the same change).
export const FullWidthPunctuations: UnicodeRange = {
start: '\\uff00',
end: '\\uff0f',
};

// Roman full-width forms: punctuation between the digits and the upper-case letters,
// U+FF1A through U+FF20 (inclusive).
// List - :;<=>?@
const FullWidthPunctuationsExtensionA: UnicodeRange = {
start: '\\uff1a',
end: '\\uff20',
};

// Roman full-width forms: punctuation between the upper-case and lower-case letters,
// U+FF3B through U+FF40 (inclusive).
// List - [\]^_`
const FullWidthPunctuationsExtensionB: UnicodeRange = {
start: '\\uff3b',
end: '\\uff40',
};

// Roman full-width forms: punctuation after the lower-case letters,
// U+FF5B through U+FF65 (inclusive).
// List - {|}~⦅⦆。「」、・
const FullWidthPunctuationsExtensionC: UnicodeRange = {
start: '\\uff5b',
end: '\\uff65',
};

// Every punctuation sub-range, in code-point order. The punctuation marks are not
// contiguous (digits and letters sit between them), so they are kept as a list of ranges.
export const FullWidthPunctuationsRange: UnicodeRange[] = [
FullWidthPunctuations,
FullWidthPunctuationsExtensionA,
FullWidthPunctuationsExtensionB,
FullWidthPunctuationsExtensionC,
];
6 changes: 5 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ export enum CharacterSet {
KanjiCompatibilityIdeographs = 'KanjiCompatibilityIdeographs',
CommonUncommonKanji = 'CommonUncommonKanji',
HalfWidthKatakana = 'HalfWidthKatakana',
FullWidthUpperCase = 'FullWidthUpperCase',
FullWidthLowerCase = 'FullWidthLowerCase',
FullWidthNumbers = 'FullWidthNumbers',
FullWidthPunctuations = 'FullWidthPunctuations',
}

/**
 * Lookup from every `CharacterSet` member to its built range.
 *
 * A value is a single string for a contiguous set, or a list of strings for a set
 * made of several disjoint ranges.
 */
export type CharacterDict = Record<CharacterSet, string | string[]>;

/**
* All the values must be escaped to be used in a regex
Expand Down
2 changes: 1 addition & 1 deletion src/utils/create-range.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ export const createRange = (start: string, end: string): string => {
return '';
}

return makeString('[', start, '-', end, ']');
return makeString(start, '-', end);
};
Loading

0 comments on commit 13c162b

Please sign in to comment.