Skip to content

Commit d218336

Browse files
refactor: simplify char class encoding (#101)
1 parent 16d9164 commit d218336

9 files changed

+192
-60
lines changed

src/__tests__/example-email.ts

+3-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import {
1212

1313
test('example: email validation', () => {
1414
const usernameChars = charClass(charRange('a', 'z'), digit, anyOf('._%+-'));
15-
const hostnameChars = charClass(charRange('a', 'z'), digit, anyOf('-.'));
15+
const hostnameChars = charClass(charRange('a', 'z'), digit, anyOf('.-'));
1616
const domainChars = charRange('a', 'z');
1717

1818
const regex = buildRegExp(
@@ -38,5 +38,6 @@ test('example: email validation', () => {
3838
expect(regex).not.toMatchString('a@gmail.c');
3939
expect(regex).not.toMatchString('@gmail.com');
4040

41-
expect(regex).toEqualRegex(/^[a-z\d._%+-]+@[a-z\d.-]+\.[a-z]{2,}$/i);
41+
// eslint-disable-next-line no-useless-escape
42+
expect(regex).toEqualRegex(/^[a-z\d._%+\-]+@[a-z\d.\-]+\.[a-z]{2,}$/i);
4243
});

src/__tests__/example-hex-color.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,5 @@ test('example: hex color validation', () => {
4242
expect(regex).not.toMatchString('#12345');
4343
expect(regex).not.toMatchString('#1234567');
4444

45-
expect(regex).toEqualRegex(/^#?(?:[a-f\d]{6}|[a-f\d]{3})$/i);
45+
expect(regex).toEqualRegex(/^#?(?:[\da-f]{6}|[\da-f]{3})$/i);
4646
});

src/__tests__/example-js-number.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -47,5 +47,6 @@ test('example: validate JavaScript number', () => {
4747
expect(numberValidator).not.toMatchString('.1.1');
4848
expect(numberValidator).not.toMatchString('.');
4949

50-
expect(numberValidator).toEqualRegex(/^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$/);
50+
// eslint-disable-next-line no-useless-escape
51+
expect(numberValidator).toEqualRegex(/^[+\-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+\-]?\d+)?$/);
5152
});

src/__tests__/example-url-simple.ts

+4-3
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ import {
1616
test('example: simple url validation', () => {
1717
const protocol = [choiceOf('http', 'https'), '://'];
1818
const domainChars = charClass(charRange('a', 'z'), digit);
19-
const domainCharsHypen = charClass(domainChars, anyOf('-'));
19+
const domainCharsHyphen = charClass(domainChars, anyOf('-'));
2020

2121
const domainSegment = choiceOf(
2222
domainChars, // single char
23-
[domainChars, zeroOrMore(domainCharsHypen), domainChars], // multi char
23+
[domainChars, zeroOrMore(domainCharsHyphen), domainChars], // multi char
2424
);
2525

2626
const regex = buildRegExp([
@@ -45,6 +45,7 @@ test('example: simple url validation', () => {
4545
expect(regex).not.toMatchString('@gmail.com');
4646

4747
expect(regex).toEqualRegex(
48-
/^(?:(?:http|https):\/\/)?(?:(?:[a-z\d]|[a-z\d][a-z\d-]*[a-z\d])\.)+[a-z][a-z\d]+$/,
48+
// eslint-disable-next-line no-useless-escape
49+
/^(?:(?:http|https):\/\/)?(?:(?:[a-z\d]|[a-z\d][a-z\d\-]*[a-z\d])\.)+[a-z][a-z\d]+$/,
4950
);
5051
});

src/constructs/__tests__/char-class.test.ts

+150-13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
/* eslint-disable no-useless-escape */
12
import {
23
anyOf,
4+
buildRegExp,
35
charClass,
46
charRange,
57
digit,
@@ -9,11 +11,16 @@ import {
911
nonWord,
1012
oneOrMore,
1113
optional,
14+
type RegexSequence,
1215
whitespace,
1316
word,
1417
zeroOrMore,
1518
} from '../..';
1619

20+
function u(sequence: RegexSequence) {
21+
return buildRegExp(sequence, { unicode: true });
22+
}
23+
1724
test('`charClass` base cases', () => {
1825
expect(charClass(charRange('a', 'z'))).toEqualRegex(/[a-z]/);
1926
expect(charClass(charRange('a', 'z'), charRange('A', 'Z'))).toEqualRegex(/[a-zA-Z]/);
@@ -66,51 +73,181 @@ test('`charRange` throws on incorrect arguments', () => {
6673
);
6774
});
6875

69-
test('`anyOf` pattern', () => {
76+
test('`anyOf` handles basic cases pattern', () => {
77+
expect(anyOf('a')).toMatchString('a');
7078
expect(anyOf('a')).toEqualRegex(/[a]/);
79+
80+
expect(['x', anyOf('a'), 'x']).toMatchString('xax');
7181
expect(['x', anyOf('a'), 'x']).toEqualRegex(/x[a]x/);
82+
83+
expect(anyOf('ab')).toMatchString('a');
84+
expect(anyOf('ab')).toMatchString('b');
85+
expect(anyOf('ab')).not.toMatchString('c');
7286
expect(anyOf('ab')).toEqualRegex(/[ab]/);
87+
88+
expect(['x', anyOf('ab')]).toMatchString('xa');
89+
expect(['x', anyOf('ab')]).toMatchString('xb');
90+
expect(['x', anyOf('ab')]).not.toMatchString('x0');
7391
expect(['x', anyOf('ab')]).toEqualRegex(/x[ab]/);
92+
93+
expect(['x', anyOf('ab'), 'x']).toMatchString('xax');
94+
expect(['x', anyOf('ab'), 'x']).toMatchString('xbx');
95+
expect(['x', anyOf('ab'), 'x']).not.toMatchString('x0x');
7496
expect(['x', anyOf('ab'), 'x']).toEqualRegex(/x[ab]x/);
7597
});
7698

99+
test('`anyOf` throws on empty text', () => {
100+
expect(() => anyOf('')).toThrowErrorMatchingInlineSnapshot(`"Expected at least one character"`);
101+
});
102+
77103
test('`anyOf` pattern with quantifiers', () => {
78104
expect(['x', oneOrMore(anyOf('abc')), 'x']).toEqualRegex(/x[abc]+x/);
79105
expect(['x', optional(anyOf('abc')), 'x']).toEqualRegex(/x[abc]?x/);
80106
expect(['x', zeroOrMore(anyOf('abc')), 'x']).toEqualRegex(/x[abc]*x/);
81107
});
82108

83-
test('`anyOf` pattern escapes special characters', () => {
84-
expect(anyOf('abc-+.]\\')).toEqualRegex(/[abc+.\]\\-]/);
85-
});
109+
test('`anyOf` handles hyphens', () => {
110+
expect(anyOf('^-')).toMatchString('^');
111+
expect(anyOf('^-')).toMatchString('-');
112+
expect(anyOf('^-')).not.toMatchString('a');
113+
expect(anyOf('^-')).toEqualRegex(/[\^\-]/);
114+
115+
expect(anyOf('-^')).toMatchString('^');
116+
expect(anyOf('-^')).toMatchString('-');
117+
expect(anyOf('-^')).not.toMatchString('a');
118+
expect(anyOf('-^')).toEqualRegex(/[\-\^]/);
86119

87-
test('`anyOf` pattern moves hyphen to the last position', () => {
88-
expect(anyOf('a-bc')).toEqualRegex(/[abc-]/);
120+
expect(anyOf('-^a')).toMatchString('^');
121+
expect(anyOf('-^a')).toMatchString('-');
122+
expect(anyOf('-^a')).toMatchString('a');
123+
expect(anyOf('-^a')).not.toMatchString('b');
124+
expect(anyOf('-^a')).toEqualRegex(/[\-\^a]/);
89125
});
90126

91-
test('`anyOf` pattern edge cases', () => {
92-
expect(anyOf('^-')).toEqualRegex(/[\^-]/);
93-
expect(anyOf('-^')).toEqualRegex(/[\^-]/);
94-
expect(anyOf('-^a')).toEqualRegex(/[a^-]/);
127+
test('`anyOf` handles hyphens in unicode mode', () => {
128+
expect(u(anyOf('^-'))).toMatchString('^');
129+
expect(u(anyOf('^-'))).toMatchString('^');
130+
expect(u(anyOf('^-'))).toMatchString('-');
131+
expect(u(anyOf('^-'))).not.toMatchString('a');
132+
expect(u(anyOf('^-'))).toEqualRegex(/[\^\-]/u);
133+
134+
expect(u(anyOf('-^'))).toMatchString('^');
135+
expect(u(anyOf('-^'))).toMatchString('-');
136+
expect(u(anyOf('-^'))).not.toMatchString('a');
137+
expect(u(anyOf('-^'))).toEqualRegex(/[\-\^]/u);
138+
139+
expect(u(anyOf('-^a'))).toMatchString('^');
140+
expect(u(anyOf('-^a'))).toMatchString('-');
141+
expect(u(anyOf('-^a'))).toMatchString('a');
142+
expect(u(anyOf('-^a'))).not.toMatchString('b');
143+
expect(u(anyOf('-^a'))).toEqualRegex(/[\-\^a]/u);
144+
});
95145

146+
test('`anyOf` handles special chars', () => {
147+
expect(anyOf('.')).toMatchString('.');
148+
expect(anyOf('.')).not.toMatchString('a');
96149
expect(anyOf('.')).toEqualRegex(/[.]/);
150+
151+
expect(anyOf('*')).toMatchString('*');
152+
expect(anyOf('*')).not.toMatchString('a');
97153
expect(anyOf('*')).toEqualRegex(/[*]/);
154+
155+
expect(anyOf('+')).toMatchString('+');
156+
expect(anyOf('+')).not.toMatchString('a');
98157
expect(anyOf('+')).toEqualRegex(/[+]/);
158+
159+
expect(anyOf('?')).toMatchString('?');
160+
expect(anyOf('?')).not.toMatchString('a');
99161
expect(anyOf('?')).toEqualRegex(/[?]/);
100-
expect(anyOf('^')).toEqualRegex(/[^]/);
162+
163+
expect(anyOf('^')).toMatchString('^');
164+
expect(anyOf('^')).not.toMatchString('a');
165+
expect(anyOf('^')).toEqualRegex(/[\^]/);
166+
167+
expect(anyOf('^0')).toMatchString('^');
168+
expect(anyOf('^0')).not.toMatchString('a');
169+
expect(anyOf('^0')).toEqualRegex(/[\^0]/);
170+
171+
expect(anyOf('0^')).toMatchString('^');
172+
expect(anyOf('0^')).not.toMatchString('a');
173+
expect(anyOf('0^')).toEqualRegex(/[0\^]/);
174+
175+
expect(anyOf('$')).toMatchString('$');
176+
expect(anyOf('$')).not.toMatchString('a');
101177
expect(anyOf('$')).toEqualRegex(/[$]/);
178+
179+
expect(anyOf('{')).toMatchString('{');
180+
expect(anyOf('{')).not.toMatchString('a');
102181
expect(anyOf('{')).toEqualRegex(/[{]/);
182+
183+
expect(anyOf('}')).toMatchString('}');
184+
expect(anyOf('}')).not.toMatchString('a');
103185
expect(anyOf('}')).toEqualRegex(/[}]/);
186+
187+
expect(anyOf('(')).toMatchString('(');
188+
expect(anyOf('(')).not.toMatchString('a');
104189
expect(anyOf('(')).toEqualRegex(/[(]/);
190+
191+
expect(anyOf(')')).toMatchString(')');
192+
expect(anyOf(')')).not.toMatchString('a');
105193
expect(anyOf(')')).toEqualRegex(/[)]/);
194+
195+
expect(anyOf('|')).toMatchString('|');
196+
expect(anyOf('|')).not.toMatchString('a');
106197
expect(anyOf('|')).toEqualRegex(/[|]/);
198+
199+
expect(anyOf('[')).toMatchString('[');
200+
expect(anyOf('[')).not.toMatchString('a');
107201
expect(anyOf('[')).toEqualRegex(/[[]/);
202+
203+
expect(anyOf(']')).toMatchString(']');
204+
expect(anyOf(']')).not.toMatchString('a');
108205
expect(anyOf(']')).toEqualRegex(/[\]]/);
206+
207+
expect(anyOf('\\')).toMatchString('\\');
208+
expect(anyOf('\\')).not.toMatchString('a');
109209
expect(anyOf('\\')).toEqualRegex(/[\\]/);
110210
});
111211

112-
test('`anyOf` throws on empty text', () => {
113-
expect(() => anyOf('')).toThrowErrorMatchingInlineSnapshot(`"Expected at least one character"`);
212+
test('`anyof` matches special characters', () => {
213+
expect(anyOf('a')).toMatchString('a');
214+
});
215+
216+
test('`anyof` matches special characters in unicode mode', () => {
217+
expect(u(anyOf('a'))).toMatchString('a');
218+
219+
expect(u(anyOf('.'))).toMatchString('.');
220+
expect(u(anyOf('.'))).not.toMatchString('a');
221+
expect(u(anyOf('*'))).toMatchString('*');
222+
expect(u(anyOf('*'))).not.toMatchString('a');
223+
expect(u(anyOf('+'))).toMatchString('+');
224+
expect(u(anyOf('+'))).not.toMatchString('a');
225+
expect(u(anyOf('?'))).toMatchString('?');
226+
expect(u(anyOf('?'))).not.toMatchString('a');
227+
expect(u(anyOf('^'))).toMatchString('^');
228+
expect(u(anyOf('^'))).not.toMatchString('a');
229+
expect(u(anyOf('^0'))).toMatchString('^');
230+
expect(u(anyOf('^0'))).not.toMatchString('a');
231+
expect(u(anyOf('0^'))).toMatchString('^');
232+
expect(u(anyOf('0^'))).not.toMatchString('a');
233+
expect(u(anyOf('$'))).toMatchString('$');
234+
expect(u(anyOf('$'))).not.toMatchString('a');
235+
expect(u(anyOf('{'))).toMatchString('{');
236+
expect(u(anyOf('{'))).not.toMatchString('a');
237+
expect(u(anyOf('}'))).toMatchString('}');
238+
expect(u(anyOf('}'))).not.toMatchString('a');
239+
expect(u(anyOf('('))).toMatchString('(');
240+
expect(u(anyOf('('))).not.toMatchString('a');
241+
expect(u(anyOf(')'))).toMatchString(')');
242+
expect(u(anyOf(')'))).not.toMatchString('a');
243+
expect(u(anyOf('|'))).toMatchString('|');
244+
expect(u(anyOf('|'))).not.toMatchString('a');
245+
expect(u(anyOf('['))).toMatchString('[');
246+
expect(u(anyOf('['))).not.toMatchString('a');
247+
expect(u(anyOf(']'))).toMatchString(']');
248+
expect(u(anyOf(']'))).not.toMatchString('a');
249+
expect(u(anyOf('\\'))).toMatchString('\\');
250+
expect(u(anyOf('\\'))).not.toMatchString('a');
114251
});
115252

116253
test('`negated` character class pattern', () => {

src/constructs/char-class.ts

+6-19
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ export function charClass(...elements: Array<CharacterClass | CharacterEscape>):
1313
}
1414

1515
return {
16-
chars: elements.map((c) => c.chars).flat(),
17-
ranges: elements.map((c) => c.ranges ?? []).flat(),
16+
elements: elements.map((c) => c.elements).flat(),
1817
encode: encodeCharClass,
1918
};
2019
}
@@ -36,8 +35,7 @@ export function charRange(start: string, end: string): CharacterClass {
3635
}
3736

3837
return {
39-
chars: [],
40-
ranges: [{ start, end }],
38+
elements: [`${start}-${end}`],
4139
encode: encodeCharClass,
4240
};
4341
}
@@ -52,7 +50,7 @@ export function anyOf(chars: string): CharacterClass {
5250
ensureText(chars);
5351

5452
return {
55-
chars: chars.split('').map(escapeChar),
53+
elements: chars.split('').map(escapeChar),
5654
encode: encodeCharClass,
5755
};
5856
}
@@ -74,27 +72,16 @@ export const inverted = negated;
7472

7573
/** Escape chars for usage inside char class */
7674
function escapeChar(text: string): string {
77-
return text.replace(/[\]\\]/g, '\\$&'); // $& means the whole matched string
75+
// Escape the chars that are special inside a char class: `]`, `-`, `\` and `^`, e.g. anyOf(']-\\^')
76+
return text.replace(/[\]\-\\^]/g, '\\$&'); // "$&" is whole matched string
7877
}
7978

8079
function encodeCharClass(
8180
this: CharacterClass | CharacterEscape,
8281
isNegated?: boolean,
8382
): EncodedRegex {
84-
// If the passed characters include a hyphen (`-`), it needs to be moved to
85-
// the first (or last) position so that it is treated as a literal hyphen and not as a range.
86-
// See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types
87-
const hyphen = this.chars.includes('-') ? '-' : '';
88-
const caret = this.chars.includes('^') ? '^' : '';
89-
const otherChars = this.chars.filter((c) => c !== '-' && c !== '^').join('');
90-
const ranges = this.ranges?.map(({ start, end }) => `${start}-${end}`).join('') ?? '';
91-
const negation = isNegated ? '^' : '';
92-
93-
let pattern = `[${negation}${ranges}${otherChars}${caret}${hyphen}]`;
94-
if (pattern === '[^-]') pattern = '[\\^-]';
95-
9683
return {
9784
precedence: 'atom',
98-
pattern,
85+
pattern: `[${isNegated ? '^' : ''}${this.elements.join('')}]`,
9986
};
10087
}

src/constructs/char-escape.ts

+6-6
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ export const any: EncodedRegex = {
1515
export const digit: CharacterEscape = {
1616
precedence: 'atom',
1717
pattern: '\\d',
18-
chars: ['\\d'],
18+
elements: ['\\d'],
1919
};
2020

2121
/**
@@ -24,7 +24,7 @@ export const digit: CharacterEscape = {
2424
export const nonDigit: CharacterEscape = {
2525
precedence: 'atom',
2626
pattern: '\\D',
27-
chars: ['\\D'],
27+
elements: ['\\D'],
2828
};
2929

3030
/**
@@ -33,7 +33,7 @@ export const nonDigit: CharacterEscape = {
3333
export const word: CharacterEscape = {
3434
precedence: 'atom',
3535
pattern: '\\w',
36-
chars: ['\\w'],
36+
elements: ['\\w'],
3737
};
3838

3939
/**
@@ -42,7 +42,7 @@ export const word: CharacterEscape = {
4242
export const nonWord: CharacterEscape = {
4343
precedence: 'atom',
4444
pattern: '\\W',
45-
chars: ['\\W'],
45+
elements: ['\\W'],
4646
};
4747

4848
/**
@@ -51,7 +51,7 @@ export const nonWord: CharacterEscape = {
5151
export const whitespace: CharacterEscape = {
5252
precedence: 'atom',
5353
pattern: '\\s',
54-
chars: ['\\s'],
54+
elements: ['\\s'],
5555
};
5656

5757
/**
@@ -60,7 +60,7 @@ export const whitespace: CharacterEscape = {
6060
export const nonWhitespace: CharacterEscape = {
6161
precedence: 'atom',
6262
pattern: '\\S',
63-
chars: ['\\S'],
63+
elements: ['\\S'],
6464
};
6565

6666
/**

0 commit comments

Comments
 (0)