Skip to content

Commit 63385ca

Browse files
refactor: improve tree shaking (#98)
1 parent 5e9b916 commit 63385ca

19 files changed

+253
-232
lines changed

src/__tests__/builder.test.ts

+12-8
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import { buildRegExp, char, unicodeProperty } from '..';
1+
import { buildRegExp, unicodeChar, unicodeProperty } from '..';
22

33
test('`regexBuilder` flags', () => {
44
expect(buildRegExp('a').flags).toBe('');
@@ -34,22 +34,26 @@ test('`regexBuilder` flags', () => {
3434
});
3535

3636
test('`regexBuilder` throws when using unicode-aware features without `unicode` flag', () => {
37-
expect(() => buildRegExp(char(0x1234))).not.toThrow();
38-
expect(() => buildRegExp(char(0x12345), { unicode: true })).not.toThrow();
37+
expect(() => buildRegExp(unicodeChar(0x1234))).not.toThrow();
38+
expect(() => buildRegExp(unicodeChar(0x12345), { unicode: true })).not.toThrow();
3939
expect(() => buildRegExp(unicodeProperty('Emoji_Presentation'), { unicode: true })).not.toThrow();
4040

41-
expect(() => buildRegExp(char(0x123456))).toThrowErrorMatchingInlineSnapshot(
41+
expect(() => buildRegExp(unicodeChar(0x123456))).toThrowErrorMatchingInlineSnapshot(
4242
`"Expected a valid unicode code point but received 1193046"`,
4343
);
44-
expect(() => buildRegExp(char(0x12345))).toThrowErrorMatchingInlineSnapshot(
45-
`"The pattern "\\u{12345}" requires Unicode-aware mode. Please ensure the "unicode" flag is set."`,
44+
expect(() => buildRegExp(unicodeChar(0x12345))).toThrowErrorMatchingInlineSnapshot(
45+
`"Pattern "\\u{12345}" requires "unicode" flag to be set."`,
4646
);
4747
expect(() =>
4848
buildRegExp(unicodeProperty('Emoji_Presentation')),
4949
).toThrowErrorMatchingInlineSnapshot(
50-
`"The pattern "\\p{Emoji_Presentation}" requires Unicode-aware mode. Please ensure the "unicode" flag is set."`,
50+
`"Pattern "\\p{Emoji_Presentation}" requires "unicode" flag to be set."`,
5151
);
5252
expect(() => buildRegExp(/\P{Letter}/u)).toThrowErrorMatchingInlineSnapshot(
53-
`"The pattern "\\P{Letter}" requires Unicode-aware mode. Please ensure the "unicode" flag is set."`,
53+
`"Pattern "\\P{Letter}" requires "unicode" flag to be set."`,
5454
);
5555
});
56+
57+
test('`regexBuilder` does not throws on tricky unicode mode-like patterns', () => {
58+
expect(() => buildRegExp(/\\u{1234}/)).not.toThrow();
59+
});

src/builders.ts

+12-13
Original file line number | Diff line number | Diff line change
@@ -10,17 +10,9 @@ import { encode } from './encoder';
1010
*/
1111
export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp {
1212
const pattern = encode(sequence).pattern;
13-
const flagsString = encodeFlags(flags ?? {});
14-
15-
if (!flags?.unicode) {
16-
const unicodeModePattern = getUnicodeModePattern(pattern);
17-
if (unicodeModePattern) {
18-
throw new Error(
19-
`The pattern "${unicodeModePattern}" requires Unicode-aware mode. Please ensure the "unicode" flag is set.`,
20-
);
21-
}
22-
}
13+
ensureUnicodeFlagIfNeeded(pattern, flags);
2314

15+
const flagsString = encodeFlags(flags ?? {});
2416
return new RegExp(pattern, flagsString);
2517
}
2618

@@ -47,9 +39,16 @@ function encodeFlags(flags: RegexFlags): string {
4739
return result;
4840
}
4941

50-
const unicodeModePatterns = /(?:\\u|\\p|\\P)\{.+?\}/;
42+
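// Matches escapes that require unicode mode (\u{...}, \p{...}, \P{...}) unless the escape's backslash is itself preceded by a backslash, so an escaped backslash followed by literal text (e.g. \\u{...}) is not flagged.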
43+
const unicodeModePatterns = /(?<!\\)(?:\\u|\\[pP])\{.+?\}/;
44+
45+
function ensureUnicodeFlagIfNeeded(pattern: string, flags: RegexFlags | undefined) {
46+
if (flags?.unicode) {
47+
return;
48+
}
5149

52-
function getUnicodeModePattern(pattern: string): string | null {
5350
const match = pattern.match(unicodeModePatterns);
54-
return match?.[0] ?? null;
51+
if (match) {
52+
throw new Error(`Pattern "${match?.[0]}" requires "unicode" flag to be set.`);
53+
}
5554
}

src/constructs/__tests__/char-class.test.ts

+15-18
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
import {
22
anyOf,
3-
buildRegExp,
43
charClass,
54
charRange,
65
digit,
@@ -38,9 +37,7 @@ test('`charClass` joins character escapes', () => {
3837
});
3938

4039
test('`charClass` throws on empty text', () => {
41-
expect(() => charClass()).toThrowErrorMatchingInlineSnapshot(
42-
`"\`charClass\` should receive at least one element"`,
43-
);
40+
expect(() => charClass()).toThrowErrorMatchingInlineSnapshot(`"Expected at least one element"`);
4441
});
4542

4643
test('`charRange` pattern', () => {
@@ -49,15 +46,23 @@ test('`charRange` pattern', () => {
4946
expect([charRange('A', 'F'), 'x']).toEqualRegex(/[A-F]x/);
5047
});
5148

49+
test('`charRange` works both ways', () => {
50+
expect(charRange('a', 'z')).toEqualRegex(/[a-z]/);
51+
expect(charRange('z', 'a')).toEqualRegex(/[a-z]/);
52+
});
53+
5254
test('`charRange` throws on incorrect arguments', () => {
53-
expect(() => charRange('z', 'a')).toThrowErrorMatchingInlineSnapshot(
54-
`"\`start\` should be before or equal to \`end\`"`,
55-
);
5655
expect(() => charRange('aa', 'z')).toThrowErrorMatchingInlineSnapshot(
57-
`"\`charRange\` should receive only single character \`start\` string"`,
56+
`"Expected single characters, but received "aa" & "z""`,
5857
);
5958
expect(() => charRange('a', 'zz')).toThrowErrorMatchingInlineSnapshot(
60-
`"\`charRange\` should receive only single character \`end\` string"`,
59+
`"Expected single characters, but received "a" & "zz""`,
60+
);
61+
expect(() => charRange('', 'z')).toThrowErrorMatchingInlineSnapshot(
62+
`"Expected single characters, but received "" & "z""`,
63+
);
64+
expect(() => charRange('a', '')).toThrowErrorMatchingInlineSnapshot(
65+
`"Expected single characters, but received "a" & """`,
6166
);
6267
});
6368

@@ -105,9 +110,7 @@ test('`anyOf` pattern edge cases', () => {
105110
});
106111

107112
test('`anyOf` throws on empty text', () => {
108-
expect(() => anyOf('')).toThrowErrorMatchingInlineSnapshot(
109-
`"\`anyOf\` should received at least one character"`,
110-
);
113+
expect(() => anyOf('')).toThrowErrorMatchingInlineSnapshot(`"Expected at least one character"`);
111114
});
112115

113116
test('`negated` character class pattern', () => {
@@ -119,9 +122,3 @@ test('`negated` character class matching', () => {
119122
expect(negated(anyOf('a'))).not.toMatchString('aa');
120123
expect(negated(anyOf('a'))).toMatchGroups('aba', ['b']);
121124
});
122-
123-
test('`encodeCharacterClass` throws on empty text', () => {
124-
expect(() => buildRegExp(negated({ chars: [], ranges: [] }))).toThrowErrorMatchingInlineSnapshot(
125-
`"Character class should contain at least one character or character range"`,
126-
);
127-
});

src/constructs/__tests__/choice-of.test.ts

+1-1
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,6 @@ test('`choiceOf` pattern using nested regex', () => {
3434

3535
test('`choiceOf` throws on empty options', () => {
3636
expect(() => choiceOf()).toThrowErrorMatchingInlineSnapshot(
37-
`"\`choiceOf\` should receive at least one alternative"`,
37+
`"Expected at least one alternative"`,
3838
);
3939
});

src/constructs/__tests__/encoder.test.tsx

+2-2
Original file line number | Diff line number | Diff line change
@@ -75,14 +75,14 @@ test('`buildRegExp` throws error on unknown element', () => {
7575
// @ts-expect-error intentionally passing incorrect object
7676
buildRegExp({ type: 'unknown' }),
7777
).toThrowErrorMatchingInlineSnapshot(`
78-
"\`encodeElement\`: unknown element: {
78+
"Unsupported element. Received: {
7979
"type": "unknown"
8080
}"
8181
`);
8282
});
8383

8484
test('`buildPattern` throws on empty text', () => {
8585
expect(() => buildPattern('')).toThrowErrorMatchingInlineSnapshot(
86-
`"\`encodeText\`: received text should not be empty"`,
86+
`"Expected at least one character"`,
8787
);
8888
});

src/constructs/__tests__/repeat.test.tsx

+1-3
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,7 @@ test('`repeat` pattern optimizes grouping for atoms', () => {
1616
});
1717

1818
test('`repeat` throws on no children', () => {
19-
expect(() => repeat([], 1)).toThrowErrorMatchingInlineSnapshot(
20-
`"\`repeat\` should receive at least one element"`,
21-
);
19+
expect(() => repeat([], 1)).toThrowErrorMatchingInlineSnapshot(`"Expected at least one element"`);
2220
});
2321

2422
test('greedy `repeat` quantifier pattern', () => {

src/constructs/__tests__/char-escape-unicode.test.tsx → src/constructs/__tests__/unicode.test.tsx

+61-50
Original file line number | Diff line number | Diff line change
@@ -5,88 +5,99 @@ import {
55
endOfString,
66
type RegexSequence,
77
startOfString,
8+
unicodeChar,
89
unicodeProperty,
910
} from '../..';
1011

1112
function u(sequence: RegexSequence) {
1213
return buildRegExp(sequence, { unicode: true });
1314
}
1415

15-
test('`char` pattern', () => {
16+
test('`unicodeChar` pattern', () => {
1617
// eslint-disable-next-line no-control-regex
17-
expect(char(0)).toEqualRegex(/\u0000/);
18+
expect(unicodeChar(0)).toEqualRegex(/\u0000/);
1819
// eslint-disable-next-line no-control-regex
19-
expect(char(0x1)).toEqualRegex(/\u0001/);
20+
expect(unicodeChar(0x1)).toEqualRegex(/\u0001/);
2021
// eslint-disable-next-line no-control-regex
21-
expect(char(0x12)).toEqualRegex(/\u0012/);
22-
expect(char(0x123)).toEqualRegex(/\u0123/);
23-
expect(char(0x1234)).toEqualRegex(/\u1234/);
22+
expect(unicodeChar(0x12)).toEqualRegex(/\u0012/);
23+
expect(unicodeChar(0x123)).toEqualRegex(/\u0123/);
24+
expect(unicodeChar(0x1234)).toEqualRegex(/\u1234/);
2425

2526
// eslint-disable-next-line no-control-regex
26-
expect(u(char(0))).toEqualRegex(new RegExp('\\u0000', 'u'));
27+
expect(u(unicodeChar(0))).toEqualRegex(new RegExp('\\u0000', 'u'));
2728
// eslint-disable-next-line no-control-regex
28-
expect(u(char(0x1))).toEqualRegex(new RegExp('\\u0001', 'u'));
29-
expect(u(char(0x12))).toEqualRegex(
29+
expect(u(unicodeChar(0x1))).toEqualRegex(new RegExp('\\u0001', 'u'));
30+
expect(u(unicodeChar(0x12))).toEqualRegex(
3031
// eslint-disable-next-line no-control-regex
3132
new RegExp('\\u0012', 'u'),
3233
);
33-
expect(char(0x0123)).toEqualRegex(/\u0123/);
34-
expect(char(0x1234)).toEqualRegex(/\u1234/);
34+
expect(unicodeChar(0x0123)).toEqualRegex(/\u0123/);
35+
expect(unicodeChar(0x1234)).toEqualRegex(/\u1234/);
3536

36-
expect(u(char(0x0123))).toEqualRegex(/\u0123/u);
37-
expect(u(char(0x1234))).toEqualRegex(/\u1234/u);
38-
expect(u(char(0x12345))).toEqualRegex(new RegExp('\\u{12345}', 'u'));
39-
expect(u(char(0x103456))).toEqualRegex(new RegExp('\\u{103456}', 'u'));
37+
expect(u(unicodeChar(0x0123))).toEqualRegex(/\u0123/u);
38+
expect(u(unicodeChar(0x1234))).toEqualRegex(/\u1234/u);
39+
expect(u(unicodeChar(0x12345))).toEqualRegex(new RegExp('\\u{12345}', 'u'));
40+
expect(u(unicodeChar(0x103456))).toEqualRegex(new RegExp('\\u{103456}', 'u'));
4041
});
4142

42-
test('`char` matching', () => {
43-
expect(char(0)).toMatchString('\u{0}');
44-
expect(char(0x1)).toMatchString('\u{1}');
45-
expect(char(0x12)).toMatchString('\u{12}}');
46-
expect(char(0x123)).toMatchString('\u{123}');
47-
expect(char(0x1234)).toMatchString('\u{1234}}');
48-
49-
expect(char('a'.codePointAt(0)!)).toMatchString('a');
50-
expect(char('ą'.codePointAt(0)!)).toMatchString('ą');
51-
expect(char('©'.codePointAt(0)!)).toMatchString('©');
52-
53-
expect(u(char(0))).toMatchString('\u{0}');
54-
expect(u(char(0))).not.toMatchString('a');
55-
expect(u(char(0x1))).toMatchString('\u{1}');
56-
expect(u(char(0x12))).toMatchString('\u{12}');
57-
expect(u(char(0x123))).toMatchString('\u{123}');
58-
expect(u(char(0x1234))).toMatchString('\u{1234}');
59-
expect(u(char(0x12345))).toMatchString('\u{12345}');
60-
expect(u(char(0x103456))).toMatchString('\u{103456}');
61-
62-
expect(u(char('a'.codePointAt(0)!))).toMatchString('a');
63-
expect(u(char('ą'.codePointAt(0)!))).toMatchString('ą');
64-
expect(u(char('©'.codePointAt(0)!))).toMatchString('©');
65-
expect(u(char('😎'.codePointAt(0)!))).toMatchString('😎');
66-
expect(u(char('😎'.codePointAt(0)!))).toMatchString('\u{1f60e}');
43+
test('`unicodeChar` matching', () => {
44+
expect(unicodeChar(0)).toMatchString('\u{0}');
45+
expect(unicodeChar(0x1)).toMatchString('\u{1}');
46+
expect(unicodeChar(0x12)).toMatchString('\u{12}}');
47+
expect(unicodeChar(0x123)).toMatchString('\u{123}');
48+
expect(unicodeChar(0x1234)).toMatchString('\u{1234}}');
49+
50+
expect(unicodeChar('a'.codePointAt(0)!)).toMatchString('a');
51+
expect(unicodeChar('ą'.codePointAt(0)!)).toMatchString('ą');
52+
expect(unicodeChar('©'.codePointAt(0)!)).toMatchString('©');
53+
54+
expect(u(unicodeChar(0))).toMatchString('\u{0}');
55+
expect(u(unicodeChar(0))).not.toMatchString('a');
56+
expect(u(unicodeChar(0x1))).toMatchString('\u{1}');
57+
expect(u(unicodeChar(0x12))).toMatchString('\u{12}');
58+
expect(u(unicodeChar(0x123))).toMatchString('\u{123}');
59+
expect(u(unicodeChar(0x1234))).toMatchString('\u{1234}');
60+
expect(u(unicodeChar(0x12345))).toMatchString('\u{12345}');
61+
expect(u(unicodeChar(0x103456))).toMatchString('\u{103456}');
62+
63+
expect(u(unicodeChar('a'.codePointAt(0)!))).toMatchString('a');
64+
expect(u(unicodeChar('ą'.codePointAt(0)!))).toMatchString('ą');
65+
expect(u(unicodeChar('©'.codePointAt(0)!))).toMatchString('©');
66+
expect(u(unicodeChar('😎'.codePointAt(0)!))).toMatchString('😎');
67+
expect(u(unicodeChar('😎'.codePointAt(0)!))).toMatchString('\u{1f60e}');
6768
});
6869

69-
test('`char` nesting matching', () => {
70-
expect(u(charClass(char('a'.codePointAt(0)!), char('ą'.codePointAt(0)!)))).toMatchString('a');
71-
expect(u(charClass(char('a'.codePointAt(0)!), char('ą'.codePointAt(0)!)))).toMatchString('ą');
72-
expect(u(charClass(char('a'.codePointAt(0)!), char('ą'.codePointAt(0)!)))).not.toMatchString('b');
70+
test('`unicodeChar` nesting matching', () => {
71+
expect(
72+
u(charClass(unicodeChar('a'.codePointAt(0)!), unicodeChar('ą'.codePointAt(0)!))),
73+
).toMatchString('a');
74+
expect(
75+
u(charClass(unicodeChar('a'.codePointAt(0)!), unicodeChar('ą'.codePointAt(0)!))),
76+
).toMatchString('ą');
77+
expect(
78+
u(charClass(unicodeChar('a'.codePointAt(0)!), unicodeChar('ą'.codePointAt(0)!))),
79+
).not.toMatchString('b');
7380
});
7481

75-
test('`char` edge cases handling', () => {
76-
expect(() => u(char(NaN))).toThrowErrorMatchingInlineSnapshot(
82+
test('`unicodeChar` edge cases handling', () => {
83+
expect(() => u(unicodeChar(NaN))).toThrowErrorMatchingInlineSnapshot(
7784
`"Expected a valid unicode code point but received NaN"`,
7885
);
79-
expect(() => u(char(1.5))).toThrowErrorMatchingInlineSnapshot(
86+
expect(() => u(unicodeChar(1.5))).toThrowErrorMatchingInlineSnapshot(
8087
`"Expected a valid unicode code point but received 1.5"`,
8188
);
82-
expect(() => u(char(-1))).toThrowErrorMatchingInlineSnapshot(
89+
expect(() => u(unicodeChar(-1))).toThrowErrorMatchingInlineSnapshot(
8390
`"Expected a valid unicode code point but received -1"`,
8491
);
85-
expect(() => u(char(0x110000))).toThrowErrorMatchingInlineSnapshot(
92+
expect(() => u(unicodeChar(0x110000))).toThrowErrorMatchingInlineSnapshot(
8693
`"Expected a valid unicode code point but received 1114112"`,
8794
);
8895

89-
expect(u(char(0x10ffff))).toEqualRegex(/\u{10ffff}/u);
96+
expect(u(unicodeChar(0x10ffff))).toEqualRegex(/\u{10ffff}/u);
97+
});
98+
99+
test('"char" alias', () => {
100+
expect(char('a'.codePointAt(0)!)).toEqualRegex(/\u0061/);
90101
});
91102

92103
test('`unicodeProperty` pattern', () => {

0 commit comments

Comments
 (0)