Skip to content

Commit ff359a2

Browse files
committed
feat: named capture groups & backreferences
wip; chore: more tests; feat: improved refs; refactor: merge name and ref; refactor: self code review; refactor: tweaks; refactor: rename reference to ref; feat: example with html tags; chore: self code review
1 parent 91663a7 commit ff359a2

12 files changed

+348
-27
lines changed

Diff for: jest-setup.ts

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import './test-utils/to-equal-regex';
22
import './test-utils/to-match-groups';
33
import './test-utils/to-match-all-groups';
4+
import './test-utils/to-match-named-groups';
5+
import './test-utils/to-match-all-named-groups';
46
import './test-utils/to-match-string';

Diff for: src/__tests__/example-html-tags.ts

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import {
2+
any,
3+
buildRegExp,
4+
capture,
5+
charClass,
6+
charRange,
7+
digit,
8+
oneOrMore,
9+
ref,
10+
zeroOrMore,
11+
} from '..';
12+
13+
test('example: html tag matching', () => {
14+
const tagName = oneOrMore(charClass(charRange('a', 'z'), digit));
15+
16+
const tagRef = ref('tag');
17+
const tagMatcher = buildRegExp(
18+
[
19+
'<',
20+
capture(tagName, { as: tagRef }),
21+
'>',
22+
capture(zeroOrMore(any, { greedy: false }), { as: 'content' }),
23+
'</',
24+
tagRef,
25+
'>',
26+
],
27+
{ ignoreCase: true, global: true },
28+
);
29+
30+
expect(tagMatcher).toMatchAllNamedGroups('<a>abc</a>', [{ tag: 'a', content: 'abc' }]);
31+
expect(tagMatcher).toMatchAllNamedGroups('<a><b>abc</b></a>', [
32+
{ tag: 'a', content: '<b>abc</b>' },
33+
]);
34+
expect(tagMatcher).toMatchAllNamedGroups('<a>abc1</a><b>abc2</b>', [
35+
{ tag: 'a', content: 'abc1' },
36+
{ tag: 'b', content: 'abc2' },
37+
]);
38+
39+
expect(tagMatcher).not.toMatchString('<a>abc</b>');
40+
41+
expect(tagMatcher).toEqualRegex('<(?<tag>[a-z\\d]+)>(?<content>.*?)<\\/\\k<tag>>');
42+
});

Diff for: src/constructs/__tests__/capture.test.tsx

+105-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
1-
import { capture, oneOrMore } from '../..';
1+
import {
2+
any,
3+
anyOf,
4+
buildRegExp,
5+
capture,
6+
digit,
7+
inverted,
8+
oneOrMore,
9+
ref,
10+
word,
11+
wordBoundary,
12+
} from '../..';
213

314
test('`capture` pattern', () => {
415
expect(capture('a')).toEqualRegex(/(a)/);
@@ -12,3 +23,96 @@ test('`capture` matching', () => {
1223
expect(['a', capture('b')]).toMatchGroups('ab', ['ab', 'b']);
1324
expect(['a', capture('b'), capture('c')]).toMatchGroups('abc', ['abc', 'b', 'c']);
1425
});
26+
27+
test('named `capture` pattern', () => {
28+
expect(capture('a', { as: 'xyz' })).toEqualRegex('(?<xyz>a)');
29+
expect(capture('abc', { as: 'xyz' })).toEqualRegex('(?<xyz>abc)');
30+
expect(capture(oneOrMore('abc'), { as: 'xyz' })).toEqualRegex('(?<xyz>(?:abc)+)');
31+
expect(oneOrMore(capture('abc', { as: 'xyz' }))).toEqualRegex('(?<xyz>abc)+');
32+
});
33+
34+
test('named `capture` matching', () => {
35+
expect(capture('b', { as: 'x1' })).toMatchGroups('ab', ['b', 'b']);
36+
expect(capture('b', { as: 'x1' })).toMatchNamedGroups('ab', { x1: 'b' });
37+
38+
expect(['a', capture('b', { as: 'x1' })]).toMatchGroups('ab', ['ab', 'b']);
39+
expect(['a', capture('b', { as: 'x1' })]).toMatchNamedGroups('ab', { x1: 'b' });
40+
41+
expect([capture('a'), capture('b', { as: 'x1' }), capture('c', { as: 'x2' })]).toMatchGroups(
42+
'abc',
43+
['abc', 'a', 'b', 'c'],
44+
);
45+
expect([capture('a'), capture('b', { as: 'x1' }), capture('c', { as: 'x2' })]).toMatchNamedGroups(
46+
'abc',
47+
{ x1: 'b', x2: 'c' },
48+
);
49+
});
50+
51+
// Should have `ref0` as name.
52+
const firstRef = ref();
53+
54+
test('`reference` pattern', () => {
55+
expect([firstRef]).toEqualRegex(/\k<ref0>/);
56+
expect([ref('xyz')]).toEqualRegex(/\k<xyz>/);
57+
expect([capture(any, { as: firstRef }), ' ', firstRef]).toEqualRegex('(?<ref0>.) \\k<ref0>');
58+
59+
const otherRef = ref('r123');
60+
expect(['xx', capture(any, { as: otherRef }), ' ', otherRef, 'xx']).toEqualRegex(
61+
'xx(?<r123>.) \\k<r123>xx',
62+
);
63+
});
64+
65+
test('`reference` matching basic case', () => {
66+
const someRef = ref();
67+
expect([capture(word, { as: someRef }), someRef]).toMatchString('aa');
68+
expect([capture(digit, { as: someRef }), someRef]).toMatchString('11');
69+
70+
expect([capture(any, { as: someRef }), someRef]).not.toMatchString('ab');
71+
72+
expect([capture(digit, { as: someRef }), someRef]).not.toMatchString('1a');
73+
expect([capture(digit, { as: someRef }), someRef]).not.toMatchString('a1');
74+
});
75+
76+
test('`reference` matching HTML attributes', () => {
77+
const quoteRef = ref('quote');
78+
const quote = anyOf('"\'');
79+
const htmlAttributeRegex = buildRegExp([
80+
wordBoundary,
81+
capture(oneOrMore(word), { as: 'name' }),
82+
'=',
83+
capture(quote, { as: quoteRef }),
84+
capture(oneOrMore(inverted(quote)), { as: 'value' }),
85+
quoteRef,
86+
]);
87+
88+
expect(htmlAttributeRegex).toMatchNamedGroups('a="b"', {
89+
name: 'a',
90+
quote: '"',
91+
value: 'b',
92+
});
93+
expect(htmlAttributeRegex).toMatchNamedGroups('aa="bbb"', {
94+
name: 'aa',
95+
quote: '"',
96+
value: 'bbb',
97+
});
98+
expect(htmlAttributeRegex).toMatchNamedGroups(`aa='bbb'`, {
99+
name: 'aa',
100+
quote: `'`,
101+
value: 'bbb',
102+
});
103+
expect(htmlAttributeRegex).toMatchNamedGroups('<input type="number" />', {
104+
quote: '"',
105+
name: 'type',
106+
value: 'number',
107+
});
108+
expect(htmlAttributeRegex).toMatchNamedGroups(`<input type='number' />`, {
109+
quote: "'",
110+
name: 'type',
111+
value: 'number',
112+
});
113+
114+
expect(htmlAttributeRegex).not.toMatchString(`aa="bbb'`);
115+
expect(htmlAttributeRegex).not.toMatchString(`aa='bbb"`);
116+
expect(htmlAttributeRegex).not.toMatchString(`<input type='number" />`);
117+
expect(htmlAttributeRegex).not.toMatchString(`<input type="number' />`);
118+
});

Diff for: src/constructs/capture.ts

+53-1
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,71 @@ import type { RegexConstruct, RegexElement, RegexSequence } from '../types';
66
export interface Capture extends RegexConstruct {
77
type: 'capture';
88
children: RegexElement[];
9+
options?: CaptureOptions;
910
}
1011

11-
export function capture(sequence: RegexSequence): Capture {
12+
export interface CaptureOptions {
13+
/**
14+
* Either a name to be given to the capturing group or a `Backreference` object ({@link ref})
15+
* that allows the captured text to be matched again later in the regex. */
16+
as?: Backreference | string;
17+
}
18+
19+
export interface Backreference extends RegexConstruct {
20+
type: 'reference';
21+
name: string;
22+
}
23+
24+
/**
 * Wraps a sequence in a capturing group.
 *
 * The text matched by the group becomes available:
 * - in the match results (`String.match`, `String.matchAll`, or `RegExp.exec`)
 * - inside the regex itself, through backreferences (see {@link ref})
 *
 * @param sequence - Elements to place inside the group; normalized with `ensureArray`.
 * @param options - Optional settings. `options.as` names the group, either with a
 *   plain string or with a `Backreference` created by {@link ref}.
 * @returns A `capture` construct whose `encode` method is `encodeCapture`.
 */
export function capture(sequence: RegexSequence, options?: CaptureOptions): Capture {
  const children = ensureArray(sequence);
  const node: Capture = { type: 'capture', children, options, encode: encodeCapture };
  return node;
}
1837

38+
let counter = 0;
39+
40+
/**
 * Creates a backreference to a capturing group.
 *
 * A backreference matches the same text that was previously captured by the
 * capturing group it is attached to (via `capture(..., { as: ref })`).
 *
 * @param name - Name for the capturing group that receives this `Backreference`.
 *   When omitted, a name of the form `ref<N>` is generated from a module-level
 *   counter, which is incremented on each generated name.
 * @returns A `reference` construct carrying the resolved name.
 */
export function ref(name?: string): Backreference {
  // The counter only advances when no explicit name was supplied.
  const resolvedName = name ?? `ref${counter++}`;
  return { type: 'reference', name: resolvedName, encode: encodeReference };
}
54+
1955
/**
 * Encodes a `Capture` construct as a regex group.
 *
 * When `options.as` is set (a non-empty string or a `Backreference`), the group
 * is emitted as a named group `(?<name>...)`; otherwise a plain `(...)` group
 * is emitted. The result always has `atom` precedence.
 */
function encodeCapture(this: Capture): EncodeResult {
  const target = this.options?.as;
  // `?<name>` is inserted right after the opening parenthesis for named groups only.
  const namePrefix = target ? `?<${typeof target === 'string' ? target : target.name}>` : '';

  return {
    precedence: 'atom',
    pattern: `(${namePrefix}${encodeSequence(this.children).pattern})`,
  };
}
70+
71+
/**
 * Encodes a `Backreference` as a named backreference, `\k<name>`, with `atom` precedence.
 */
function encodeReference(this: Backreference): EncodeResult {
  const pattern = `\\k<${this.name}>`;
  return { precedence: 'atom', pattern };
}

Diff for: src/index.ts

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
1+
// Types
12
export type * from './types';
3+
export type { CaptureOptions } from './constructs/capture';
4+
export type { QuantifierOptions } from './constructs/quantifiers';
5+
export type { RepeatOptions } from './constructs/repeat';
26

7+
// Builders
38
export { buildPattern, buildRegExp } from './builders';
49

10+
// Constructs
511
export { endOfString, notWordBoundary, startOfString, wordBoundary } from './constructs/anchors';
6-
export { capture } from './constructs/capture';
12+
export { capture, ref } from './constructs/capture';
713
export {
814
any,
915
anyOf,

Diff for: test-utils/to-equal-regex.ts

+9-4
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,23 @@ import { wrapRegExp } from './utils';
44
export function toEqualRegex(
55
this: jest.MatcherContext,
66
received: RegExp | RegexSequence,
7-
expected: RegExp,
7+
expected: RegExp | string,
88
) {
99
received = wrapRegExp(received);
1010

1111
const options = {
1212
isNot: this.isNot,
1313
};
1414

15+
const expectedSource = typeof expected === 'string' ? expected : expected.source;
16+
const expectedFlags = typeof expected === 'string' ? undefined : expected.flags;
17+
1518
return {
16-
pass: expected.source === received.source && expected.flags === received.flags,
19+
pass:
20+
expectedSource === received.source &&
21+
(expectedFlags === undefined || expectedFlags === received.flags),
1722
message: () =>
18-
this.utils.matcherHint('toHavePattern', undefined, undefined, options) +
23+
this.utils.matcherHint('toEqualRegex', undefined, undefined, options) +
1924
'\n\n' +
2025
`Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expected)}\n` +
2126
`Received: ${this.utils.printReceived(received)}`,
@@ -28,7 +33,7 @@ declare global {
2833
namespace jest {
2934
// eslint-disable-next-line @typescript-eslint/no-unused-vars
3035
interface Matchers<R, T = {}> {
31-
toEqualRegex(expected: RegExp): R;
36+
toEqualRegex(expected: RegExp | string): R;
3237
}
3338
}
3439
}

Diff for: test-utils/to-match-all-named-groups.ts

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import type { RegexSequence } from '../src/types';
2+
import { wrapRegExp } from './utils';
3+
4+
/**
 * Jest matcher: checks the named groups of every match of a regex in `inputText`.
 *
 * @param received - A `RegExp` or a regex sequence; passed through `wrapRegExp`.
 * @param inputText - Text to run `String.prototype.matchAll` against.
 * @param expectedGroups - Expected `groups` object for each match, in match order.
 *
 * Note: `String.prototype.matchAll` throws a `TypeError` when given a `RegExp`
 * without the global flag, so the received regex is expected to be global.
 */
export function toMatchAllNamedGroups(
  this: jest.MatcherContext,
  received: RegExp | RegexSequence,
  inputText: string,
  expectedGroups: Array<Record<string, string>>,
) {
  const receivedRegex = wrapRegExp(received);
  // `matchAll` always returns an iterator (never null), so no null fallback is needed.
  const receivedGroups = [...inputText.matchAll(receivedRegex)].map((match) => match.groups);
  const options = {
    isNot: this.isNot,
  };

  return {
    pass: this.equals(receivedGroups, expectedGroups),
    message: () =>
      // Report this matcher's own name in the failure hint.
      this.utils.matcherHint('toMatchAllNamedGroups', undefined, undefined, options) +
      '\n\n' +
      `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` +
      `Received: ${this.utils.printReceived(receivedGroups)}`,
  };
}
26+
27+
expect.extend({ toMatchAllNamedGroups });
28+
29+
declare global {
30+
namespace jest {
31+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
32+
interface Matchers<R, T = {}> {
33+
toMatchAllNamedGroups(inputText: string, expectedGroups: Array<Record<string, string>>): R;
34+
}
35+
}
36+
}

Diff for: test-utils/to-match-groups.ts

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ import { wrapRegExp } from './utils';
44
export function toMatchGroups(
55
this: jest.MatcherContext,
66
received: RegExp | RegexSequence,
7-
expectedString: string,
7+
inputText: string,
88
expectedGroups: string[],
99
) {
1010
const receivedRegex = wrapRegExp(received);
11-
const matchResult = expectedString.match(receivedRegex);
11+
const matchResult = inputText.match(receivedRegex);
1212
const receivedGroups = matchResult ? [...matchResult] : null;
1313
const options = {
1414
isNot: this.isNot,
@@ -30,7 +30,7 @@ declare global {
3030
namespace jest {
3131
// eslint-disable-next-line @typescript-eslint/no-unused-vars
3232
interface Matchers<R, T = {}> {
33-
toMatchGroups(input: string, expected: string[]): R;
33+
toMatchGroups(inputText: string, expectedGroups: string[]): R;
3434
}
3535
}
3636
}

Diff for: test-utils/to-match-named-groups.ts

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import type { RegexSequence } from '../src/types';
2+
import { wrapRegExp } from './utils';
3+
4+
/**
 * Jest matcher: checks the named groups produced by matching a regex against `inputText`.
 *
 * @param received - A `RegExp` or a regex sequence; passed through `wrapRegExp`.
 * @param inputText - Text to run `String.prototype.match` against.
 * @param expectedGroups - Expected `groups` object of the match result.
 *
 * When there is no match, the received value is `null`. When the match result
 * carries no `groups` (e.g. the regex has no named groups), it is `undefined`.
 */
export function toMatchNamedGroups(
  this: jest.MatcherContext,
  received: RegExp | RegexSequence,
  inputText: string,
  expectedGroups: Record<string, string>,
) {
  const receivedRegex = wrapRegExp(received);
  const matchResult = inputText.match(receivedRegex);
  const receivedGroups = matchResult ? matchResult.groups : null;
  const options = {
    isNot: this.isNot,
  };

  return {
    pass: this.equals(receivedGroups, expectedGroups),
    message: () =>
      // Report this matcher's own name in the failure hint.
      this.utils.matcherHint('toMatchNamedGroups', undefined, undefined, options) +
      '\n\n' +
      `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` +
      `Received: ${this.utils.printReceived(receivedGroups)}`,
  };
}
26+
27+
expect.extend({ toMatchNamedGroups });
28+
29+
declare global {
30+
namespace jest {
31+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
32+
interface Matchers<R, T = {}> {
33+
toMatchNamedGroups(inputText: string, expectedGroups: Record<string, string>): R;
34+
}
35+
}
36+
}

0 commit comments

Comments
 (0)